From b818dfed8df2923f001a683e41520ad6bf6ec734 Mon Sep 17 00:00:00 2001 From: Nikolaj Hey Hinnerskov Date: Fri, 7 May 2021 17:33:51 +0200 Subject: [PATCH] Generated Futhark OpenCL code. --- bfast/monitor/opencl/bfastfinal.py | 216641 ++++++++++++++++++++------ 1 file changed, 168429 insertions(+), 48212 deletions(-) diff --git a/bfast/monitor/opencl/bfastfinal.py b/bfast/monitor/opencl/bfastfinal.py index 3e9def0..26f2fde 100644 --- a/bfast/monitor/opencl/bfastfinal.py +++ b/bfast/monitor/opencl/bfastfinal.py @@ -258,6 +258,8 @@ def sync(self): #pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable #endif #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#define FUTHARK_F64_ENABLED __kernel void dummy_kernel(__global unsigned char *dummy, int n) { const int thread_gid = get_global_id(0); @@ -1387,6 +1389,15 @@ def sync(self): { return (uint64_t) x; } +static inline bool futrts_isnan32(float x) +{ + return isnan(x); +} +static inline bool futrts_isinf32(float x) +{ + return isinf(x); +} +#ifdef __OPENCL_VERSION__ static inline float futrts_log32(float x) { return log(x); @@ -1467,14 +1478,144 @@ def sync(self): { return lgamma(x); } -static inline bool futrts_isnan32(float x) +static inline float fmod32(float x, float y) { - return isnan(x); + return fmod(x, y); } -static inline bool futrts_isinf32(float x) +static inline float futrts_round32(float x) { - return isinf(x); + return rint(x); +} +static inline float futrts_floor32(float x) +{ + return floor(x); +} +static inline float futrts_ceil32(float x) +{ + return ceil(x); +} +static inline float futrts_lerp32(float v0, float v1, float t) +{ + return mix(v0, v1, t); +} +static inline float futrts_mad32(float a, float b, float c) +{ + return mad(a, b, c); +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fma(a, b, c); +} +#else +static inline float futrts_log32(float x) +{ + return logf(x); +} +static inline float 
futrts_log2_32(float x) +{ + return log2f(x); +} +static inline float futrts_log10_32(float x) +{ + return log10f(x); +} +static inline float futrts_sqrt32(float x) +{ + return sqrtf(x); +} +static inline float futrts_exp32(float x) +{ + return expf(x); +} +static inline float futrts_cos32(float x) +{ + return cosf(x); +} +static inline float futrts_sin32(float x) +{ + return sinf(x); +} +static inline float futrts_tan32(float x) +{ + return tanf(x); +} +static inline float futrts_acos32(float x) +{ + return acosf(x); +} +static inline float futrts_asin32(float x) +{ + return asinf(x); +} +static inline float futrts_atan32(float x) +{ + return atanf(x); +} +static inline float futrts_cosh32(float x) +{ + return coshf(x); } +static inline float futrts_sinh32(float x) +{ + return sinhf(x); +} +static inline float futrts_tanh32(float x) +{ + return tanhf(x); +} +static inline float futrts_acosh32(float x) +{ + return acoshf(x); +} +static inline float futrts_asinh32(float x) +{ + return asinhf(x); +} +static inline float futrts_atanh32(float x) +{ + return atanhf(x); +} +static inline float futrts_atan2_32(float x, float y) +{ + return atan2f(x, y); +} +static inline float futrts_gamma32(float x) +{ + return tgammaf(x); +} +static inline float futrts_lgamma32(float x) +{ + return lgammaf(x); +} +static inline float fmod32(float x, float y) +{ + return fmodf(x, y); +} +static inline float futrts_round32(float x) +{ + return rintf(x); +} +static inline float futrts_floor32(float x) +{ + return floorf(x); +} +static inline float futrts_ceil32(float x) +{ + return ceilf(x); +} +static inline float futrts_lerp32(float v0, float v1, float t) +{ + return v0 + (v1 - v0) * t; +} +static inline float futrts_mad32(float a, float b, float c) +{ + return a * b + c; +} +static inline float futrts_fma32(float a, float b, float c) +{ + return fmaf(a, b, c); +} +#endif static inline int32_t futrts_to_bits32(float x) { union { @@ -1495,69 +1636,277 @@ def sync(self): p.f = x; return 
p.t; } -static inline double fsignum32(double x) +static inline float fsignum32(float x) { return futrts_isnan32(x) ? x : (x > 0) - (x < 0); } -#ifdef __OPENCL_VERSION__ -static inline float fmod32(float x, float y) +static inline double fdiv64(double x, double y) { - return fmod(x, y); + return x / y; } -static inline float futrts_round32(float x) +static inline double fadd64(double x, double y) +{ + return x + y; +} +static inline double fsub64(double x, double y) +{ + return x - y; +} +static inline double fmul64(double x, double y) +{ + return x * y; +} +static inline double fmin64(double x, double y) +{ + return fmin(x, y); +} +static inline double fmax64(double x, double y) +{ + return fmax(x, y); +} +static inline double fpow64(double x, double y) +{ + return pow(x, y); +} +static inline bool cmplt64(double x, double y) +{ + return x < y; +} +static inline bool cmple64(double x, double y) +{ + return x <= y; +} +static inline double sitofp_i8_f64(int8_t x) +{ + return (double) x; +} +static inline double sitofp_i16_f64(int16_t x) +{ + return (double) x; +} +static inline double sitofp_i32_f64(int32_t x) +{ + return (double) x; +} +static inline double sitofp_i64_f64(int64_t x) +{ + return (double) x; +} +static inline double uitofp_i8_f64(uint8_t x) +{ + return (double) x; +} +static inline double uitofp_i16_f64(uint16_t x) +{ + return (double) x; +} +static inline double uitofp_i32_f64(uint32_t x) +{ + return (double) x; +} +static inline double uitofp_i64_f64(uint64_t x) +{ + return (double) x; +} +static inline int8_t fptosi_f64_i8(double x) +{ + return (int8_t) x; +} +static inline int16_t fptosi_f64_i16(double x) +{ + return (int16_t) x; +} +static inline int32_t fptosi_f64_i32(double x) +{ + return (int32_t) x; +} +static inline int64_t fptosi_f64_i64(double x) +{ + return (int64_t) x; +} +static inline uint8_t fptoui_f64_i8(double x) +{ + return (uint8_t) x; +} +static inline uint16_t fptoui_f64_i16(double x) +{ + return (uint16_t) x; +} +static 
inline uint32_t fptoui_f64_i32(double x) +{ + return (uint32_t) x; +} +static inline uint64_t fptoui_f64_i64(double x) +{ + return (uint64_t) x; +} +static inline double futrts_log64(double x) +{ + return log(x); +} +static inline double futrts_log2_64(double x) +{ + return log2(x); +} +static inline double futrts_log10_64(double x) +{ + return log10(x); +} +static inline double futrts_sqrt64(double x) +{ + return sqrt(x); +} +static inline double futrts_exp64(double x) +{ + return exp(x); +} +static inline double futrts_cos64(double x) +{ + return cos(x); +} +static inline double futrts_sin64(double x) +{ + return sin(x); +} +static inline double futrts_tan64(double x) +{ + return tan(x); +} +static inline double futrts_acos64(double x) +{ + return acos(x); +} +static inline double futrts_asin64(double x) +{ + return asin(x); +} +static inline double futrts_atan64(double x) +{ + return atan(x); +} +static inline double futrts_cosh64(double x) +{ + return cosh(x); +} +static inline double futrts_sinh64(double x) +{ + return sinh(x); +} +static inline double futrts_tanh64(double x) +{ + return tanh(x); +} +static inline double futrts_acosh64(double x) +{ + return acosh(x); +} +static inline double futrts_asinh64(double x) +{ + return asinh(x); +} +static inline double futrts_atanh64(double x) +{ + return atanh(x); +} +static inline double futrts_atan2_64(double x, double y) +{ + return atan2(x, y); +} +static inline double futrts_gamma64(double x) +{ + return tgamma(x); +} +static inline double futrts_lgamma64(double x) +{ + return lgamma(x); +} +static inline double futrts_fma64(double a, double b, double c) +{ + return fma(a, b, c); +} +static inline double futrts_round64(double x) { return rint(x); } -static inline float futrts_floor32(float x) +static inline double futrts_ceil64(double x) +{ + return ceil(x); +} +static inline double futrts_floor64(double x) { return floor(x); } -static inline float futrts_ceil32(float x) +static inline bool 
futrts_isnan64(double x) { - return ceil(x); + return isnan(x); } -static inline float futrts_lerp32(float v0, float v1, float t) +static inline bool futrts_isinf64(double x) { - return mix(v0, v1, t); + return isinf(x); } -static inline float futrts_mad32(float a, float b, float c) +static inline int64_t futrts_to_bits64(double x) { - return mad(a, b, c); + union { + double f; + int64_t t; + } p; + + p.f = x; + return p.t; } -static inline float futrts_fma32(float a, float b, float c) +static inline double futrts_from_bits64(int64_t x) { - return fma(a, b, c); + union { + int64_t f; + double t; + } p; + + p.f = x; + return p.t; } -#else -static inline float fmod32(float x, float y) +static inline double fmod64(double x, double y) { - return fmodf(x, y); + return fmod(x, y); } -static inline float futrts_round32(float x) +static inline double fsignum64(double x) { - return rintf(x); + return futrts_isnan64(x) ? x : (x > 0) - (x < 0); } -static inline float futrts_floor32(float x) +#ifdef __OPENCL_VERSION__ +static inline double futrts_lerp64(double v0, double v1, double t) { - return floorf(x); + return mix(v0, v1, t); } -static inline float futrts_ceil32(float x) +static inline double futrts_mad64(double a, double b, double c) { - return ceilf(x); + return mad(a, b, c); } -static inline float futrts_lerp32(float v0, float v1, float t) +#else +static inline double futrts_lerp64(double v0, double v1, double t) { return v0 + (v1 - v0) * t; } -static inline float futrts_mad32(float a, float b, float c) +static inline double futrts_mad64(double a, double b, double c) { return a * b + c; } -static inline float futrts_fma32(float a, float b, float c) +#endif +static inline float fpconv_f32_f32(float x) { - return fmaf(a, b, c); + return (float) x; +} +static inline double fpconv_f32_f64(float x) +{ + return (double) x; +} +static inline float fpconv_f64_f32(double x) +{ + return (float) x; +} +static inline double fpconv_f64_f64(double x) +{ + return (double) x; } 
-#endif // Start of atomics.h inline int32_t atomic_xchg_i32_global(volatile __global int32_t *p, int32_t x) { @@ -1806,6 +2155,8 @@ def sync(self): #endif } +#ifdef FUTHARK_F64_ENABLED + inline double atomic_fadd_f64_global(volatile __global double *p, double x) { #if defined(FUTHARK_CUDA) && __CUDA_ARCH__ >= 600 return atomicAdd((double*)p, x); @@ -1838,6 +2189,8 @@ def sync(self): #endif } +#endif + inline int64_t atomic_smax_i64_global(volatile __global int64_t *p, int64_t x) { #ifdef FUTHARK_CUDA return atomicMax((int64_t*)p, x); @@ -1955,59 +2308,83 @@ def sync(self): -__kernel void builtinzhreplicate_f32zireplicate_46318(int32_t num_elems_46315, - float val_46316, __global - unsigned char *mem_46314) +__kernel void builtinzhiota_i64ziiota_i64_126544(int32_t n_126540, + int64_t x_126541, + int64_t s_126542, __global + unsigned char *mem_126539) { const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - int32_t replicate_gtid_46318; - int32_t replicate_ltid_46319; - int32_t replicate_gid_46320; - - replicate_gtid_46318 = get_global_id(0); - replicate_ltid_46319 = get_local_id(0); - replicate_gid_46320 = get_group_id(0); - if (slt64(replicate_gtid_46318, num_elems_46315)) { - ((__global float *) mem_46314)[sext_i32_i64(replicate_gtid_46318)] = - val_46316; + int32_t iota_gtid_126544; + int32_t iota_ltid_126545; + int32_t iota_gid_126546; + + iota_gtid_126544 = get_global_id(0); + iota_ltid_126545 = get_local_id(0); + iota_gid_126546 = get_group_id(0); + if (slt64(iota_gtid_126544, n_126540)) { + ((__global int64_t *) mem_126539)[sext_i32_i64(iota_gtid_126544)] = + add64(mul64(sext_i32_i64(iota_gtid_126544), s_126542), x_126541); } error_0: return; } -__kernel void builtinzhreplicate_i32zireplicate_46327(int32_t num_elems_46324, - int32_t val_46325, - __global - unsigned char *mem_46323) +__kernel void builtinzhreplicate_f64zireplicate_126475(int32_t num_elems_126472, + double val_126473, + __global + unsigned char *mem_126471) { const int 
block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - int32_t replicate_gtid_46327; - int32_t replicate_ltid_46328; - int32_t replicate_gid_46329; + int32_t replicate_gtid_126475; + int32_t replicate_ltid_126476; + int32_t replicate_gid_126477; + + replicate_gtid_126475 = get_global_id(0); + replicate_ltid_126476 = get_local_id(0); + replicate_gid_126477 = get_group_id(0); + if (slt64(replicate_gtid_126475, num_elems_126472)) { + ((__global double *) mem_126471)[sext_i32_i64(replicate_gtid_126475)] = + val_126473; + } - replicate_gtid_46327 = get_global_id(0); - replicate_ltid_46328 = get_local_id(0); - replicate_gid_46329 = get_group_id(0); - if (slt64(replicate_gtid_46327, num_elems_46324)) { - ((__global int32_t *) mem_46323)[sext_i32_i64(replicate_gtid_46327)] = - val_46325; + error_0: + return; +} +__kernel void builtinzhreplicate_i64zireplicate_126484(int32_t num_elems_126481, + int64_t val_126482, + __global + unsigned char *mem_126480) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t replicate_gtid_126484; + int32_t replicate_ltid_126485; + int32_t replicate_gid_126486; + + replicate_gtid_126484 = get_global_id(0); + replicate_ltid_126485 = get_local_id(0); + replicate_gid_126486 = get_group_id(0); + if (slt64(replicate_gtid_126484, num_elems_126481)) { + ((__global int64_t *) mem_126480)[sext_i32_i64(replicate_gtid_126484)] = + val_126482; } error_0: return; } -__kernel void convertToFloatzisegmap_29816(__global int *global_failure, - int64_t m_27757, int64_t n_27758, - int64_t p_27759, - int16_t nan_value_27760, __global - unsigned char *images_mem_44380, - __global unsigned char *mem_44385) +__kernel void convertToFloatzisegmap_77185(__global int *global_failure, + int64_t m_70846, int64_t n_70847, + int64_t p_70848, + int16_t nan_value_70849, __global + unsigned char *images_mem_120107, + __global unsigned char *mem_120112) { - #define segmap_group_sizze_29877 
(convertToFloatzisegmap_group_sizze_29820) + #define segmap_group_sizze_77235 (convertToFloatzisegmap_group_sizze_77189) const int block_dim0 = 0; const int block_dim1 = 1; @@ -2016,80 +2393,79 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_45677; - int32_t local_tid_45678; - int64_t group_sizze_45681; - int32_t wave_sizze_45680; - int32_t group_tid_45679; - - global_tid_45677 = get_global_id(0); - local_tid_45678 = get_local_id(0); - group_sizze_45681 = get_local_size(0); - wave_sizze_45680 = LOCKSTEP_WIDTH; - group_tid_45679 = get_group_id(0); - - int32_t phys_tid_29816; - - phys_tid_29816 = global_tid_45677; - - int64_t gtid_29813; - - gtid_29813 = squot64(sext_i32_i64(group_tid_45679) * - segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678), n_27758 * p_27759); - - int64_t gtid_29814; - - gtid_29814 = squot64(sext_i32_i64(group_tid_45679) * - segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678) - - squot64(sext_i32_i64(group_tid_45679) * - segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678), n_27758 * - p_27759) * (n_27758 * p_27759), p_27759); - - int64_t gtid_29815; - - gtid_29815 = sext_i32_i64(group_tid_45679) * segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678) - squot64(sext_i32_i64(group_tid_45679) * - segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678), - n_27758 * p_27759) * (n_27758 * - p_27759) - - squot64(sext_i32_i64(group_tid_45679) * segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678) - - squot64(sext_i32_i64(group_tid_45679) * - segmap_group_sizze_29877 + - sext_i32_i64(local_tid_45678), n_27758 * p_27759) * - (n_27758 * p_27759), p_27759) * p_27759; - if ((slt64(gtid_29813, m_27757) && slt64(gtid_29814, n_27758)) && - slt64(gtid_29815, p_27759)) { - int16_t x_29880 = ((__global int16_t *) images_mem_44380)[gtid_29813 * - (p_27759 * - n_27758) + - gtid_29814 * - p_27759 + - gtid_29815]; - bool cond_29881 = x_29880 == nan_value_27760; - float defunc_0_f_res_29882; - - if 
(cond_29881) { - defunc_0_f_res_29882 = NAN; + int32_t global_tid_126321; + int32_t local_tid_126322; + int64_t group_sizze_126325; + int32_t wave_sizze_126324; + int32_t group_tid_126323; + + global_tid_126321 = get_global_id(0); + local_tid_126322 = get_local_id(0); + group_sizze_126325 = get_local_size(0); + wave_sizze_126324 = LOCKSTEP_WIDTH; + group_tid_126323 = get_group_id(0); + + int32_t phys_tid_77185; + + phys_tid_77185 = global_tid_126321; + + int64_t gtid_77182; + + gtid_77182 = squot64(sext_i32_i64(group_tid_126323) * + segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322), n_70847 * p_70848); + + int64_t gtid_77183; + + gtid_77183 = squot64(sext_i32_i64(group_tid_126323) * + segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322) - + squot64(sext_i32_i64(group_tid_126323) * + segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322), n_70847 * + p_70848) * (n_70847 * p_70848), p_70848); + + int64_t gtid_77184; + + gtid_77184 = sext_i32_i64(group_tid_126323) * segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322) - + squot64(sext_i32_i64(group_tid_126323) * segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322), n_70847 * p_70848) * (n_70847 * + p_70848) - + squot64(sext_i32_i64(group_tid_126323) * segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322) - + squot64(sext_i32_i64(group_tid_126323) * + segmap_group_sizze_77235 + + sext_i32_i64(local_tid_126322), n_70847 * p_70848) * + (n_70847 * p_70848), p_70848) * p_70848; + if ((slt64(gtid_77182, m_70846) && slt64(gtid_77183, n_70847)) && + slt64(gtid_77184, p_70848)) { + int16_t x_77238 = ((__global int16_t *) images_mem_120107)[gtid_77182 * + (p_70848 * + n_70847) + + gtid_77183 * + p_70848 + + gtid_77184]; + bool cond_77239 = x_77238 == nan_value_70849; + double defunc_0_f_res_77240; + + if (cond_77239) { + defunc_0_f_res_77240 = NAN; } else { - float i16_res_29883 = sitofp_i16_f32(x_29880); + double i16_res_77241 = sitofp_i16_f64(x_77238); - defunc_0_f_res_29882 = 
i16_res_29883; + defunc_0_f_res_77240 = i16_res_77241; } - ((__global float *) mem_44385)[gtid_29813 * (p_27759 * n_27758) + - gtid_29814 * p_27759 + gtid_29815] = - defunc_0_f_res_29882; + ((__global double *) mem_120112)[gtid_77182 * (p_70848 * n_70847) + + gtid_77183 * p_70848 + gtid_77184] = + defunc_0_f_res_77240; } error_0: return; - #undef segmap_group_sizze_29877 + #undef segmap_group_sizze_77235 } -__kernel void gpu_map_transpose_f32(__local volatile +__kernel void gpu_map_transpose_f64(__local volatile int64_t *block_9_backing_aligned_0, int32_t destoffset_1, int32_t srcoffset_3, int32_t num_arrays_4, int32_t x_elems_5, @@ -2132,8 +2508,8 @@ def sync(self): get_group_id_2_42 = get_group_id(2); int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; - int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30; - int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; int32_t x_index_31 = get_global_id_0_37; int32_t y_index_32 = get_group_id_1_41 * 32 + get_local_id_1_39; @@ -2143,11 +2519,11 @@ def sync(self): x_index_31; if (slt32(y_index_32 + j_43 * 8, y_elems_6)) { - ((__local float *) block_9)[sext_i32_i64((get_local_id_1_39 + - j_43 * 8) * 33 + - get_local_id_0_38)] = ((__global - float *) srcmem_2)[sext_i32_i64(idata_offset_34 + - index_in_35)]; + ((__local double *) block_9)[sext_i32_i64((get_local_id_1_39 + + j_43 * 8) * 33 + + get_local_id_0_38)] = ((__global + double *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; } } } @@ -2160,13 +2536,13 @@ def sync(self): x_index_31; if (slt32(y_index_32 + j_43 * 8, x_elems_5)) { - ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 + - index_out_36)] = ((__local - float *) block_9)[sext_i32_i64(get_local_id_0_38 * - 33 + - get_local_id_1_39 + - j_43 * - 8)]; + ((__global double *) 
destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + double *) block_9)[sext_i32_i64(get_local_id_0_38 * + 33 + + get_local_id_1_39 + + j_43 * + 8)]; } } } @@ -2174,7 +2550,7 @@ def sync(self): error_0: return; } -__kernel void gpu_map_transpose_f32_low_height(__local volatile +__kernel void gpu_map_transpose_f64_low_height(__local volatile int64_t *block_9_backing_aligned_0, int32_t destoffset_1, int32_t srcoffset_3, @@ -2220,8 +2596,8 @@ def sync(self): get_group_id_2_42 = get_group_id(2); int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; - int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30; - int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; int32_t x_index_31 = get_group_id_0_40 * 16 * mulx_7 + get_local_id_0_38 + srem32(get_local_id_1_39, mulx_7) * 16; int32_t y_index_32 = get_group_id_1_41 * 16 + squot32(get_local_id_1_39, @@ -2229,10 +2605,10 @@ def sync(self): int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) { - ((__local float *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + - get_local_id_0_38)] = ((__global - float *) srcmem_2)[sext_i32_i64(idata_offset_34 + - index_in_35)]; + ((__local double *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + + get_local_id_0_38)] = ((__global + double *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; } barrier(CLK_LOCAL_MEM_FENCE); x_index_31 = get_group_id_1_41 * 16 + squot32(get_local_id_0_38, mulx_7); @@ -2242,17 +2618,17 @@ def sync(self): int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31; if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) { - ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 + - index_out_36)] = ((__local - float *) 
block_9)[sext_i32_i64(get_local_id_0_38 * - 17 + - get_local_id_1_39)]; + ((__global double *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + double *) block_9)[sext_i32_i64(get_local_id_0_38 * + 17 + + get_local_id_1_39)]; } error_0: return; } -__kernel void gpu_map_transpose_f32_low_width(__local volatile +__kernel void gpu_map_transpose_f64_low_width(__local volatile int64_t *block_9_backing_aligned_0, int32_t destoffset_1, int32_t srcoffset_3, @@ -2297,8 +2673,8 @@ def sync(self): get_group_id_2_42 = get_group_id(2); int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; - int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30; - int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; int32_t x_index_31 = get_group_id_0_40 * 16 + squot32(get_local_id_0_38, muly_8); int32_t y_index_32 = get_group_id_1_41 * 16 * muly_8 + get_local_id_1_39 + @@ -2306,11 +2682,314 @@ def sync(self): int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) { - ((__local float *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + - get_local_id_0_38)] = ((__global - float *) srcmem_2)[sext_i32_i64(idata_offset_34 + + ((__local double *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + + get_local_id_0_38)] = ((__global + double *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + x_index_31 = get_group_id_1_41 * 16 * muly_8 + get_local_id_0_38 + + srem32(get_local_id_1_39, muly_8) * 16; + y_index_32 = get_group_id_0_40 * 16 + squot32(get_local_id_1_39, muly_8); + + int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31; + + if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) { + ((__global double *) 
destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + double *) block_9)[sext_i32_i64(get_local_id_0_38 * + 17 + + get_local_id_1_39)]; + } + + error_0: + return; +} +__kernel void gpu_map_transpose_f64_small(__local volatile + int64_t *block_9_backing_aligned_0, + int32_t destoffset_1, + int32_t srcoffset_3, + int32_t num_arrays_4, + int32_t x_elems_5, int32_t y_elems_6, + int32_t mulx_7, int32_t muly_8, + __global unsigned char *destmem_0, + __global unsigned char *srcmem_2) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict block_9_backing_0 = (__local volatile + char *) block_9_backing_aligned_0; + __local char *block_9; + + block_9 = (__local char *) block_9_backing_0; + + int32_t get_global_id_0_37; + + get_global_id_0_37 = get_global_id(0); + + int32_t get_local_id_0_38; + + get_local_id_0_38 = get_local_id(0); + + int32_t get_local_id_1_39; + + get_local_id_1_39 = get_local_id(1); + + int32_t get_group_id_0_40; + + get_group_id_0_40 = get_group_id(0); + + int32_t get_group_id_1_41; + + get_group_id_1_41 = get_group_id(1); + + int32_t get_group_id_2_42; + + get_group_id_2_42 = get_group_id(2); + + int32_t our_array_offset_30 = squot32(get_global_id_0_37, y_elems_6 * + x_elems_5) * (y_elems_6 * x_elems_5); + int32_t x_index_31 = squot32(srem32(get_global_id_0_37, y_elems_6 * + x_elems_5), y_elems_6); + int32_t y_index_32 = srem32(get_global_id_0_37, y_elems_6); + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; + int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; + int32_t index_out_36 = x_index_31 * y_elems_6 + y_index_32; + + if (slt32(get_global_id_0_37, x_elems_5 * y_elems_6 * num_arrays_4)) { + ((__global double *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__global + double *) srcmem_2)[sext_i32_i64(idata_offset_34 + index_in_35)]; } + + 
error_0: + return; +} +__kernel void gpu_map_transpose_i64(__local volatile + int64_t *block_9_backing_aligned_0, + int32_t destoffset_1, int32_t srcoffset_3, + int32_t num_arrays_4, int32_t x_elems_5, + int32_t y_elems_6, int32_t mulx_7, + int32_t muly_8, __global + unsigned char *destmem_0, __global + unsigned char *srcmem_2) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict block_9_backing_0 = (__local volatile + char *) block_9_backing_aligned_0; + __local char *block_9; + + block_9 = (__local char *) block_9_backing_0; + + int32_t get_global_id_0_37; + + get_global_id_0_37 = get_global_id(0); + + int32_t get_local_id_0_38; + + get_local_id_0_38 = get_local_id(0); + + int32_t get_local_id_1_39; + + get_local_id_1_39 = get_local_id(1); + + int32_t get_group_id_0_40; + + get_group_id_0_40 = get_group_id(0); + + int32_t get_group_id_1_41; + + get_group_id_1_41 = get_group_id(1); + + int32_t get_group_id_2_42; + + get_group_id_2_42 = get_group_id(2); + + int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; + int32_t x_index_31 = get_global_id_0_37; + int32_t y_index_32 = get_group_id_1_41 * 32 + get_local_id_1_39; + + if (slt32(x_index_31, x_elems_5)) { + for (int32_t j_43 = 0; j_43 < 4; j_43++) { + int32_t index_in_35 = (y_index_32 + j_43 * 8) * x_elems_5 + + x_index_31; + + if (slt32(y_index_32 + j_43 * 8, y_elems_6)) { + ((__local int64_t *) block_9)[sext_i32_i64((get_local_id_1_39 + + j_43 * 8) * 33 + + get_local_id_0_38)] = ((__global + int64_t *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + x_index_31 = get_group_id_1_41 * 32 + get_local_id_0_38; + y_index_32 = get_group_id_0_40 * 32 + get_local_id_1_39; + if (slt32(x_index_31, y_elems_6)) { + for (int32_t j_43 = 
0; j_43 < 4; j_43++) { + int32_t index_out_36 = (y_index_32 + j_43 * 8) * y_elems_6 + + x_index_31; + + if (slt32(y_index_32 + j_43 * 8, x_elems_5)) { + ((__global int64_t *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + int64_t *) block_9)[sext_i32_i64(get_local_id_0_38 * + 33 + + get_local_id_1_39 + + j_43 * + 8)]; + } + } + } + + error_0: + return; +} +__kernel void gpu_map_transpose_i64_low_height(__local volatile + int64_t *block_9_backing_aligned_0, + int32_t destoffset_1, + int32_t srcoffset_3, + int32_t num_arrays_4, + int32_t x_elems_5, + int32_t y_elems_6, + int32_t mulx_7, int32_t muly_8, + __global + unsigned char *destmem_0, + __global unsigned char *srcmem_2) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict block_9_backing_0 = (__local volatile + char *) block_9_backing_aligned_0; + __local char *block_9; + + block_9 = (__local char *) block_9_backing_0; + + int32_t get_global_id_0_37; + + get_global_id_0_37 = get_global_id(0); + + int32_t get_local_id_0_38; + + get_local_id_0_38 = get_local_id(0); + + int32_t get_local_id_1_39; + + get_local_id_1_39 = get_local_id(1); + + int32_t get_group_id_0_40; + + get_group_id_0_40 = get_group_id(0); + + int32_t get_group_id_1_41; + + get_group_id_1_41 = get_group_id(1); + + int32_t get_group_id_2_42; + + get_group_id_2_42 = get_group_id(2); + + int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; + int32_t x_index_31 = get_group_id_0_40 * 16 * mulx_7 + get_local_id_0_38 + + srem32(get_local_id_1_39, mulx_7) * 16; + int32_t y_index_32 = get_group_id_1_41 * 16 + squot32(get_local_id_1_39, + mulx_7); + int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; + + if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) { + ((__local 
int64_t *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + + get_local_id_0_38)] = ((__global + int64_t *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + x_index_31 = get_group_id_1_41 * 16 + squot32(get_local_id_0_38, mulx_7); + y_index_32 = get_group_id_0_40 * 16 * mulx_7 + get_local_id_1_39 + + srem32(get_local_id_0_38, mulx_7) * 16; + + int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31; + + if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) { + ((__global int64_t *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + int64_t *) block_9)[sext_i32_i64(get_local_id_0_38 * + 17 + + get_local_id_1_39)]; + } + + error_0: + return; +} +__kernel void gpu_map_transpose_i64_low_width(__local volatile + int64_t *block_9_backing_aligned_0, + int32_t destoffset_1, + int32_t srcoffset_3, + int32_t num_arrays_4, + int32_t x_elems_5, + int32_t y_elems_6, int32_t mulx_7, + int32_t muly_8, __global + unsigned char *destmem_0, __global + unsigned char *srcmem_2) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict block_9_backing_0 = (__local volatile + char *) block_9_backing_aligned_0; + __local char *block_9; + + block_9 = (__local char *) block_9_backing_0; + + int32_t get_global_id_0_37; + + get_global_id_0_37 = get_global_id(0); + + int32_t get_local_id_0_38; + + get_local_id_0_38 = get_local_id(0); + + int32_t get_local_id_1_39; + + get_local_id_1_39 = get_local_id(1); + + int32_t get_group_id_0_40; + + get_group_id_0_40 = get_group_id(0); + + int32_t get_group_id_1_41; + + get_group_id_1_41 = get_group_id(1); + + int32_t get_group_id_2_42; + + get_group_id_2_42 = get_group_id(2); + + int32_t our_array_offset_30 = get_group_id_2_42 * x_elems_5 * y_elems_6; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; + int32_t 
x_index_31 = get_group_id_0_40 * 16 + squot32(get_local_id_0_38, + muly_8); + int32_t y_index_32 = get_group_id_1_41 * 16 * muly_8 + get_local_id_1_39 + + srem32(get_local_id_0_38, muly_8) * 16; + int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; + + if (slt32(x_index_31, x_elems_5) && slt32(y_index_32, y_elems_6)) { + ((__local int64_t *) block_9)[sext_i32_i64(get_local_id_1_39 * 17 + + get_local_id_0_38)] = ((__global + int64_t *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; + } barrier(CLK_LOCAL_MEM_FENCE); x_index_31 = get_group_id_1_41 * 16 * muly_8 + get_local_id_0_38 + srem32(get_local_id_1_39, muly_8) * 16; @@ -2319,17 +2998,17 @@ def sync(self): int32_t index_out_36 = y_index_32 * y_elems_6 + x_index_31; if (slt32(x_index_31, y_elems_6) && slt32(y_index_32, x_elems_5)) { - ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 + - index_out_36)] = ((__local - float *) block_9)[sext_i32_i64(get_local_id_0_38 * - 17 + - get_local_id_1_39)]; + ((__global int64_t *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__local + int64_t *) block_9)[sext_i32_i64(get_local_id_0_38 * + 17 + + get_local_id_1_39)]; } error_0: return; } -__kernel void gpu_map_transpose_f32_small(__local volatile +__kernel void gpu_map_transpose_i64_small(__local volatile int64_t *block_9_backing_aligned_0, int32_t destoffset_1, int32_t srcoffset_3, @@ -2377,427 +3056,1417 @@ def sync(self): int32_t x_index_31 = squot32(srem32(get_global_id_0_37, y_elems_6 * x_elems_5), y_elems_6); int32_t y_index_32 = srem32(get_global_id_0_37, y_elems_6); - int32_t odata_offset_33 = squot32(destoffset_1, 4) + our_array_offset_30; - int32_t idata_offset_34 = squot32(srcoffset_3, 4) + our_array_offset_30; + int32_t odata_offset_33 = squot32(destoffset_1, 8) + our_array_offset_30; + int32_t idata_offset_34 = squot32(srcoffset_3, 8) + our_array_offset_30; int32_t index_in_35 = y_index_32 * x_elems_5 + x_index_31; int32_t index_out_36 = x_index_31 * y_elems_6 + 
y_index_32; if (slt32(get_global_id_0_37, x_elems_5 * y_elems_6 * num_arrays_4)) { - ((__global float *) destmem_0)[sext_i32_i64(odata_offset_33 + - index_out_36)] = ((__global - float *) srcmem_2)[sext_i32_i64(idata_offset_34 + - index_in_35)]; + ((__global int64_t *) destmem_0)[sext_i32_i64(odata_offset_33 + + index_out_36)] = ((__global + int64_t *) srcmem_2)[sext_i32_i64(idata_offset_34 + + index_in_35)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126383(int64_t m_75136, int64_t n_75139, __global + unsigned char *mem_120177, __global + unsigned char *mem_120224) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126383; + int32_t copy_ltid_126384; + int32_t copy_gid_126385; + + copy_gtid_126383 = get_global_id(0); + copy_ltid_126384 = get_local_id(0); + copy_gid_126385 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126383), m_75136 * n_75139)) { + ((__global double *) mem_120224)[squot64(sext_i32_i64(copy_gtid_126383), + n_75139) * n_75139 + + (sext_i32_i64(copy_gtid_126383) - + squot64(sext_i32_i64(copy_gtid_126383), + n_75139) * n_75139)] = + ((__global double *) mem_120177)[(sext_i32_i64(copy_gtid_126383) - + squot64(sext_i32_i64(copy_gtid_126383), + n_75139) * n_75139) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_126383), + n_75139)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126388(int64_t m_75136, int64_t n_75139, __global + unsigned char *mem_120180, __global + unsigned char *mem_120228) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126388; + int32_t copy_ltid_126389; + int32_t copy_gid_126390; + + copy_gtid_126388 = get_global_id(0); + copy_ltid_126389 = get_local_id(0); + copy_gid_126390 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126388), m_75136 * n_75139)) { + ((__global + int64_t *) mem_120228)[squot64(sext_i32_i64(copy_gtid_126388), + n_75139) * n_75139 + + 
(sext_i32_i64(copy_gtid_126388) - + squot64(sext_i32_i64(copy_gtid_126388), + n_75139) * n_75139)] = ((__global + int64_t *) mem_120180)[(sext_i32_i64(copy_gtid_126388) - + squot64(sext_i32_i64(copy_gtid_126388), + n_75139) * + n_75139) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_126388), + n_75139)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126466(int64_t m_75136, int64_t n_75139, + int64_t m_75231, __global + unsigned char *mem_120201, __global + unsigned char *mem_120203) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126466; + int32_t copy_ltid_126467; + int32_t copy_gid_126468; + + copy_gtid_126466 = get_global_id(0); + copy_ltid_126467 = get_local_id(0); + copy_gid_126468 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126466), m_75136)) { + ((__global int64_t *) mem_120203)[sext_i32_i64(copy_gtid_126466)] = + ((__global int64_t *) mem_120201)[m_75231 + + sext_i32_i64(copy_gtid_126466) * + n_75139]; + } + + error_0: + return; +} +__kernel void mainzicopy_126562(int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120257) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126562; + int32_t copy_ltid_126563; + int32_t copy_gid_126564; + + copy_gtid_126562 = get_global_id(0); + copy_ltid_126563 = get_local_id(0); + copy_gid_126564 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126562), m_75136 * k2p2zq_75151)) { + ((__global double *) mem_120257)[(sext_i32_i64(copy_gtid_126562) - + squot64(sext_i32_i64(copy_gtid_126562), + k2p2zq_75151) * + k2p2zq_75151) * m_75136 + + squot64(sext_i32_i64(copy_gtid_126562), + k2p2zq_75151)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_126562), + k2p2zq_75151) * + n_75139 + + (sext_i32_i64(copy_gtid_126562) - + squot64(sext_i32_i64(copy_gtid_126562), + 
k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126567(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, __global + unsigned char *mem_120246, __global + unsigned char *mem_120261) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126567; + int32_t copy_ltid_126568; + int32_t copy_gid_126569; + + copy_gtid_126567 = get_global_id(0); + copy_ltid_126568 = get_local_id(0); + copy_gid_126569 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126567), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global + double *) mem_120261)[squot64(sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * (m_75136 * + k2p2zq_75151) + + (sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + (defunc_2_reduce_res_75260 * m_75136) + + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126567) - + squot64(sext_i32_i64(copy_gtid_126567), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel 
void mainzicopy_126572(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, __global + unsigned char *mem_120246, __global + unsigned char *mem_120265) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126572; + int32_t copy_ltid_126573; + int32_t copy_gid_126574; + + copy_gtid_126572 = get_global_id(0); + copy_ltid_126573 = get_local_id(0); + copy_gid_126574 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126572), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global double *) mem_120265)[(sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151) * (k2p2zq_75151 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * k2p2zq_75151) * + k2p2zq_75151 + + squot64(sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126572) - + squot64(sext_i32_i64(copy_gtid_126572), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126682(int64_t m_75136, int64_t 
k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, __global + unsigned char *mem_120246, __global + unsigned char *mem_120894) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126682; + int32_t copy_ltid_126683; + int32_t copy_gid_126684; + + copy_gtid_126682 = get_global_id(0); + copy_ltid_126683 = get_local_id(0); + copy_gid_126684 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126682), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global double *) mem_120894)[(sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151) * (k2p2zq_75151 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * k2p2zq_75151) * + k2p2zq_75151 + + squot64(sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126682) - + squot64(sext_i32_i64(copy_gtid_126682), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126786(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, 
__global + unsigned char *mem_120246, __global + unsigned char *mem_121001) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126786; + int32_t copy_ltid_126787; + int32_t copy_gid_126788; + + copy_gtid_126786 = get_global_id(0); + copy_ltid_126787 = get_local_id(0); + copy_gid_126788 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126786), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global + double *) mem_121001)[squot64(sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * (m_75136 * + k2p2zq_75151) + + (sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + (defunc_2_reduce_res_75260 * m_75136) + + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126786) - + squot64(sext_i32_i64(copy_gtid_126786), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_126870(int64_t m_75136, int64_t k2p2zq_75151, __global + unsigned char *mem_121351, __global + unsigned char *mem_121363) +{ + const int block_dim0 = 0; + 
const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126870; + int32_t copy_ltid_126871; + int32_t copy_gid_126872; + + copy_gtid_126870 = get_global_id(0); + copy_ltid_126871 = get_local_id(0); + copy_gid_126872 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126870), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global + double *) mem_121363)[squot64(sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * (k2p2zq_75151 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * k2p2zq_75151) * + k2p2zq_75151 + (sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)] = ((__global + double *) mem_121351)[squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151 + + (sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_126870) - + squot64(sext_i32_i64(copy_gtid_126870), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127134(int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_121850) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t 
copy_gtid_127134; + int32_t copy_ltid_127135; + int32_t copy_gid_127136; + + copy_gtid_127134 = get_global_id(0); + copy_ltid_127135 = get_local_id(0); + copy_gid_127136 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127134), m_75136 * k2p2zq_75151)) { + ((__global double *) mem_121850)[(sext_i32_i64(copy_gtid_127134) - + squot64(sext_i32_i64(copy_gtid_127134), + k2p2zq_75151) * + k2p2zq_75151) * m_75136 + + squot64(sext_i32_i64(copy_gtid_127134), + k2p2zq_75151)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127134), + k2p2zq_75151) * + n_75139 + + (sext_i32_i64(copy_gtid_127134) - + squot64(sext_i32_i64(copy_gtid_127134), + k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127139(int64_t m_75136, int64_t k2p2zq_75151, __global + unsigned char *mem_121854, __global + unsigned char *mem_121858) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127139; + int32_t copy_ltid_127140; + int32_t copy_gid_127141; + + copy_gtid_127139 = get_global_id(0); + copy_ltid_127140 = get_local_id(0); + copy_gid_127141 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127139), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global double *) mem_121858)[(sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151) * (m_75136 * + k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * m_75136 + + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * k2p2zq_75151)] = + ((__global + double *) mem_121854)[squot64(sext_i32_i64(copy_gtid_127139), + 
k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_127139) - + squot64(sext_i32_i64(copy_gtid_127139), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127207(int64_t m_75136, int64_t n_75139, + int64_t rp1_75837, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122017) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127207; + int32_t copy_ltid_127208; + int32_t copy_gid_127209; + + copy_gtid_127207 = get_global_id(0); + copy_ltid_127208 = get_local_id(0); + copy_gid_127209 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127207), m_75136 * rp1_75837)) { + ((__global double *) mem_122017)[(sext_i32_i64(copy_gtid_127207) - + squot64(sext_i32_i64(copy_gtid_127207), + rp1_75837) * rp1_75837) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_127207), + rp1_75837)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127207), + rp1_75837) * + n_75139 + + (sext_i32_i64(copy_gtid_127207) - + squot64(sext_i32_i64(copy_gtid_127207), + rp1_75837) * + rp1_75837)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127212(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, __global + unsigned char *mem_120246, __global + unsigned char *mem_122021) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127212; + int32_t copy_ltid_127213; + int32_t copy_gid_127214; + + 
copy_gtid_127212 = get_global_id(0); + copy_ltid_127213 = get_local_id(0); + copy_gid_127214 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127212), m_75136 * k2p2zq_75151 * + rp1_75837)) { + ((__global + double *) mem_122021)[squot64(sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), rp1_75837) * + (m_75136 * rp1_75837) + + (sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837) * m_75136 + + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * rp1_75837)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * + rp1_75837) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127212) - + squot64(sext_i32_i64(copy_gtid_127212), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + rp1_75837)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127217(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, __global + unsigned char *mem_120246, __global + unsigned char *mem_122025) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127217; + int32_t copy_ltid_127218; + int32_t copy_gid_127219; + + copy_gtid_127217 = get_global_id(0); + copy_ltid_127218 = get_local_id(0); + copy_gid_127219 = get_group_id(0); 
+ if (slt64(sext_i32_i64(copy_gtid_127217), m_75136 * k2p2zq_75151 * + rp1_75837)) { + ((__global double *) mem_122025)[(sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837) * + (k2p2zq_75151 * m_75136) + + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * rp1_75837) * + k2p2zq_75151 + + squot64(sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127217) - + squot64(sext_i32_i64(copy_gtid_127217), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + rp1_75837)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127466(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, __global + unsigned char *mem_120246, __global + unsigned char *mem_122686) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127466; + int32_t copy_ltid_127467; + int32_t copy_gid_127468; + + copy_gtid_127466 = get_global_id(0); + copy_ltid_127467 = get_local_id(0); + copy_gid_127468 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127466), m_75136 * k2p2zq_75151 * + rp1_75837)) { + ((__global 
double *) mem_122686)[(sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837) * + (k2p2zq_75151 * m_75136) + + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * rp1_75837) * + k2p2zq_75151 + + squot64(sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127466) - + squot64(sext_i32_i64(copy_gtid_127466), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + rp1_75837)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127570(int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, __global + unsigned char *mem_120246, __global + unsigned char *mem_122793) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127570; + int32_t copy_ltid_127571; + int32_t copy_gid_127572; + + copy_gtid_127570 = get_global_id(0); + copy_ltid_127571 = get_local_id(0); + copy_gid_127572 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127570), m_75136 * k2p2zq_75151 * + rp1_75837)) { + ((__global + double *) mem_122793)[squot64(sext_i32_i64(copy_gtid_127570) - + 
squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), rp1_75837) * + (m_75136 * rp1_75837) + + (sext_i32_i64(copy_gtid_127570) - + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127570) - + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837) * m_75136 + + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * rp1_75837)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127570) - + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + (defunc_2_reduce_res_75260 * + m_75136) + + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * + rp1_75837) * + defunc_2_reduce_res_75260 + + (sext_i32_i64(copy_gtid_127570) - + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127570) - + squot64(sext_i32_i64(copy_gtid_127570), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * + rp1_75837), + rp1_75837) * + rp1_75837)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127654(int64_t m_75136, int64_t k2p2zq_75151, __global + unsigned char *mem_123143, __global + unsigned char *mem_123155) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127654; + int32_t copy_ltid_127655; + int32_t copy_gid_127656; + + copy_gtid_127654 = get_global_id(0); + copy_ltid_127655 = get_local_id(0); + copy_gid_127656 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127654), m_75136 * k2p2zq_75151 * + k2p2zq_75151)) { + ((__global + double *) mem_123155)[squot64(sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * (k2p2zq_75151 * + m_75136) + + 
squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * k2p2zq_75151) * + k2p2zq_75151 + (sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)] = ((__global + double *) mem_123143)[squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151 + + (sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151) - + squot64(sext_i32_i64(copy_gtid_127654) - + squot64(sext_i32_i64(copy_gtid_127654), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)]; + } + + error_0: + return; +} +__kernel void mainzicopy_127918(int64_t m_75136, int64_t n_75139, + int64_t rp1_75837, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123633) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127918; + int32_t copy_ltid_127919; + int32_t copy_gid_127920; + + copy_gtid_127918 = get_global_id(0); + copy_ltid_127919 = get_local_id(0); + copy_gid_127920 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127918), m_75136 * rp1_75837)) { + ((__global double *) mem_123633)[(sext_i32_i64(copy_gtid_127918) - + squot64(sext_i32_i64(copy_gtid_127918), + rp1_75837) * rp1_75837) * + m_75136 + + squot64(sext_i32_i64(copy_gtid_127918), + rp1_75837)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127918), + rp1_75837) * + n_75139 + + 
(sext_i32_i64(copy_gtid_127918) - + squot64(sext_i32_i64(copy_gtid_127918), + rp1_75837) * + rp1_75837)]; } error_0: return; } -__kernel void mainzicopy_45849(int64_t m_29166, int64_t nm_29314, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44590) +__kernel void mainzicopy_127923(int64_t m_75136, int64_t k2p2zq_75151, + int64_t rp1_75837, __global + unsigned char *mem_123637, __global + unsigned char *mem_123641) { const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - int32_t copy_gtid_45849; - int32_t copy_ltid_45850; - int32_t copy_gid_45851; + int32_t copy_gtid_127923; + int32_t copy_ltid_127924; + int32_t copy_gid_127925; + + copy_gtid_127923 = get_global_id(0); + copy_ltid_127924 = get_local_id(0); + copy_gid_127925 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127923), m_75136 * k2p2zq_75151 * + rp1_75837)) { + ((__global double *) mem_123641)[(sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837) * + (m_75136 * k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * + rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * m_75136 + + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * rp1_75837)] = + ((__global + double *) mem_123637)[squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * rp1_75837) * + (rp1_75837 * k2p2zq_75151) + + squot64(sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837 + + (sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + 
k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837) - + squot64(sext_i32_i64(copy_gtid_127923) - + squot64(sext_i32_i64(copy_gtid_127923), + k2p2zq_75151 * rp1_75837) * + (k2p2zq_75151 * rp1_75837), + rp1_75837) * rp1_75837)]; + } - copy_gtid_45849 = get_global_id(0); - copy_ltid_45850 = get_local_id(0); - copy_gid_45851 = get_group_id(0); - if (slt64(sext_i32_i64(copy_gtid_45849), m_29166 * nm_29314)) { - ((__global float *) mem_44590)[(sext_i32_i64(copy_gtid_45849) - - squot64(sext_i32_i64(copy_gtid_45849), - nm_29314) * nm_29314) * - m_29166 + - squot64(sext_i32_i64(copy_gtid_45849), - nm_29314)] = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (squot64(sext_i32_i64(copy_gtid_45849), - nm_29314) * - ctx_param_ext_44581 + - (sext_i32_i64(copy_gtid_45849) - - squot64(sext_i32_i64(copy_gtid_45849), - nm_29314) * - nm_29314) * - ctx_param_ext_44583)]; + error_0: + return; +} +__kernel void mainzicopy_129312(int64_t N_75135, int64_t m_75136, + int64_t i_76911, __global + unsigned char *mem_124906, __global + unsigned char *mem_124911) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_129312; + int32_t copy_ltid_129313; + int32_t copy_gid_129314; + + copy_gtid_129312 = get_global_id(0); + copy_ltid_129313 = get_local_id(0); + copy_gid_129314 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_129312), m_75136)) { + ((__global int64_t *) mem_124911)[sext_i32_i64(copy_gtid_129312)] = + ((__global int64_t *) mem_124906)[i_76911 + + sext_i32_i64(copy_gtid_129312) * + N_75135]; } error_0: return; } -__kernel void mainziscan_stage1_41042(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46261_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int32_t num_threads_46255, __global - unsigned char *images_mem_44381, __global - unsigned char *defunc_3_map_res_mem_45140, - __global unsigned char *mem_45163, - __global unsigned char *mem_45166) +__kernel void 
mainziscan_stage1_103083(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126418_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, int64_t m_75231, + int32_t num_threads_126412, __global + unsigned char *images_mem_120108, + __global unsigned char *mem_120201) { - #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036) + #define segscan_group_sizze_103197 (mainzisegscan_group_sizze_103077) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46261_backing_0 = + __local volatile char *restrict scan_arr_mem_126418_backing_0 = (__local volatile - char *) scan_arr_mem_46261_backing_aligned_0; + char *) scan_arr_mem_126418_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46256; - int32_t local_tid_46257; - int64_t group_sizze_46260; - int32_t wave_sizze_46259; - int32_t group_tid_46258; + int32_t global_tid_126413; + int32_t local_tid_126414; + int64_t group_sizze_126417; + int32_t wave_sizze_126416; + int32_t group_tid_126415; - global_tid_46256 = get_global_id(0); - local_tid_46257 = get_local_id(0); - group_sizze_46260 = get_local_size(0); - wave_sizze_46259 = LOCKSTEP_WIDTH; - group_tid_46258 = get_group_id(0); + global_tid_126413 = get_global_id(0); + local_tid_126414 = get_local_id(0); + group_sizze_126417 = get_local_size(0); + wave_sizze_126416 = LOCKSTEP_WIDTH; + group_tid_126415 = get_group_id(0); - int32_t phys_tid_41042; + int32_t phys_tid_103083; - phys_tid_41042 = global_tid_46256; + phys_tid_103083 = global_tid_126413; - __local char *scan_arr_mem_46261; + __local char *scan_arr_mem_126418; - scan_arr_mem_46261 = (__local char *) scan_arr_mem_46261_backing_0; + scan_arr_mem_126418 = (__local char *) scan_arr_mem_126418_backing_0; - int64_t x_41064; - int64_t x_41065; + int64_t x_103201; + int64_t x_103202; - x_41064 = (int64_t) 0; - for (int64_t j_46263 = 0; j_46263 < sdiv_up64(m_29166 * N_29165, - 
sext_i32_i64(num_threads_46255)); - j_46263++) { - int64_t chunk_offset_46264 = segscan_group_sizze_41059 * j_46263 + - sext_i32_i64(group_tid_46258) * (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))); - int64_t flat_idx_46265 = chunk_offset_46264 + - sext_i32_i64(local_tid_46257); - int64_t gtid_41033 = squot64(flat_idx_46265, N_29165); - int64_t gtid_41041 = flat_idx_46265 - squot64(flat_idx_46265, N_29165) * - N_29165; + x_103201 = (int64_t) 0; + for (int64_t j_126420 = 0; j_126420 < sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412)); + j_126420++) { + int64_t chunk_offset_126421 = segscan_group_sizze_103197 * j_126420 + + sext_i32_i64(group_tid_126415) * (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))); + int64_t flat_idx_126422 = chunk_offset_126421 + + sext_i32_i64(local_tid_126414); + int64_t gtid_103074 = squot64(flat_idx_126422, n_75139); + int64_t gtid_103082 = flat_idx_126422 - squot64(flat_idx_126422, + n_75139) * n_75139; // threads in bounds read input { - if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) { - float x_41069 = ((__global - float *) images_mem_44381)[gtid_41033 * - N_29165 + - gtid_41041]; - bool isnan_res_41071; - - isnan_res_41071 = futrts_isnan32(x_41069); - - bool cond_41072 = !isnan_res_41071; - float defunc_1_f_res_41073; - - if (cond_41072) { - float x_41070 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_41033 * - N_29165 + - gtid_41041]; - float defunc_1_f_res_t_res_41074 = x_41069 - x_41070; - - defunc_1_f_res_41073 = defunc_1_f_res_t_res_41074; - } else { - defunc_1_f_res_41073 = NAN; - } - - bool isnan_res_41075; + if (slt64(gtid_103074, m_75136) && slt64(gtid_103082, n_75139)) { + int64_t binop_y_115026 = (int64_t) -1 * gtid_103082; + int64_t slice_115027 = m_75231 + binop_y_115026; + double x_103205 = ((__global + double *) images_mem_120108)[gtid_103074 * + N_75135 + + slice_115027]; + bool 
defunc_0_f_res_103206; - isnan_res_41075 = futrts_isnan32(defunc_1_f_res_41073); + defunc_0_f_res_103206 = futrts_isnan64(x_103205); - bool defunc_0_p_res_41076 = !isnan_res_41075; - int64_t defunc_0_f_res_41077 = - btoi_bool_i64(defunc_0_p_res_41076); + bool defunc_0_g_res_103207 = !defunc_0_f_res_103206; + int64_t defunc_0_f_res_103208 = + btoi_bool_i64(defunc_0_g_res_103207); // write to-scan values to parameters { - x_41065 = defunc_0_f_res_41077; + x_103202 = defunc_0_f_res_103208; } // write mapped values results to global memory - { - ((__global float *) mem_45166)[gtid_41033 * N_29165 + - gtid_41041] = - defunc_1_f_res_41073; - } + { } } } // do one intra-group scan operation { // maybe restore some to-scan values to parameters, or read neutral { - if (!(slt64(gtid_41033, m_29166) && slt64(gtid_41041, - N_29165))) { - x_41065 = (int64_t) 0; + if (!(slt64(gtid_103074, m_75136) && slt64(gtid_103082, + n_75139))) { + x_103202 = (int64_t) 0; } } // combine with carry and write to local memory { - int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065); + int64_t defunc_1_op_res_103203 = add64(x_103201, x_103202); ((__local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] = - defunc_1_op_res_41066; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)] = + defunc_1_op_res_103203; } barrier(CLK_LOCAL_MEM_FENCE); - int64_t x_46266; - int64_t x_46267; - int64_t x_46269; - int64_t x_46270; - bool ltid_in_bounds_46272; + int64_t x_126423; + int64_t x_126424; + int64_t x_126426; + int64_t x_126427; + bool ltid_in_bounds_126429; - ltid_in_bounds_46272 = slt64(sext_i32_i64(local_tid_46257), - segscan_group_sizze_41059); + ltid_in_bounds_126429 = slt64(sext_i32_i64(local_tid_126414), + segscan_group_sizze_103197); - int32_t skip_threads_46273; + int32_t skip_threads_126430; // read input for in-block scan { - if (ltid_in_bounds_46272) { - x_46267 = ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)]; - if 
((local_tid_46257 - squot32(local_tid_46257, 32) * 32) == - 0) { - x_46266 = x_46267; + if (ltid_in_bounds_126429) { + x_126424 = ((volatile __local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)]; + if ((local_tid_126414 - squot32(local_tid_126414, 32) * + 32) == 0) { + x_126423 = x_126424; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46273 = 1; - while (slt32(skip_threads_46273, 32)) { - if (sle32(skip_threads_46273, local_tid_46257 - - squot32(local_tid_46257, 32) * 32) && - ltid_in_bounds_46272) { + skip_threads_126430 = 1; + while (slt32(skip_threads_126430, 32)) { + if (sle32(skip_threads_126430, local_tid_126414 - + squot32(local_tid_126414, 32) * 32) && + ltid_in_bounds_126429) { // read operands { - x_46266 = ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257) - - sext_i32_i64(skip_threads_46273)]; + x_126423 = ((volatile __local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414) - + sext_i32_i64(skip_threads_126430)]; } // perform operation { - bool inactive_46274 = - slt64(srem64(sext_i32_i64(local_tid_46257) + - chunk_offset_46264, N_29165), - sext_i32_i64(local_tid_46257) + - chunk_offset_46264 - - (sext_i32_i64(local_tid_46257 - - skip_threads_46273) + - chunk_offset_46264)); + bool inactive_126431 = + slt64(srem64(sext_i32_i64(local_tid_126414) + + chunk_offset_126421, n_75139), + sext_i32_i64(local_tid_126414) + + chunk_offset_126421 - + (sext_i32_i64(local_tid_126414 - + skip_threads_126430) + + chunk_offset_126421)); - if (inactive_46274) { - x_46266 = x_46267; + if (inactive_126431) { + x_126423 = x_126424; } - if (!inactive_46274) { - int64_t defunc_1_op_res_46268 = add64(x_46266, - x_46267); + if (!inactive_126431) { + int64_t defunc_1_op_res_126425 = add64(x_126423, + x_126424); - x_46266 = defunc_1_op_res_46268; + x_126423 = defunc_1_op_res_126425; } } } - if (sle32(wave_sizze_46259, skip_threads_46273)) { + if (sle32(wave_sizze_126416, 
skip_threads_126430)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46273, local_tid_46257 - - squot32(local_tid_46257, 32) * 32) && - ltid_in_bounds_46272) { + if (sle32(skip_threads_126430, local_tid_126414 - + squot32(local_tid_126414, 32) * 32) && + ltid_in_bounds_126429) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] = - x_46266; - x_46267 = x_46266; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)] = + x_126423; + x_126424 = x_126423; } } - if (sle32(wave_sizze_46259, skip_threads_46273)) { + if (sle32(wave_sizze_126416, skip_threads_126430)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46273 *= 2; + skip_threads_126430 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46257 - squot32(local_tid_46257, 32) * 32) == - 31 && ltid_in_bounds_46272) { + if ((local_tid_126414 - squot32(local_tid_126414, 32) * 32) == + 31 && ltid_in_bounds_126429) { ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(squot32(local_tid_46257, - 32))] = - x_46266; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(squot32(local_tid_126414, + 32))] = + x_126423; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46275; + int32_t skip_threads_126432; // read input for in-block scan { - if (squot32(local_tid_46257, 32) == 0 && - ltid_in_bounds_46272) { - x_46270 = ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)]; - if ((local_tid_46257 - squot32(local_tid_46257, 32) * + if (squot32(local_tid_126414, 32) == 0 && + ltid_in_bounds_126429) { + x_126427 = ((volatile __local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)]; + if ((local_tid_126414 - squot32(local_tid_126414, 32) * 32) == 0) { - x_46269 = x_46270; + x_126426 = x_126427; } } } // in-block scan (hopefully no barriers needed) { - 
skip_threads_46275 = 1; - while (slt32(skip_threads_46275, 32)) { - if (sle32(skip_threads_46275, local_tid_46257 - - squot32(local_tid_46257, 32) * 32) && - (squot32(local_tid_46257, 32) == 0 && - ltid_in_bounds_46272)) { + skip_threads_126432 = 1; + while (slt32(skip_threads_126432, 32)) { + if (sle32(skip_threads_126432, local_tid_126414 - + squot32(local_tid_126414, 32) * 32) && + (squot32(local_tid_126414, 32) == 0 && + ltid_in_bounds_126429)) { // read operands { - x_46269 = ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257) - - sext_i32_i64(skip_threads_46275)]; + x_126426 = ((volatile __local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414) - + sext_i32_i64(skip_threads_126432)]; } // perform operation { - bool inactive_46276 = - slt64(srem64(sext_i32_i64(local_tid_46257 * + bool inactive_126433 = + slt64(srem64(sext_i32_i64(local_tid_126414 * 32 + 32 - 1) + - chunk_offset_46264, N_29165), - sext_i32_i64(local_tid_46257 * 32 + - 32 - 1) + chunk_offset_46264 - - (sext_i32_i64((local_tid_46257 - - skip_threads_46275) * - 32 + 32 - 1) + chunk_offset_46264)); + chunk_offset_126421, n_75139), + sext_i32_i64(local_tid_126414 * 32 + + 32 - 1) + chunk_offset_126421 - + (sext_i32_i64((local_tid_126414 - + skip_threads_126432) * + 32 + 32 - 1) + + chunk_offset_126421)); - if (inactive_46276) { - x_46269 = x_46270; + if (inactive_126433) { + x_126426 = x_126427; } - if (!inactive_46276) { - int64_t defunc_1_op_res_46271 = - add64(x_46269, x_46270); + if (!inactive_126433) { + int64_t defunc_1_op_res_126428 = + add64(x_126426, x_126427); - x_46269 = defunc_1_op_res_46271; + x_126426 = defunc_1_op_res_126428; } } } - if (sle32(wave_sizze_46259, skip_threads_46275)) { + if (sle32(wave_sizze_126416, skip_threads_126432)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46275, local_tid_46257 - - squot32(local_tid_46257, 32) * 32) && - (squot32(local_tid_46257, 32) == 0 && - ltid_in_bounds_46272)) { + if 
(sle32(skip_threads_126432, local_tid_126414 - + squot32(local_tid_126414, 32) * 32) && + (squot32(local_tid_126414, 32) == 0 && + ltid_in_bounds_126429)) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] = - x_46269; - x_46270 = x_46269; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)] = + x_126426; + x_126427 = x_126426; } } - if (sle32(wave_sizze_46259, skip_threads_46275)) { + if (sle32(wave_sizze_126416, skip_threads_126432)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46275 *= 2; + skip_threads_126432 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46257, 32) == 0 || - !ltid_in_bounds_46272)) { + if (!(squot32(local_tid_126414, 32) == 0 || + !ltid_in_bounds_126429)) { // read operands { - x_46267 = x_46266; - x_46266 = ((__local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(squot32(local_tid_46257, - 32)) - - (int64_t) 1]; + x_126424 = x_126423; + x_126423 = ((__local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(squot32(local_tid_126414, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46277 = - slt64(srem64(sext_i32_i64(local_tid_46257) + - chunk_offset_46264, N_29165), - sext_i32_i64(local_tid_46257) + - chunk_offset_46264 - - (sext_i32_i64(squot32(local_tid_46257, 32) * - 32 - 1) + chunk_offset_46264)); + bool inactive_126434 = + slt64(srem64(sext_i32_i64(local_tid_126414) + + chunk_offset_126421, n_75139), + sext_i32_i64(local_tid_126414) + + chunk_offset_126421 - + (sext_i32_i64(squot32(local_tid_126414, 32) * + 32 - 1) + chunk_offset_126421)); - if (inactive_46277) { - x_46266 = x_46267; + if (inactive_126434) { + x_126423 = x_126424; } - if (!inactive_46277) { - int64_t defunc_1_op_res_46268 = add64(x_46266, - x_46267); + if (!inactive_126434) { + int64_t defunc_1_op_res_126425 = add64(x_126423, + x_126424); - x_46266 = defunc_1_op_res_46268; + x_126423 = defunc_1_op_res_126425; } } // write 
final result { ((__local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] = - x_46266; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)] = + x_126423; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46257, 32) == 0) { + if (squot32(local_tid_126414, 32) == 0) { ((__local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)] = - x_46267; + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)] = + x_126424; } } barrier(CLK_LOCAL_MEM_FENCE); // threads in bounds write partial scan result { - if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) { - ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 + - gtid_41041] = ((__local - int64_t *) scan_arr_mem_46261)[sext_i32_i64(local_tid_46257)]; + if (slt64(gtid_103074, m_75136) && slt64(gtid_103082, + n_75139)) { + ((__global int64_t *) mem_120201)[gtid_103074 * n_75139 + + gtid_103082] = ((__local + int64_t *) scan_arr_mem_126418)[sext_i32_i64(local_tid_126414)]; } } barrier(CLK_LOCAL_MEM_FENCE); // first thread reads last element as carry-in for next iteration { - bool crosses_segment_46278 = slt64(srem64(chunk_offset_46264 + - segscan_group_sizze_41059, - N_29165), - chunk_offset_46264 + - segscan_group_sizze_41059 - - (chunk_offset_46264 + - segscan_group_sizze_41059 - - (int64_t) 1)); - bool should_load_carry_46279 = local_tid_46257 == 0 && - !crosses_segment_46278; - - if (should_load_carry_46279) { - x_41064 = ((__local - int64_t *) scan_arr_mem_46261)[segscan_group_sizze_41059 - - (int64_t) 1]; + bool crosses_segment_126435 = slt64(srem64(chunk_offset_126421 + + segscan_group_sizze_103197, + n_75139), + chunk_offset_126421 + + segscan_group_sizze_103197 - + (chunk_offset_126421 + + segscan_group_sizze_103197 - + (int64_t) 1)); + bool should_load_carry_126436 = local_tid_126414 == 0 && + !crosses_segment_126435; + + if (should_load_carry_126436) { + x_103201 = ((__local + int64_t *) 
scan_arr_mem_126418)[segscan_group_sizze_103197 - + (int64_t) 1]; } - if (!should_load_carry_46279) { - x_41064 = (int64_t) 0; + if (!should_load_carry_126436) { + x_103201 = (int64_t) 0; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -2806,34 +4475,29 @@ def sync(self): error_1: return; - #undef segscan_group_sizze_41059 + #undef segscan_group_sizze_103197 } -__kernel void mainziscan_stage1_42114(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *scan_arr_mem_46644_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t iota32_arg_29597, - int32_t num_threads_46638, __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global unsigned char *mem_45298, - __global unsigned char *mem_45302) +__kernel void mainziscan_stage1_111562(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *scan_arr_mem_128475_backing_aligned_0, + int64_t m_75136, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int32_t num_threads_128469, __global + unsigned char *mem_124045, __global + unsigned char *mem_124057, __global + unsigned char *mem_124061) { - #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108) + #define segscan_group_sizze_111711 (mainzisegscan_group_sizze_111556) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46644_backing_0 = + __local volatile char *restrict scan_arr_mem_128475_backing_0 = (__local volatile - char *) scan_arr_mem_46644_backing_aligned_0; + char *) scan_arr_mem_128475_backing_aligned_0; volatile __local bool local_failure; if (failure_is_an_option) { @@ -2845,129 +4509,85 @@ def sync(self): local_failure = false; 
barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46639; - int32_t local_tid_46640; - int64_t group_sizze_46643; - int32_t wave_sizze_46642; - int32_t group_tid_46641; + int32_t global_tid_128470; + int32_t local_tid_128471; + int64_t group_sizze_128474; + int32_t wave_sizze_128473; + int32_t group_tid_128472; - global_tid_46639 = get_global_id(0); - local_tid_46640 = get_local_id(0); - group_sizze_46643 = get_local_size(0); - wave_sizze_46642 = LOCKSTEP_WIDTH; - group_tid_46641 = get_group_id(0); + global_tid_128470 = get_global_id(0); + local_tid_128471 = get_local_id(0); + group_sizze_128474 = get_local_size(0); + wave_sizze_128473 = LOCKSTEP_WIDTH; + group_tid_128472 = get_group_id(0); - int32_t phys_tid_42114; + int32_t phys_tid_111562; - phys_tid_42114 = global_tid_46639; + phys_tid_111562 = global_tid_128470; - __local char *scan_arr_mem_46644; + __local char *scan_arr_mem_128475; - scan_arr_mem_46644 = (__local char *) scan_arr_mem_46644_backing_0; + scan_arr_mem_128475 = (__local char *) scan_arr_mem_128475_backing_0; - float x_42204; - float x_42205; + double x_111715; + double x_111716; - x_42204 = 0.0F; - for (int64_t j_46646 = 0; j_46646 < sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638)); - j_46646++) { - int64_t chunk_offset_46647 = segscan_group_sizze_42200 * j_46646 + - sext_i32_i64(group_tid_46641) * (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))); - int64_t flat_idx_46648 = chunk_offset_46647 + - sext_i32_i64(local_tid_46640); - int64_t gtid_42105 = squot64(flat_idx_46648, iota32_arg_29597); - int64_t gtid_42113 = flat_idx_46648 - squot64(flat_idx_46648, - iota32_arg_29597) * - iota32_arg_29597; + x_111715 = 0.0; + for (int64_t j_128477 = 0; j_128477 < sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469)); + j_128477++) { + int64_t chunk_offset_128478 = segscan_group_sizze_111711 * j_128477 + + sext_i32_i64(group_tid_128472) * 
(segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))); + int64_t flat_idx_128479 = chunk_offset_128478 + + sext_i32_i64(local_tid_128471); + int64_t gtid_111553 = squot64(flat_idx_128479, Nmk_76536); + int64_t gtid_111561 = flat_idx_128479 - squot64(flat_idx_128479, + Nmk_76536) * Nmk_76536; // threads in bounds read input { - if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, - iota32_arg_29597)) { - int32_t y_42211 = ((__global int32_t *) mem_45298)[gtid_42105]; - int32_t index_primexp_42401 = sext_i64_i32(gtid_42113); - bool cond_42214 = sle32(y_42211, index_primexp_42401); - float defunc_0_f_res_42215; + if (slt64(gtid_111553, m_75136) && slt64(gtid_111561, Nmk_76536)) { + bool cond_111721 = gtid_111561 == (int64_t) 0; + double defunc_0_f_res_111722; - if (cond_42214) { - defunc_0_f_res_42215 = 0.0F; + if (cond_111721) { + defunc_0_f_res_111722 = 0.0; } else { - int32_t x_42207 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_42105]; - int32_t x_42208 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_42105]; - float x_42209 = ((__global - float *) defunc_0_f_res_mem_45279)[gtid_42105]; - bool cond_42216 = index_primexp_42401 == 0; - float defunc_0_f_res_f_res_42217; - - if (cond_42216) { - defunc_0_f_res_f_res_42217 = x_42209; - } else { - int32_t i_42218 = add32(x_42207, index_primexp_42401); - int64_t i_42219 = sext_i32_i64(i_42218); - bool x_42220 = sle64((int64_t) 0, i_42219); - bool y_42221 = slt64(i_42219, N_29165); - bool bounds_check_42222 = x_42220 && y_42221; - bool index_certs_42223; - - if (!bounds_check_42222) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 28) == -1) { - global_failure_args[0] = i_42219; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_42224 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_42105 * - N_29165 + - i_42219]; - int32_t x_42225 = sub32(x_42207, x_42208); - int32_t i_42226 = 
add32(x_42225, index_primexp_42401); - int64_t i_42227 = sext_i32_i64(i_42226); - bool x_42228 = sle64((int64_t) 0, i_42227); - bool y_42229 = slt64(i_42227, N_29165); - bool bounds_check_42230 = x_42228 && y_42229; - bool index_certs_42231; - - if (!bounds_check_42230) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 29) == -1) { - global_failure_args[0] = i_42227; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_0; + double fr_111719 = ((__global + double *) mem_124057)[gtid_111553]; + int64_t i_111723 = sub64(gtid_111561, (int64_t) 1); + bool x_111724 = sle64((int64_t) 0, i_111723); + bool y_111725 = slt64(i_111723, num_recresids_padded_75809); + bool bounds_check_111726 = x_111724 && y_111725; + bool index_certs_111727; + + if (!bounds_check_111726) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 179) == -1) { + global_failure_args[0] = i_111723; + global_failure_args[1] = + num_recresids_padded_75809; + ; } + local_failure = true; + goto error_0; } - - float y_42232 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_42105 * - N_29165 + - i_42227]; - float defunc_0_f_res_f_res_f_res_42233 = x_42224 - - y_42232; - - defunc_0_f_res_f_res_42217 = - defunc_0_f_res_f_res_f_res_42233; } - defunc_0_f_res_42215 = defunc_0_f_res_f_res_42217; + + double x_111728 = ((__global + double *) mem_124045)[gtid_111553 * + num_recresids_padded_75809 + + i_111723]; + double defunc_0_f_res_f_res_111729 = x_111728 / fr_111719; + + defunc_0_f_res_111722 = defunc_0_f_res_f_res_111729; } // write to-scan values to parameters { - x_42205 = defunc_0_f_res_42215; + x_111716 = defunc_0_f_res_111722; } // write mapped values results to global memory { } @@ -2977,18 +4597,18 @@ def sync(self): { // maybe restore some to-scan values to parameters, or read neutral { - if (!(slt64(gtid_42105, m_29166) && slt64(gtid_42113, - iota32_arg_29597))) { - x_42205 = 0.0F; + if (!(slt64(gtid_111553, m_75136) && slt64(gtid_111561, + Nmk_76536))) 
{ + x_111716 = 0.0; } } // combine with carry and write to local memory { - float defunc_1_op_res_42206 = x_42204 + x_42205; + double defunc_1_op_res_111717 = x_111715 + x_111716; ((__local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] = - defunc_1_op_res_42206; + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)] = + defunc_1_op_res_111717; } error_0: @@ -2997,252 +4617,252 @@ def sync(self): return; barrier(CLK_LOCAL_MEM_FENCE); - float x_46649; - float x_46650; - float x_46652; - float x_46653; - bool ltid_in_bounds_46655; + double x_128480; + double x_128481; + double x_128483; + double x_128484; + bool ltid_in_bounds_128486; - ltid_in_bounds_46655 = slt64(sext_i32_i64(local_tid_46640), - segscan_group_sizze_42200); + ltid_in_bounds_128486 = slt64(sext_i32_i64(local_tid_128471), + segscan_group_sizze_111711); - int32_t skip_threads_46656; + int32_t skip_threads_128487; // read input for in-block scan { - if (ltid_in_bounds_46655) { - x_46650 = ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)]; - if ((local_tid_46640 - squot32(local_tid_46640, 32) * 32) == - 0) { - x_46649 = x_46650; + if (ltid_in_bounds_128486) { + x_128481 = ((volatile __local + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)]; + if ((local_tid_128471 - squot32(local_tid_128471, 32) * + 32) == 0) { + x_128480 = x_128481; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46656 = 1; - while (slt32(skip_threads_46656, 32)) { - if (sle32(skip_threads_46656, local_tid_46640 - - squot32(local_tid_46640, 32) * 32) && - ltid_in_bounds_46655) { + skip_threads_128487 = 1; + while (slt32(skip_threads_128487, 32)) { + if (sle32(skip_threads_128487, local_tid_128471 - + squot32(local_tid_128471, 32) * 32) && + ltid_in_bounds_128486) { // read operands { - x_46649 = ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640) - - sext_i32_i64(skip_threads_46656)]; + x_128480 = ((volatile __local 
+ double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471) - + sext_i32_i64(skip_threads_128487)]; } // perform operation { - bool inactive_46657 = - slt64(srem64(sext_i32_i64(local_tid_46640) + - chunk_offset_46647, - iota32_arg_29597), - sext_i32_i64(local_tid_46640) + - chunk_offset_46647 - - (sext_i32_i64(local_tid_46640 - - skip_threads_46656) + - chunk_offset_46647)); + bool inactive_128488 = + slt64(srem64(sext_i32_i64(local_tid_128471) + + chunk_offset_128478, Nmk_76536), + sext_i32_i64(local_tid_128471) + + chunk_offset_128478 - + (sext_i32_i64(local_tid_128471 - + skip_threads_128487) + + chunk_offset_128478)); - if (inactive_46657) { - x_46649 = x_46650; + if (inactive_128488) { + x_128480 = x_128481; } - if (!inactive_46657) { - float defunc_1_op_res_46651 = x_46649 + x_46650; + if (!inactive_128488) { + double defunc_1_op_res_128482 = x_128480 + + x_128481; - x_46649 = defunc_1_op_res_46651; + x_128480 = defunc_1_op_res_128482; } } } - if (sle32(wave_sizze_46642, skip_threads_46656)) { + if (sle32(wave_sizze_128473, skip_threads_128487)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46656, local_tid_46640 - - squot32(local_tid_46640, 32) * 32) && - ltid_in_bounds_46655) { + if (sle32(skip_threads_128487, local_tid_128471 - + squot32(local_tid_128471, 32) * 32) && + ltid_in_bounds_128486) { // write result { ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] = - x_46649; - x_46650 = x_46649; + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)] = + x_128480; + x_128481 = x_128480; } } - if (sle32(wave_sizze_46642, skip_threads_46656)) { + if (sle32(wave_sizze_128473, skip_threads_128487)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46656 *= 2; + skip_threads_128487 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46640 - squot32(local_tid_46640, 32) * 32) == - 31 && ltid_in_bounds_46655) { + if ((local_tid_128471 - 
squot32(local_tid_128471, 32) * 32) == + 31 && ltid_in_bounds_128486) { ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(squot32(local_tid_46640, - 32))] = - x_46649; + double *) scan_arr_mem_128475)[sext_i32_i64(squot32(local_tid_128471, + 32))] = + x_128480; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46658; + int32_t skip_threads_128489; // read input for in-block scan { - if (squot32(local_tid_46640, 32) == 0 && - ltid_in_bounds_46655) { - x_46653 = ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)]; - if ((local_tid_46640 - squot32(local_tid_46640, 32) * + if (squot32(local_tid_128471, 32) == 0 && + ltid_in_bounds_128486) { + x_128484 = ((volatile __local + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)]; + if ((local_tid_128471 - squot32(local_tid_128471, 32) * 32) == 0) { - x_46652 = x_46653; + x_128483 = x_128484; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46658 = 1; - while (slt32(skip_threads_46658, 32)) { - if (sle32(skip_threads_46658, local_tid_46640 - - squot32(local_tid_46640, 32) * 32) && - (squot32(local_tid_46640, 32) == 0 && - ltid_in_bounds_46655)) { + skip_threads_128489 = 1; + while (slt32(skip_threads_128489, 32)) { + if (sle32(skip_threads_128489, local_tid_128471 - + squot32(local_tid_128471, 32) * 32) && + (squot32(local_tid_128471, 32) == 0 && + ltid_in_bounds_128486)) { // read operands { - x_46652 = ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640) - - sext_i32_i64(skip_threads_46658)]; + x_128483 = ((volatile __local + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471) - + sext_i32_i64(skip_threads_128489)]; } // perform operation { - bool inactive_46659 = - slt64(srem64(sext_i32_i64(local_tid_46640 * + bool inactive_128490 = + slt64(srem64(sext_i32_i64(local_tid_128471 * 32 + 32 - 1) + - chunk_offset_46647, - 
iota32_arg_29597), - sext_i32_i64(local_tid_46640 * 32 + - 32 - 1) + chunk_offset_46647 - - (sext_i32_i64((local_tid_46640 - - skip_threads_46658) * - 32 + 32 - 1) + chunk_offset_46647)); + chunk_offset_128478, + Nmk_76536), + sext_i32_i64(local_tid_128471 * 32 + + 32 - 1) + chunk_offset_128478 - + (sext_i32_i64((local_tid_128471 - + skip_threads_128489) * + 32 + 32 - 1) + + chunk_offset_128478)); - if (inactive_46659) { - x_46652 = x_46653; + if (inactive_128490) { + x_128483 = x_128484; } - if (!inactive_46659) { - float defunc_1_op_res_46654 = x_46652 + - x_46653; + if (!inactive_128490) { + double defunc_1_op_res_128485 = x_128483 + + x_128484; - x_46652 = defunc_1_op_res_46654; + x_128483 = defunc_1_op_res_128485; } } } - if (sle32(wave_sizze_46642, skip_threads_46658)) { + if (sle32(wave_sizze_128473, skip_threads_128489)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46658, local_tid_46640 - - squot32(local_tid_46640, 32) * 32) && - (squot32(local_tid_46640, 32) == 0 && - ltid_in_bounds_46655)) { + if (sle32(skip_threads_128489, local_tid_128471 - + squot32(local_tid_128471, 32) * 32) && + (squot32(local_tid_128471, 32) == 0 && + ltid_in_bounds_128486)) { // write result { ((volatile __local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] = - x_46652; - x_46653 = x_46652; + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)] = + x_128483; + x_128484 = x_128483; } } - if (sle32(wave_sizze_46642, skip_threads_46658)) { + if (sle32(wave_sizze_128473, skip_threads_128489)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46658 *= 2; + skip_threads_128489 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46640, 32) == 0 || - !ltid_in_bounds_46655)) { + if (!(squot32(local_tid_128471, 32) == 0 || + !ltid_in_bounds_128486)) { // read operands { - x_46650 = x_46649; - x_46649 = ((__local - float *) scan_arr_mem_46644)[sext_i32_i64(squot32(local_tid_46640, - 32)) - 
- (int64_t) 1]; + x_128481 = x_128480; + x_128480 = ((__local + double *) scan_arr_mem_128475)[sext_i32_i64(squot32(local_tid_128471, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46660 = - slt64(srem64(sext_i32_i64(local_tid_46640) + - chunk_offset_46647, iota32_arg_29597), - sext_i32_i64(local_tid_46640) + - chunk_offset_46647 - - (sext_i32_i64(squot32(local_tid_46640, 32) * - 32 - 1) + chunk_offset_46647)); + bool inactive_128491 = + slt64(srem64(sext_i32_i64(local_tid_128471) + + chunk_offset_128478, Nmk_76536), + sext_i32_i64(local_tid_128471) + + chunk_offset_128478 - + (sext_i32_i64(squot32(local_tid_128471, 32) * + 32 - 1) + chunk_offset_128478)); - if (inactive_46660) { - x_46649 = x_46650; + if (inactive_128491) { + x_128480 = x_128481; } - if (!inactive_46660) { - float defunc_1_op_res_46651 = x_46649 + x_46650; + if (!inactive_128491) { + double defunc_1_op_res_128482 = x_128480 + x_128481; - x_46649 = defunc_1_op_res_46651; + x_128480 = defunc_1_op_res_128482; } } // write final result { ((__local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] = - x_46649; + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)] = + x_128480; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46640, 32) == 0) { + if (squot32(local_tid_128471, 32) == 0) { ((__local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)] = - x_46650; + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)] = + x_128481; } } barrier(CLK_LOCAL_MEM_FENCE); // threads in bounds write partial scan result { - if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, - iota32_arg_29597)) { - ((__global float *) mem_45302)[gtid_42105 * - iota32_arg_29597 + - gtid_42113] = ((__local - float *) scan_arr_mem_46644)[sext_i32_i64(local_tid_46640)]; + if (slt64(gtid_111553, m_75136) && slt64(gtid_111561, + Nmk_76536)) { + ((__global double *) mem_124061)[gtid_111553 * Nmk_76536 + + gtid_111561] = 
((__local + double *) scan_arr_mem_128475)[sext_i32_i64(local_tid_128471)]; } } barrier(CLK_LOCAL_MEM_FENCE); // first thread reads last element as carry-in for next iteration { - bool crosses_segment_46661 = slt64(srem64(chunk_offset_46647 + - segscan_group_sizze_42200, - iota32_arg_29597), - chunk_offset_46647 + - segscan_group_sizze_42200 - - (chunk_offset_46647 + - segscan_group_sizze_42200 - - (int64_t) 1)); - bool should_load_carry_46662 = local_tid_46640 == 0 && - !crosses_segment_46661; - - if (should_load_carry_46662) { - x_42204 = ((__local - float *) scan_arr_mem_46644)[segscan_group_sizze_42200 - - (int64_t) 1]; - } - if (!should_load_carry_46662) { - x_42204 = 0.0F; + bool crosses_segment_128492 = slt64(srem64(chunk_offset_128478 + + segscan_group_sizze_111711, + Nmk_76536), + chunk_offset_128478 + + segscan_group_sizze_111711 - + (chunk_offset_128478 + + segscan_group_sizze_111711 - + (int64_t) 1)); + bool should_load_carry_128493 = local_tid_128471 == 0 && + !crosses_segment_128492; + + if (should_load_carry_128493) { + x_111715 = ((__local + double *) scan_arr_mem_128475)[segscan_group_sizze_111711 - + (int64_t) 1]; + } + if (!should_load_carry_128493) { + x_111715 = 0.0; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -3251,1694 +4871,2134 @@ def sync(self): error_1: return; - #undef segscan_group_sizze_42200 + #undef segscan_group_sizze_111711 } -__kernel void mainziscan_stage2_41042(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46285_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t stage1_num_groups_46254, - int32_t num_threads_46255, __global - unsigned char *mem_45163) +__kernel void mainziscan_stage1_114084(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129264_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int32_t num_threads_129258, __global + unsigned char *mem_124142, __global + unsigned char *defunc_3_map_res_mem_124883, + __global unsigned char *mem_124906, + __global 
unsigned char *mem_124909) { - #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036) + #define segscan_group_sizze_114101 (mainzisegscan_group_sizze_114078) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46285_backing_0 = + __local volatile char *restrict scan_arr_mem_129264_backing_0 = (__local volatile - char *) scan_arr_mem_46285_backing_aligned_0; + char *) scan_arr_mem_129264_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46280; - int32_t local_tid_46281; - int64_t group_sizze_46284; - int32_t wave_sizze_46283; - int32_t group_tid_46282; - - global_tid_46280 = get_global_id(0); - local_tid_46281 = get_local_id(0); - group_sizze_46284 = get_local_size(0); - wave_sizze_46283 = LOCKSTEP_WIDTH; - group_tid_46282 = get_group_id(0); - - int32_t phys_tid_41042; - - phys_tid_41042 = global_tid_46280; + int32_t global_tid_129259; + int32_t local_tid_129260; + int64_t group_sizze_129263; + int32_t wave_sizze_129262; + int32_t group_tid_129261; - __local char *scan_arr_mem_46285; + global_tid_129259 = get_global_id(0); + local_tid_129260 = get_local_id(0); + group_sizze_129263 = get_local_size(0); + wave_sizze_129262 = LOCKSTEP_WIDTH; + group_tid_129261 = get_group_id(0); - scan_arr_mem_46285 = (__local char *) scan_arr_mem_46285_backing_0; + int32_t phys_tid_114084; - int64_t flat_idx_46287; - - flat_idx_46287 = (sext_i32_i64(local_tid_46281) + (int64_t) 1) * - (segscan_group_sizze_41059 * sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1; - - int64_t gtid_41033; - - gtid_41033 = squot64(flat_idx_46287, N_29165); - - int64_t gtid_41041; - - gtid_41041 = flat_idx_46287 - squot64(flat_idx_46287, N_29165) * N_29165; - // threads in bound read carries; others get neutral element - { - if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) { - ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - 
((__global int64_t *) mem_45163)[gtid_41033 * N_29165 + - gtid_41041]; - } else { - ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - (int64_t) 0; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + phys_tid_114084 = global_tid_129259; - int64_t x_41064; - int64_t x_41065; - int64_t x_46288; - int64_t x_46289; - bool ltid_in_bounds_46291; + __local char *scan_arr_mem_129264; - ltid_in_bounds_46291 = slt64(sext_i32_i64(local_tid_46281), - stage1_num_groups_46254); + scan_arr_mem_129264 = (__local char *) scan_arr_mem_129264_backing_0; - int32_t skip_threads_46292; + int64_t x_114106; + int64_t x_114107; - // read input for in-block scan - { - if (ltid_in_bounds_46291) { - x_41065 = ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)]; - if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) == 0) { - x_41064 = x_41065; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46292 = 1; - while (slt32(skip_threads_46292, 32)) { - if (sle32(skip_threads_46292, local_tid_46281 - - squot32(local_tid_46281, 32) * 32) && - ltid_in_bounds_46291) { - // read operands - { - x_41064 = ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281) - - sext_i32_i64(skip_threads_46292)]; + x_114106 = (int64_t) 0; + for (int64_t j_129266 = 0; j_129266 < sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258)); + j_129266++) { + int64_t chunk_offset_129267 = segscan_group_sizze_114101 * j_129266 + + sext_i32_i64(group_tid_129261) * (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))); + int64_t flat_idx_129268 = chunk_offset_129267 + + sext_i32_i64(local_tid_129260); + int64_t gtid_114075 = squot64(flat_idx_129268, N_75135); + int64_t gtid_114083 = flat_idx_129268 - squot64(flat_idx_129268, + N_75135) * N_75135; + + // threads in bounds read input + { + if (slt64(gtid_114075, m_75136) && slt64(gtid_114083, N_75135)) { + double x_114111 
= ((__global double *) mem_124142)[gtid_114075 * + N_75135 + + gtid_114083]; + bool isnan_res_114113; + + isnan_res_114113 = futrts_isnan64(x_114111); + + bool cond_114114 = !isnan_res_114113; + double defunc_1_f_res_114115; + + if (cond_114114) { + double x_114112 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_114075 * + N_75135 + + gtid_114083]; + double defunc_1_f_res_t_res_114116 = x_114111 - x_114112; + + defunc_1_f_res_114115 = defunc_1_f_res_t_res_114116; + } else { + defunc_1_f_res_114115 = NAN; + } + + bool isnan_res_114117; + + isnan_res_114117 = futrts_isnan64(defunc_1_f_res_114115); + + bool defunc_0_p_res_114118 = !isnan_res_114117; + int64_t defunc_0_f_res_114119 = + btoi_bool_i64(defunc_0_p_res_114118); + + // write to-scan values to parameters + { + x_114107 = defunc_0_f_res_114119; + } + // write mapped values results to global memory + { + ((__global double *) mem_124909)[gtid_114075 * N_75135 + + gtid_114083] = + defunc_1_f_res_114115; + } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_114075, m_75136) && slt64(gtid_114083, + N_75135))) { + x_114107 = (int64_t) 0; + } + } + // combine with carry and write to local memory + { + int64_t defunc_1_op_res_114108 = add64(x_114106, x_114107); + + ((__local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)] = + defunc_1_op_res_114108; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_129269; + int64_t x_129270; + int64_t x_129272; + int64_t x_129273; + bool ltid_in_bounds_129275; + + ltid_in_bounds_129275 = slt64(sext_i32_i64(local_tid_129260), + segscan_group_sizze_114101); + + int32_t skip_threads_129276; + + // read input for in-block scan + { + if (ltid_in_bounds_129275) { + x_129270 = ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)]; + if ((local_tid_129260 - squot32(local_tid_129260, 32) * + 32) == 0) { + x_129269 = x_129270; + } + } + } + 
// in-block scan (hopefully no barriers needed) + { + skip_threads_129276 = 1; + while (slt32(skip_threads_129276, 32)) { + if (sle32(skip_threads_129276, local_tid_129260 - + squot32(local_tid_129260, 32) * 32) && + ltid_in_bounds_129275) { + // read operands + { + x_129269 = ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260) - + sext_i32_i64(skip_threads_129276)]; + } + // perform operation + { + bool inactive_129277 = + slt64(srem64(sext_i32_i64(local_tid_129260) + + chunk_offset_129267, N_75135), + sext_i32_i64(local_tid_129260) + + chunk_offset_129267 - + (sext_i32_i64(local_tid_129260 - + skip_threads_129276) + + chunk_offset_129267)); + + if (inactive_129277) { + x_129269 = x_129270; + } + if (!inactive_129277) { + int64_t defunc_1_op_res_129271 = add64(x_129269, + x_129270); + + x_129269 = defunc_1_op_res_129271; + } + } + } + if (sle32(wave_sizze_129262, skip_threads_129276)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129276, local_tid_129260 - + squot32(local_tid_129260, 32) * 32) && + ltid_in_bounds_129275) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)] = + x_129269; + x_129270 = x_129269; + } + } + if (sle32(wave_sizze_129262, skip_threads_129276)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129276 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129260 - squot32(local_tid_129260, 32) * 32) == + 31 && ltid_in_bounds_129275) { + ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(squot32(local_tid_129260, + 32))] = + x_129269; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129278; + + // read input for in-block scan + { + if (squot32(local_tid_129260, 32) == 0 && + ltid_in_bounds_129275) { + x_129273 = ((volatile __local + int64_t *) 
scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)]; + if ((local_tid_129260 - squot32(local_tid_129260, 32) * + 32) == 0) { + x_129272 = x_129273; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129278 = 1; + while (slt32(skip_threads_129278, 32)) { + if (sle32(skip_threads_129278, local_tid_129260 - + squot32(local_tid_129260, 32) * 32) && + (squot32(local_tid_129260, 32) == 0 && + ltid_in_bounds_129275)) { + // read operands + { + x_129272 = ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260) - + sext_i32_i64(skip_threads_129278)]; + } + // perform operation + { + bool inactive_129279 = + slt64(srem64(sext_i32_i64(local_tid_129260 * + 32 + 32 - 1) + + chunk_offset_129267, N_75135), + sext_i32_i64(local_tid_129260 * 32 + + 32 - 1) + chunk_offset_129267 - + (sext_i32_i64((local_tid_129260 - + skip_threads_129278) * + 32 + 32 - 1) + + chunk_offset_129267)); + + if (inactive_129279) { + x_129272 = x_129273; + } + if (!inactive_129279) { + int64_t defunc_1_op_res_129274 = + add64(x_129272, x_129273); + + x_129272 = defunc_1_op_res_129274; + } + } + } + if (sle32(wave_sizze_129262, skip_threads_129278)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129278, local_tid_129260 - + squot32(local_tid_129260, 32) * 32) && + (squot32(local_tid_129260, 32) == 0 && + ltid_in_bounds_129275)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)] = + x_129272; + x_129273 = x_129272; + } + } + if (sle32(wave_sizze_129262, skip_threads_129278)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129278 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129260, 32) == 0 || + !ltid_in_bounds_129275)) { + // read operands + { + x_129270 = x_129269; + x_129269 = ((__local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(squot32(local_tid_129260, + 32)) - + (int64_t) 1]; + } + 
// perform operation + { + bool inactive_129280 = + slt64(srem64(sext_i32_i64(local_tid_129260) + + chunk_offset_129267, N_75135), + sext_i32_i64(local_tid_129260) + + chunk_offset_129267 - + (sext_i32_i64(squot32(local_tid_129260, 32) * + 32 - 1) + chunk_offset_129267)); + + if (inactive_129280) { + x_129269 = x_129270; + } + if (!inactive_129280) { + int64_t defunc_1_op_res_129271 = add64(x_129269, + x_129270); + + x_129269 = defunc_1_op_res_129271; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)] = + x_129269; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129260, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)] = + x_129270; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_114075, m_75136) && slt64(gtid_114083, + N_75135)) { + ((__global int64_t *) mem_124906)[gtid_114075 * N_75135 + + gtid_114083] = ((__local + int64_t *) scan_arr_mem_129264)[sext_i32_i64(local_tid_129260)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_129281 = slt64(srem64(chunk_offset_129267 + + segscan_group_sizze_114101, + N_75135), + chunk_offset_129267 + + segscan_group_sizze_114101 - + (chunk_offset_129267 + + segscan_group_sizze_114101 - + (int64_t) 1)); + bool should_load_carry_129282 = local_tid_129260 == 0 && + !crosses_segment_129281; + + if (should_load_carry_129282) { + x_114106 = ((__local + int64_t *) scan_arr_mem_129264)[segscan_group_sizze_114101 - + (int64_t) 1]; + } + if (!should_load_carry_129282) { + x_114106 = (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_114101 +} +__kernel void mainziscan_stage1_114793(__global int *global_failure, + int failure_is_an_option, __global + int64_t 
*global_failure_args, + __local volatile + int64_t *scan_arr_mem_129621_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t iota_arg_77024, + int32_t num_threads_129615, __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_0_f_res_mem_124970, + __global unsigned char *mem_124987, + __global unsigned char *mem_124991) +{ + #define segscan_group_sizze_114877 (mainzisegscan_group_sizze_114787) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129621_backing_0 = + (__local volatile + char *) scan_arr_mem_129621_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129616; + int32_t local_tid_129617; + int64_t group_sizze_129620; + int32_t wave_sizze_129619; + int32_t group_tid_129618; + + global_tid_129616 = get_global_id(0); + local_tid_129617 = get_local_id(0); + group_sizze_129620 = get_local_size(0); + wave_sizze_129619 = LOCKSTEP_WIDTH; + group_tid_129618 = get_group_id(0); + + int32_t phys_tid_114793; + + phys_tid_114793 = global_tid_129616; + + __local char *scan_arr_mem_129621; + + scan_arr_mem_129621 = (__local char *) scan_arr_mem_129621_backing_0; + + double x_114881; + double x_114882; + + x_114881 = 0.0; + for (int64_t j_129623 = 0; j_129623 < sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615)); + j_129623++) { + int64_t chunk_offset_129624 = segscan_group_sizze_114877 * j_129623 + + sext_i32_i64(group_tid_129618) * (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))); + int64_t flat_idx_129625 = chunk_offset_129624 + + sext_i32_i64(local_tid_129617); + int64_t 
gtid_114784 = squot64(flat_idx_129625, iota_arg_77024); + int64_t gtid_114792 = flat_idx_129625 - squot64(flat_idx_129625, + iota_arg_77024) * + iota_arg_77024; + + // threads in bounds read input + { + if (slt64(gtid_114784, m_75136) && slt64(gtid_114792, + iota_arg_77024)) { + int64_t y_114888 = ((__global + int64_t *) mem_124987)[gtid_114784]; + bool cond_114891 = sle64(y_114888, gtid_114792); + double defunc_0_f_res_114892; + + if (cond_114891) { + defunc_0_f_res_114892 = 0.0; + } else { + int64_t x_114884 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114784]; + int64_t x_114885 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114784]; + double x_114886 = ((__global + double *) defunc_0_f_res_mem_124970)[gtid_114784]; + bool cond_114893 = gtid_114792 == (int64_t) 0; + double defunc_0_f_res_f_res_114894; + + if (cond_114893) { + defunc_0_f_res_f_res_114894 = x_114886; + } else { + int64_t i_114895 = add64(gtid_114792, x_114884); + bool x_114896 = sle64((int64_t) 0, i_114895); + bool y_114897 = slt64(i_114895, N_75135); + bool bounds_check_114898 = x_114896 && y_114897; + bool index_certs_114899; + + if (!bounds_check_114898) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 206) == -1) { + global_failure_args[0] = i_114895; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_114900 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114784 * + N_75135 + + i_114895]; + int64_t x_114901 = sub64(x_114884, x_114885); + int64_t i_114902 = add64(gtid_114792, x_114901); + bool x_114903 = sle64((int64_t) 0, i_114902); + bool y_114904 = slt64(i_114902, N_75135); + bool bounds_check_114905 = x_114903 && y_114904; + bool index_certs_114906; + + if (!bounds_check_114905) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 207) == -1) { + global_failure_args[0] = i_114902; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double 
y_114907 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114784 * + N_75135 + + i_114902]; + double defunc_0_f_res_f_res_f_res_114908 = x_114900 - + y_114907; + + defunc_0_f_res_f_res_114894 = + defunc_0_f_res_f_res_f_res_114908; + } + defunc_0_f_res_114892 = defunc_0_f_res_f_res_114894; + } + // write to-scan values to parameters + { + x_114882 = defunc_0_f_res_114892; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_114784, m_75136) && slt64(gtid_114792, + iota_arg_77024))) { + x_114882 = 0.0; + } + } + // combine with carry and write to local memory + { + double defunc_1_op_res_114883 = x_114881 + x_114882; + + ((__local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)] = + defunc_1_op_res_114883; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double x_129626; + double x_129627; + double x_129629; + double x_129630; + bool ltid_in_bounds_129632; + + ltid_in_bounds_129632 = slt64(sext_i32_i64(local_tid_129617), + segscan_group_sizze_114877); + + int32_t skip_threads_129633; + + // read input for in-block scan + { + if (ltid_in_bounds_129632) { + x_129627 = ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)]; + if ((local_tid_129617 - squot32(local_tid_129617, 32) * + 32) == 0) { + x_129626 = x_129627; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129633 = 1; + while (slt32(skip_threads_129633, 32)) { + if (sle32(skip_threads_129633, local_tid_129617 - + squot32(local_tid_129617, 32) * 32) && + ltid_in_bounds_129632) { + // read operands + { + x_129626 = ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617) - + sext_i32_i64(skip_threads_129633)]; + } + // perform operation + { + bool inactive_129634 = + 
slt64(srem64(sext_i32_i64(local_tid_129617) + + chunk_offset_129624, + iota_arg_77024), + sext_i32_i64(local_tid_129617) + + chunk_offset_129624 - + (sext_i32_i64(local_tid_129617 - + skip_threads_129633) + + chunk_offset_129624)); + + if (inactive_129634) { + x_129626 = x_129627; + } + if (!inactive_129634) { + double defunc_1_op_res_129628 = x_129626 + + x_129627; + + x_129626 = defunc_1_op_res_129628; + } + } + } + if (sle32(wave_sizze_129619, skip_threads_129633)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129633, local_tid_129617 - + squot32(local_tid_129617, 32) * 32) && + ltid_in_bounds_129632) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)] = + x_129626; + x_129627 = x_129626; + } + } + if (sle32(wave_sizze_129619, skip_threads_129633)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129633 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129617 - squot32(local_tid_129617, 32) * 32) == + 31 && ltid_in_bounds_129632) { + ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(squot32(local_tid_129617, + 32))] = + x_129626; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129635; + + // read input for in-block scan + { + if (squot32(local_tid_129617, 32) == 0 && + ltid_in_bounds_129632) { + x_129630 = ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)]; + if ((local_tid_129617 - squot32(local_tid_129617, 32) * + 32) == 0) { + x_129629 = x_129630; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129635 = 1; + while (slt32(skip_threads_129635, 32)) { + if (sle32(skip_threads_129635, local_tid_129617 - + squot32(local_tid_129617, 32) * 32) && + (squot32(local_tid_129617, 32) == 0 && + ltid_in_bounds_129632)) { + // read 
operands + { + x_129629 = ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617) - + sext_i32_i64(skip_threads_129635)]; + } + // perform operation + { + bool inactive_129636 = + slt64(srem64(sext_i32_i64(local_tid_129617 * + 32 + 32 - 1) + + chunk_offset_129624, + iota_arg_77024), + sext_i32_i64(local_tid_129617 * 32 + + 32 - 1) + chunk_offset_129624 - + (sext_i32_i64((local_tid_129617 - + skip_threads_129635) * + 32 + 32 - 1) + + chunk_offset_129624)); + + if (inactive_129636) { + x_129629 = x_129630; + } + if (!inactive_129636) { + double defunc_1_op_res_129631 = x_129629 + + x_129630; + + x_129629 = defunc_1_op_res_129631; + } + } + } + if (sle32(wave_sizze_129619, skip_threads_129635)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129635, local_tid_129617 - + squot32(local_tid_129617, 32) * 32) && + (squot32(local_tid_129617, 32) == 0 && + ltid_in_bounds_129632)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)] = + x_129629; + x_129630 = x_129629; + } + } + if (sle32(wave_sizze_129619, skip_threads_129635)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129635 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129617, 32) == 0 || + !ltid_in_bounds_129632)) { + // read operands + { + x_129627 = x_129626; + x_129626 = ((__local + double *) scan_arr_mem_129621)[sext_i32_i64(squot32(local_tid_129617, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129637 = + slt64(srem64(sext_i32_i64(local_tid_129617) + + chunk_offset_129624, iota_arg_77024), + sext_i32_i64(local_tid_129617) + + chunk_offset_129624 - + (sext_i32_i64(squot32(local_tid_129617, 32) * + 32 - 1) + chunk_offset_129624)); + + if (inactive_129637) { + x_129626 = x_129627; + } + if (!inactive_129637) { + double defunc_1_op_res_129628 = x_129626 + x_129627; + + x_129626 = defunc_1_op_res_129628; + } 
+ } + // write final result + { + ((__local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)] = + x_129626; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129617, 32) == 0) { + ((__local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)] = + x_129627; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_114784, m_75136) && slt64(gtid_114792, + iota_arg_77024)) { + ((__global double *) mem_124991)[gtid_114784 * + iota_arg_77024 + + gtid_114792] = ((__local + double *) scan_arr_mem_129621)[sext_i32_i64(local_tid_129617)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_129638 = slt64(srem64(chunk_offset_129624 + + segscan_group_sizze_114877, + iota_arg_77024), + chunk_offset_129624 + + segscan_group_sizze_114877 - + (chunk_offset_129624 + + segscan_group_sizze_114877 - + (int64_t) 1)); + bool should_load_carry_129639 = local_tid_129617 == 0 && + !crosses_segment_129638; + + if (should_load_carry_129639) { + x_114881 = ((__local + double *) scan_arr_mem_129621)[segscan_group_sizze_114877 - + (int64_t) 1]; + } + if (!should_load_carry_129639) { + x_114881 = 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_114877 +} +__kernel void mainziscan_stage2_103083(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126442_backing_aligned_0, + int64_t m_75136, int64_t n_75139, + int64_t stage1_num_groups_126411, + int32_t num_threads_126412, __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_103197 (mainzisegscan_group_sizze_103077) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_126442_backing_0 = + (__local volatile + char *) 
scan_arr_mem_126442_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126437; + int32_t local_tid_126438; + int64_t group_sizze_126441; + int32_t wave_sizze_126440; + int32_t group_tid_126439; + + global_tid_126437 = get_global_id(0); + local_tid_126438 = get_local_id(0); + group_sizze_126441 = get_local_size(0); + wave_sizze_126440 = LOCKSTEP_WIDTH; + group_tid_126439 = get_group_id(0); + + int32_t phys_tid_103083; + + phys_tid_103083 = global_tid_126437; + + __local char *scan_arr_mem_126442; + + scan_arr_mem_126442 = (__local char *) scan_arr_mem_126442_backing_0; + + int64_t flat_idx_126444; + + flat_idx_126444 = (sext_i32_i64(local_tid_126438) + (int64_t) 1) * + (segscan_group_sizze_103197 * sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1; + + int64_t gtid_103074; + + gtid_103074 = squot64(flat_idx_126444, n_75139); + + int64_t gtid_103082; + + gtid_103082 = flat_idx_126444 - squot64(flat_idx_126444, n_75139) * n_75139; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_103074, m_75136) && slt64(gtid_103082, n_75139)) { + ((__local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + ((__global int64_t *) mem_120201)[gtid_103074 * n_75139 + + gtid_103082]; + } else { + ((__local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_103201; + int64_t x_103202; + int64_t x_126445; + int64_t x_126446; + bool ltid_in_bounds_126448; + + ltid_in_bounds_126448 = slt64(sext_i32_i64(local_tid_126438), + stage1_num_groups_126411); + + int32_t skip_threads_126449; + + // read input for in-block scan + { + if (ltid_in_bounds_126448) { + x_103202 = ((volatile __local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)]; + if ((local_tid_126438 - squot32(local_tid_126438, 32) * 32) == 0) { + x_103201 = x_103202; + } + } + } + // in-block scan (hopefully no barriers 
needed) + { + skip_threads_126449 = 1; + while (slt32(skip_threads_126449, 32)) { + if (sle32(skip_threads_126449, local_tid_126438 - + squot32(local_tid_126438, 32) * 32) && + ltid_in_bounds_126448) { + // read operands + { + x_103201 = ((volatile __local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438) - + sext_i32_i64(skip_threads_126449)]; } // perform operation { - bool inactive_46293 = - slt64(srem64((sext_i32_i64(local_tid_46281) + + bool inactive_126450 = + slt64(srem64((sext_i32_i64(local_tid_126438) + (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1, N_29165), - (sext_i32_i64(local_tid_46281) + (int64_t) 1) * - (segscan_group_sizze_41059 * sdiv_up64(m_29166 * - N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46281 - - skip_threads_46292) + + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1, n_75139), + (sext_i32_i64(local_tid_126438) + (int64_t) 1) * + (segscan_group_sizze_103197 * sdiv_up64(m_75136 * + n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_126438 - + skip_threads_126449) + (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - (int64_t) 1)); - if (inactive_46293) { - x_41064 = x_41065; + if (inactive_126450) { + x_103201 = x_103202; } - if (!inactive_46293) { - int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065); + if (!inactive_126450) { + int64_t defunc_1_op_res_103203 = add64(x_103201, + x_103202); - x_41064 = defunc_1_op_res_41066; + x_103201 = defunc_1_op_res_103203; } } } - if (sle32(wave_sizze_46283, skip_threads_46292)) { + if (sle32(wave_sizze_126440, skip_threads_126449)) { barrier(CLK_LOCAL_MEM_FENCE); } - if 
(sle32(skip_threads_46292, local_tid_46281 - - squot32(local_tid_46281, 32) * 32) && - ltid_in_bounds_46291) { + if (sle32(skip_threads_126449, local_tid_126438 - + squot32(local_tid_126438, 32) * 32) && + ltid_in_bounds_126448) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - x_41064; - x_41065 = x_41064; + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + x_103201; + x_103202 = x_103201; } } - if (sle32(wave_sizze_46283, skip_threads_46292)) { + if (sle32(wave_sizze_126440, skip_threads_126449)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46292 *= 2; + skip_threads_126449 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) == 31 && - ltid_in_bounds_46291) { + if ((local_tid_126438 - squot32(local_tid_126438, 32) * 32) == 31 && + ltid_in_bounds_126448) { ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(squot32(local_tid_46281, - 32))] = - x_41064; + int64_t *) scan_arr_mem_126442)[sext_i32_i64(squot32(local_tid_126438, + 32))] = + x_103201; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46294; + int32_t skip_threads_126451; // read input for in-block scan { - if (squot32(local_tid_46281, 32) == 0 && ltid_in_bounds_46291) { - x_46289 = ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)]; - if ((local_tid_46281 - squot32(local_tid_46281, 32) * 32) == + if (squot32(local_tid_126438, 32) == 0 && ltid_in_bounds_126448) { + x_126446 = ((volatile __local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)]; + if ((local_tid_126438 - squot32(local_tid_126438, 32) * 32) == 0) { - x_46288 = x_46289; + x_126445 = x_126446; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46294 = 1; - while 
(slt32(skip_threads_46294, 32)) { - if (sle32(skip_threads_46294, local_tid_46281 - - squot32(local_tid_46281, 32) * 32) && - (squot32(local_tid_46281, 32) == 0 && - ltid_in_bounds_46291)) { + skip_threads_126451 = 1; + while (slt32(skip_threads_126451, 32)) { + if (sle32(skip_threads_126451, local_tid_126438 - + squot32(local_tid_126438, 32) * 32) && + (squot32(local_tid_126438, 32) == 0 && + ltid_in_bounds_126448)) { // read operands { - x_46288 = ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281) - - sext_i32_i64(skip_threads_46294)]; + x_126445 = ((volatile __local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438) - + sext_i32_i64(skip_threads_126451)]; } // perform operation { - bool inactive_46295 = - slt64(srem64((sext_i32_i64(local_tid_46281 * 32 + + bool inactive_126452 = + slt64(srem64((sext_i32_i64(local_tid_126438 * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1, N_29165), - (sext_i32_i64(local_tid_46281 * 32 + 32 - + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1, n_75139), + (sext_i32_i64(local_tid_126438 * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - (int64_t) 1 - - ((sext_i32_i64((local_tid_46281 - - skip_threads_46294) * 32 + + ((sext_i32_i64((local_tid_126438 - + skip_threads_126451) * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - (int64_t) 1)); - if (inactive_46295) { - x_46288 = x_46289; + if (inactive_126452) { + x_126445 = x_126446; } - if (!inactive_46295) { - int64_t 
defunc_1_op_res_46290 = add64(x_46288, - x_46289); + if (!inactive_126452) { + int64_t defunc_1_op_res_126447 = add64(x_126445, + x_126446); - x_46288 = defunc_1_op_res_46290; + x_126445 = defunc_1_op_res_126447; } } } - if (sle32(wave_sizze_46283, skip_threads_46294)) { + if (sle32(wave_sizze_126440, skip_threads_126451)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46294, local_tid_46281 - - squot32(local_tid_46281, 32) * 32) && - (squot32(local_tid_46281, 32) == 0 && - ltid_in_bounds_46291)) { + if (sle32(skip_threads_126451, local_tid_126438 - + squot32(local_tid_126438, 32) * 32) && + (squot32(local_tid_126438, 32) == 0 && + ltid_in_bounds_126448)) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - x_46288; - x_46289 = x_46288; + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + x_126445; + x_126446 = x_126445; } } - if (sle32(wave_sizze_46283, skip_threads_46294)) { + if (sle32(wave_sizze_126440, skip_threads_126451)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46294 *= 2; + skip_threads_126451 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46281, 32) == 0 || !ltid_in_bounds_46291)) { + if (!(squot32(local_tid_126438, 32) == 0 || !ltid_in_bounds_126448)) { // read operands { - x_41065 = x_41064; - x_41064 = ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(squot32(local_tid_46281, - 32)) - - (int64_t) 1]; + x_103202 = x_103201; + x_103201 = ((__local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(squot32(local_tid_126438, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46296 = - slt64(srem64((sext_i32_i64(local_tid_46281) + - (int64_t) 1) * (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1, N_29165), - (sext_i32_i64(local_tid_46281) + (int64_t) 1) * - (segscan_group_sizze_41059 * sdiv_up64(m_29166 * - N_29165, - 
sext_i32_i64(num_threads_46255))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46281, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1)); - - if (inactive_46296) { - x_41064 = x_41065; - } - if (!inactive_46296) { - int64_t defunc_1_op_res_41066 = add64(x_41064, x_41065); - - x_41064 = defunc_1_op_res_41066; + bool inactive_126453 = + slt64(srem64((sext_i32_i64(local_tid_126438) + + (int64_t) 1) * (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1, n_75139), + (sext_i32_i64(local_tid_126438) + (int64_t) 1) * + (segscan_group_sizze_103197 * sdiv_up64(m_75136 * + n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_126438, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1)); + + if (inactive_126453) { + x_103201 = x_103202; + } + if (!inactive_126453) { + int64_t defunc_1_op_res_103203 = add64(x_103201, x_103202); + + x_103201 = defunc_1_op_res_103203; } } // write final result { ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - x_41064; + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + x_103201; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46281, 32) == 0) { + if (squot32(local_tid_126438, 32) == 0) { ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)] = - x_41065; + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)] = + x_103202; } } barrier(CLK_LOCAL_MEM_FENCE); // threads in bounds write scanned carries { - if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) { - ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 + - gtid_41041] = ((__local - int64_t *) scan_arr_mem_46285)[sext_i32_i64(local_tid_46281)]; + 
if (slt64(gtid_103074, m_75136) && slt64(gtid_103082, n_75139)) { + ((__global int64_t *) mem_120201)[gtid_103074 * n_75139 + + gtid_103082] = ((__local + int64_t *) scan_arr_mem_126442)[sext_i32_i64(local_tid_126438)]; } } error_0: return; - #undef segscan_group_sizze_41059 + #undef segscan_group_sizze_103197 } -__kernel void mainziscan_stage2_42114(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46668_backing_aligned_0, - int64_t m_29166, int64_t iota32_arg_29597, - int64_t stage1_num_groups_46637, - int32_t num_threads_46638, __global - unsigned char *mem_45302) +__kernel void mainziscan_stage2_111562(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_128499_backing_aligned_0, + int64_t m_75136, int64_t Nmk_76536, + int64_t stage1_num_groups_128468, + int32_t num_threads_128469, __global + unsigned char *mem_124061) { - #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108) + #define segscan_group_sizze_111711 (mainzisegscan_group_sizze_111556) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46668_backing_0 = + __local volatile char *restrict scan_arr_mem_128499_backing_0 = (__local volatile - char *) scan_arr_mem_46668_backing_aligned_0; + char *) scan_arr_mem_128499_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46663; - int32_t local_tid_46664; - int64_t group_sizze_46667; - int32_t wave_sizze_46666; - int32_t group_tid_46665; + int32_t global_tid_128494; + int32_t local_tid_128495; + int64_t group_sizze_128498; + int32_t wave_sizze_128497; + int32_t group_tid_128496; - global_tid_46663 = get_global_id(0); - local_tid_46664 = get_local_id(0); - group_sizze_46667 = get_local_size(0); - wave_sizze_46666 = LOCKSTEP_WIDTH; - group_tid_46665 = get_group_id(0); + global_tid_128494 = get_global_id(0); + local_tid_128495 = get_local_id(0); + group_sizze_128498 = get_local_size(0); + wave_sizze_128497 = 
LOCKSTEP_WIDTH; + group_tid_128496 = get_group_id(0); - int32_t phys_tid_42114; + int32_t phys_tid_111562; - phys_tid_42114 = global_tid_46663; + phys_tid_111562 = global_tid_128494; - __local char *scan_arr_mem_46668; + __local char *scan_arr_mem_128499; - scan_arr_mem_46668 = (__local char *) scan_arr_mem_46668_backing_0; + scan_arr_mem_128499 = (__local char *) scan_arr_mem_128499_backing_0; - int64_t flat_idx_46670; + int64_t flat_idx_128501; - flat_idx_46670 = (sext_i32_i64(local_tid_46664) + (int64_t) 1) * - (segscan_group_sizze_42200 * sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - + flat_idx_128501 = (sext_i32_i64(local_tid_128495) + (int64_t) 1) * + (segscan_group_sizze_111711 * sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - (int64_t) 1; - int64_t gtid_42105; + int64_t gtid_111553; - gtid_42105 = squot64(flat_idx_46670, iota32_arg_29597); + gtid_111553 = squot64(flat_idx_128501, Nmk_76536); - int64_t gtid_42113; + int64_t gtid_111561; - gtid_42113 = flat_idx_46670 - squot64(flat_idx_46670, iota32_arg_29597) * - iota32_arg_29597; + gtid_111561 = flat_idx_128501 - squot64(flat_idx_128501, Nmk_76536) * + Nmk_76536; // threads in bound read carries; others get neutral element { - if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, iota32_arg_29597)) { + if (slt64(gtid_111553, m_75136) && slt64(gtid_111561, Nmk_76536)) { ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - ((__global float *) mem_45302)[gtid_42105 * iota32_arg_29597 + - gtid_42113]; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + ((__global double *) mem_124061)[gtid_111553 * Nmk_76536 + + gtid_111561]; } else { ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - 0.0F; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + 0.0; } } barrier(CLK_LOCAL_MEM_FENCE); - float x_42204; - float x_42205; - float x_46671; - float x_46672; - bool ltid_in_bounds_46674; + 
double x_111715; + double x_111716; + double x_128502; + double x_128503; + bool ltid_in_bounds_128505; - ltid_in_bounds_46674 = slt64(sext_i32_i64(local_tid_46664), - stage1_num_groups_46637); + ltid_in_bounds_128505 = slt64(sext_i32_i64(local_tid_128495), + stage1_num_groups_128468); - int32_t skip_threads_46675; + int32_t skip_threads_128506; // read input for in-block scan { - if (ltid_in_bounds_46674) { - x_42205 = ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)]; - if ((local_tid_46664 - squot32(local_tid_46664, 32) * 32) == 0) { - x_42204 = x_42205; + if (ltid_in_bounds_128505) { + x_111716 = ((volatile __local + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)]; + if ((local_tid_128495 - squot32(local_tid_128495, 32) * 32) == 0) { + x_111715 = x_111716; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46675 = 1; - while (slt32(skip_threads_46675, 32)) { - if (sle32(skip_threads_46675, local_tid_46664 - - squot32(local_tid_46664, 32) * 32) && - ltid_in_bounds_46674) { + skip_threads_128506 = 1; + while (slt32(skip_threads_128506, 32)) { + if (sle32(skip_threads_128506, local_tid_128495 - + squot32(local_tid_128495, 32) * 32) && + ltid_in_bounds_128505) { // read operands { - x_42204 = ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664) - - sext_i32_i64(skip_threads_46675)]; + x_111715 = ((volatile __local + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495) - + sext_i32_i64(skip_threads_128506)]; } // perform operation { - bool inactive_46676 = - slt64(srem64((sext_i32_i64(local_tid_46664) + + bool inactive_128507 = + slt64(srem64((sext_i32_i64(local_tid_128495) + (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1, iota32_arg_29597), - (sext_i32_i64(local_tid_46664) + (int64_t) 1) * - (segscan_group_sizze_42200 * sdiv_up64(m_29166 * - iota32_arg_29597, - 
sext_i32_i64(num_threads_46638))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46664 - - skip_threads_46675) + + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1, Nmk_76536), + (sext_i32_i64(local_tid_128495) + (int64_t) 1) * + (segscan_group_sizze_111711 * sdiv_up64(m_75136 * + Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_128495 - + skip_threads_128506) + (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - (int64_t) 1)); - if (inactive_46676) { - x_42204 = x_42205; + if (inactive_128507) { + x_111715 = x_111716; } - if (!inactive_46676) { - float defunc_1_op_res_42206 = x_42204 + x_42205; + if (!inactive_128507) { + double defunc_1_op_res_111717 = x_111715 + x_111716; - x_42204 = defunc_1_op_res_42206; + x_111715 = defunc_1_op_res_111717; } } } - if (sle32(wave_sizze_46666, skip_threads_46675)) { + if (sle32(wave_sizze_128497, skip_threads_128506)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46675, local_tid_46664 - - squot32(local_tid_46664, 32) * 32) && - ltid_in_bounds_46674) { + if (sle32(skip_threads_128506, local_tid_128495 - + squot32(local_tid_128495, 32) * 32) && + ltid_in_bounds_128505) { // write result { ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - x_42204; - x_42205 = x_42204; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + x_111715; + x_111716 = x_111715; } } - if (sle32(wave_sizze_46666, skip_threads_46675)) { + if (sle32(wave_sizze_128497, skip_threads_128506)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46675 *= 2; + skip_threads_128506 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46664 - 
squot32(local_tid_46664, 32) * 32) == 31 && - ltid_in_bounds_46674) { + if ((local_tid_128495 - squot32(local_tid_128495, 32) * 32) == 31 && + ltid_in_bounds_128505) { ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(squot32(local_tid_46664, - 32))] = x_42204; + double *) scan_arr_mem_128499)[sext_i32_i64(squot32(local_tid_128495, + 32))] = + x_111715; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46677; + int32_t skip_threads_128508; // read input for in-block scan { - if (squot32(local_tid_46664, 32) == 0 && ltid_in_bounds_46674) { - x_46672 = ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)]; - if ((local_tid_46664 - squot32(local_tid_46664, 32) * 32) == + if (squot32(local_tid_128495, 32) == 0 && ltid_in_bounds_128505) { + x_128503 = ((volatile __local + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)]; + if ((local_tid_128495 - squot32(local_tid_128495, 32) * 32) == 0) { - x_46671 = x_46672; + x_128502 = x_128503; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46677 = 1; - while (slt32(skip_threads_46677, 32)) { - if (sle32(skip_threads_46677, local_tid_46664 - - squot32(local_tid_46664, 32) * 32) && - (squot32(local_tid_46664, 32) == 0 && - ltid_in_bounds_46674)) { + skip_threads_128508 = 1; + while (slt32(skip_threads_128508, 32)) { + if (sle32(skip_threads_128508, local_tid_128495 - + squot32(local_tid_128495, 32) * 32) && + (squot32(local_tid_128495, 32) == 0 && + ltid_in_bounds_128505)) { // read operands { - x_46671 = ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664) - - sext_i32_i64(skip_threads_46677)]; + x_128502 = ((volatile __local + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495) - + sext_i32_i64(skip_threads_128508)]; } // perform operation { - bool inactive_46678 = - slt64(srem64((sext_i32_i64(local_tid_46664 * 32 + + bool 
inactive_128509 = + slt64(srem64((sext_i32_i64(local_tid_128495 * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1, iota32_arg_29597), - (sext_i32_i64(local_tid_46664 * 32 + 32 - + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1, Nmk_76536), + (sext_i32_i64(local_tid_128495 * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - (int64_t) 1 - - ((sext_i32_i64((local_tid_46664 - - skip_threads_46677) * 32 + + ((sext_i32_i64((local_tid_128495 - + skip_threads_128508) * 32 + 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - (int64_t) 1)); - if (inactive_46678) { - x_46671 = x_46672; + if (inactive_128509) { + x_128502 = x_128503; } - if (!inactive_46678) { - float defunc_1_op_res_46673 = x_46671 + x_46672; + if (!inactive_128509) { + double defunc_1_op_res_128504 = x_128502 + x_128503; - x_46671 = defunc_1_op_res_46673; + x_128502 = defunc_1_op_res_128504; } } } - if (sle32(wave_sizze_46666, skip_threads_46677)) { + if (sle32(wave_sizze_128497, skip_threads_128508)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46677, local_tid_46664 - - squot32(local_tid_46664, 32) * 32) && - (squot32(local_tid_46664, 32) == 0 && - ltid_in_bounds_46674)) { + if (sle32(skip_threads_128508, local_tid_128495 - + squot32(local_tid_128495, 32) * 32) && + (squot32(local_tid_128495, 32) == 0 && + ltid_in_bounds_128505)) { // write result { ((volatile __local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - x_46671; - 
x_46672 = x_46671; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + x_128502; + x_128503 = x_128502; } } - if (sle32(wave_sizze_46666, skip_threads_46677)) { + if (sle32(wave_sizze_128497, skip_threads_128508)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46677 *= 2; + skip_threads_128508 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46664, 32) == 0 || !ltid_in_bounds_46674)) { + if (!(squot32(local_tid_128495, 32) == 0 || !ltid_in_bounds_128505)) { // read operands { - x_42205 = x_42204; - x_42204 = ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(squot32(local_tid_46664, - 32)) - - (int64_t) 1]; + x_111716 = x_111715; + x_111715 = ((__local + double *) scan_arr_mem_128499)[sext_i32_i64(squot32(local_tid_128495, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46679 = - slt64(srem64((sext_i32_i64(local_tid_46664) + - (int64_t) 1) * (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1, iota32_arg_29597), - (sext_i32_i64(local_tid_46664) + (int64_t) 1) * - (segscan_group_sizze_42200 * sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46664, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1)); - - if (inactive_46679) { - x_42204 = x_42205; - } - if (!inactive_46679) { - float defunc_1_op_res_42206 = x_42204 + x_42205; - - x_42204 = defunc_1_op_res_42206; + bool inactive_128510 = + slt64(srem64((sext_i32_i64(local_tid_128495) + + (int64_t) 1) * (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * + Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1, Nmk_76536), + (sext_i32_i64(local_tid_128495) + (int64_t) 1) * + (segscan_group_sizze_111711 * sdiv_up64(m_75136 * + Nmk_76536, + 
sext_i32_i64(num_threads_128469))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_128495, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1)); + + if (inactive_128510) { + x_111715 = x_111716; + } + if (!inactive_128510) { + double defunc_1_op_res_111717 = x_111715 + x_111716; + + x_111715 = defunc_1_op_res_111717; } } // write final result { ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - x_42204; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + x_111715; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46664, 32) == 0) { + if (squot32(local_tid_128495, 32) == 0) { ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)] = - x_42205; + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)] = + x_111716; } } barrier(CLK_LOCAL_MEM_FENCE); // threads in bounds write scanned carries { - if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, iota32_arg_29597)) { - ((__global float *) mem_45302)[gtid_42105 * iota32_arg_29597 + - gtid_42113] = ((__local - float *) scan_arr_mem_46668)[sext_i32_i64(local_tid_46664)]; + if (slt64(gtid_111553, m_75136) && slt64(gtid_111561, Nmk_76536)) { + ((__global double *) mem_124061)[gtid_111553 * Nmk_76536 + + gtid_111561] = ((__local + double *) scan_arr_mem_128499)[sext_i32_i64(local_tid_128495)]; } } error_0: return; - #undef segscan_group_sizze_42200 + #undef segscan_group_sizze_111711 } -__kernel void mainziscan_stage3_41042(__global int *global_failure, - int64_t N_29165, int64_t m_29166, - int64_t num_groups_41060, - int32_t num_threads_46255, - int32_t required_groups_46297, __global - unsigned char *mem_45163) +__kernel void mainziscan_stage2_114084(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129288_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t 
stage1_num_groups_129257, + int32_t num_threads_129258, __global + unsigned char *mem_124906) { - #define segscan_group_sizze_41059 (mainzisegscan_group_sizze_41036) + #define segscan_group_sizze_114101 (mainzisegscan_group_sizze_114078) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129288_backing_0 = + (__local volatile + char *) scan_arr_mem_129288_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46298; - int32_t local_tid_46299; - int64_t group_sizze_46302; - int32_t wave_sizze_46301; - int32_t group_tid_46300; - - global_tid_46298 = get_global_id(0); - local_tid_46299 = get_local_id(0); - group_sizze_46302 = get_local_size(0); - wave_sizze_46301 = LOCKSTEP_WIDTH; - group_tid_46300 = get_group_id(0); - - int32_t phys_tid_41042; - - phys_tid_41042 = global_tid_46298; + int32_t global_tid_129283; + int32_t local_tid_129284; + int64_t group_sizze_129287; + int32_t wave_sizze_129286; + int32_t group_tid_129285; - int32_t phys_group_id_46303; + global_tid_129283 = get_global_id(0); + local_tid_129284 = get_local_id(0); + group_sizze_129287 = get_local_size(0); + wave_sizze_129286 = LOCKSTEP_WIDTH; + group_tid_129285 = get_group_id(0); - phys_group_id_46303 = get_group_id(0); - for (int32_t i_46304 = 0; i_46304 < sdiv_up32(required_groups_46297 - - phys_group_id_46303, - sext_i64_i32(num_groups_41060)); - i_46304++) { - int32_t virt_group_id_46305 = phys_group_id_46303 + i_46304 * - sext_i64_i32(num_groups_41060); - int64_t flat_idx_46306 = sext_i32_i64(virt_group_id_46305) * - segscan_group_sizze_41059 + sext_i32_i64(local_tid_46299); - int64_t gtid_41033 = squot64(flat_idx_46306, N_29165); - int64_t gtid_41041 = flat_idx_46306 - squot64(flat_idx_46306, N_29165) * - N_29165; - int64_t orig_group_46307 = squot64(flat_idx_46306, - segscan_group_sizze_41059 * - sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))); - int64_t carry_in_flat_idx_46308 = 
orig_group_46307 * - (segscan_group_sizze_41059 * sdiv_up64(m_29166 * N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1; - - if (slt64(gtid_41033, m_29166) && slt64(gtid_41041, N_29165)) { - if (!(orig_group_46307 == (int64_t) 0 || (flat_idx_46306 == - (orig_group_46307 + - (int64_t) 1) * - (segscan_group_sizze_41059 * - sdiv_up64(m_29166 * - N_29165, - sext_i32_i64(num_threads_46255))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46306, - N_29165), - flat_idx_46306 - - carry_in_flat_idx_46308)))) { - int64_t x_41064; - int64_t x_41065; - - x_41064 = ((__global - int64_t *) mem_45163)[squot64(carry_in_flat_idx_46308, - N_29165) * N_29165 + - (carry_in_flat_idx_46308 - - squot64(carry_in_flat_idx_46308, - N_29165) * N_29165)]; - x_41065 = ((__global int64_t *) mem_45163)[gtid_41033 * - N_29165 + - gtid_41041]; - - int64_t defunc_1_op_res_41066; - - defunc_1_op_res_41066 = add64(x_41064, x_41065); - x_41064 = defunc_1_op_res_41066; - ((__global int64_t *) mem_45163)[gtid_41033 * N_29165 + - gtid_41041] = x_41064; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } + int32_t phys_tid_114084; - error_0: - return; - #undef segscan_group_sizze_41059 -} -__kernel void mainziscan_stage3_42114(__global int *global_failure, - int64_t m_29166, int64_t iota32_arg_29597, - int64_t num_groups_42201, - int32_t num_threads_46638, - int32_t required_groups_46680, __global - unsigned char *mem_45302) -{ - #define segscan_group_sizze_42200 (mainzisegscan_group_sizze_42108) + phys_tid_114084 = global_tid_129283; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + __local char *scan_arr_mem_129288; - if (*global_failure >= 0) - return; + scan_arr_mem_129288 = (__local char *) scan_arr_mem_129288_backing_0; - int32_t global_tid_46681; - int32_t local_tid_46682; - int64_t group_sizze_46685; - int32_t wave_sizze_46684; - int32_t group_tid_46683; + int64_t flat_idx_129290; - global_tid_46681 = get_global_id(0); - local_tid_46682 = 
get_local_id(0); - group_sizze_46685 = get_local_size(0); - wave_sizze_46684 = LOCKSTEP_WIDTH; - group_tid_46683 = get_group_id(0); + flat_idx_129290 = (sext_i32_i64(local_tid_129284) + (int64_t) 1) * + (segscan_group_sizze_114101 * sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1; - int32_t phys_tid_42114; + int64_t gtid_114075; - phys_tid_42114 = global_tid_46681; + gtid_114075 = squot64(flat_idx_129290, N_75135); - int32_t phys_group_id_46686; + int64_t gtid_114083; - phys_group_id_46686 = get_group_id(0); - for (int32_t i_46687 = 0; i_46687 < sdiv_up32(required_groups_46680 - - phys_group_id_46686, - sext_i64_i32(num_groups_42201)); - i_46687++) { - int32_t virt_group_id_46688 = phys_group_id_46686 + i_46687 * - sext_i64_i32(num_groups_42201); - int64_t flat_idx_46689 = sext_i32_i64(virt_group_id_46688) * - segscan_group_sizze_42200 + sext_i32_i64(local_tid_46682); - int64_t gtid_42105 = squot64(flat_idx_46689, iota32_arg_29597); - int64_t gtid_42113 = flat_idx_46689 - squot64(flat_idx_46689, - iota32_arg_29597) * - iota32_arg_29597; - int64_t orig_group_46690 = squot64(flat_idx_46689, - segscan_group_sizze_42200 * - sdiv_up64(m_29166 * iota32_arg_29597, - sext_i32_i64(num_threads_46638))); - int64_t carry_in_flat_idx_46691 = orig_group_46690 * - (segscan_group_sizze_42200 * sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1; - - if (slt64(gtid_42105, m_29166) && slt64(gtid_42113, iota32_arg_29597)) { - if (!(orig_group_46690 == (int64_t) 0 || (flat_idx_46689 == - (orig_group_46690 + - (int64_t) 1) * - (segscan_group_sizze_42200 * - sdiv_up64(m_29166 * - iota32_arg_29597, - sext_i32_i64(num_threads_46638))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46689, - iota32_arg_29597), - flat_idx_46689 - - carry_in_flat_idx_46691)))) { - float x_42204; - float x_42205; - - x_42204 = ((__global - float *) mem_45302)[squot64(carry_in_flat_idx_46691, - iota32_arg_29597) * - iota32_arg_29597 + - 
(carry_in_flat_idx_46691 - - squot64(carry_in_flat_idx_46691, - iota32_arg_29597) * - iota32_arg_29597)]; - x_42205 = ((__global float *) mem_45302)[gtid_42105 * - iota32_arg_29597 + - gtid_42113]; - - float defunc_1_op_res_42206; - - defunc_1_op_res_42206 = x_42204 + x_42205; - x_42204 = defunc_1_op_res_42206; - ((__global float *) mem_45302)[gtid_42105 * iota32_arg_29597 + - gtid_42113] = x_42204; - } + gtid_114083 = flat_idx_129290 - squot64(flat_idx_129290, N_75135) * N_75135; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_114075, m_75136) && slt64(gtid_114083, N_75135)) { + ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + ((__global int64_t *) mem_124906)[gtid_114075 * N_75135 + + gtid_114083]; + } else { + ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + (int64_t) 0; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segscan_group_sizze_42200 -} -__kernel void mainzisegmap_38661(__global int *global_failure, int64_t N_29165, - float freq_29170, int64_t i32_res_29181, - __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44385) -{ - #define segmap_group_sizze_38734 (mainzisegmap_group_sizze_38664) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45679; - int32_t local_tid_45680; - int64_t group_sizze_45683; - int32_t wave_sizze_45682; - int32_t group_tid_45681; - - global_tid_45679 = get_global_id(0); - local_tid_45680 = get_local_id(0); - group_sizze_45683 = get_local_size(0); - wave_sizze_45682 = LOCKSTEP_WIDTH; - group_tid_45681 = get_group_id(0); - - int32_t phys_tid_38661; - - phys_tid_38661 = global_tid_45679; - - int64_t gtid_38659; + int64_t x_114106; + int64_t x_114107; + int64_t x_129291; + int64_t x_129292; + bool ltid_in_bounds_129294; - gtid_38659 = 
squot64(sext_i32_i64(group_tid_45681) * - segmap_group_sizze_38734 + - sext_i32_i64(local_tid_45680), N_29165); + ltid_in_bounds_129294 = slt64(sext_i32_i64(local_tid_129284), + stage1_num_groups_129257); - int64_t gtid_38660; + int32_t skip_threads_129295; - gtid_38660 = sext_i32_i64(group_tid_45681) * segmap_group_sizze_38734 + - sext_i32_i64(local_tid_45680) - squot64(sext_i32_i64(group_tid_45681) * - segmap_group_sizze_38734 + - sext_i32_i64(local_tid_45680), - N_29165) * N_29165; - if (slt64(gtid_38659, i32_res_29181) && slt64(gtid_38660, N_29165)) { - int32_t index_primexp_42340 = sext_i64_i32(gtid_38659); - bool index_primexp_42337 = index_primexp_42340 == 0; - float defunc_0_f_res_38740; - - if (index_primexp_42337) { - defunc_0_f_res_38740 = 1.0F; - } else { - int32_t x_38739 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_38660]; - bool cond_38741 = index_primexp_42340 == 1; - float defunc_0_f_res_f_res_38742; - - if (cond_38741) { - float i32_res_38743 = sitofp_i32_f32(x_38739); - - defunc_0_f_res_f_res_38742 = i32_res_38743; - } else { - int32_t r32_arg_38744 = sdiv32(index_primexp_42340, 2); - float i32_res_38745 = sitofp_i32_f32(r32_arg_38744); - float i32_res_38746 = sitofp_i32_f32(x_38739); - float x_38747 = 6.2831855F * i32_res_38745; - float x_38748 = i32_res_38746 * x_38747; - float angle_38749 = x_38748 / freq_29170; - int32_t x_38750 = smod32(index_primexp_42340, 2); - bool cond_38751 = x_38750 == 0; - float defunc_0_f_res_f_res_f_res_38752; - - if (cond_38751) { - float sin_res_38753; - - sin_res_38753 = futrts_sin32(angle_38749); - defunc_0_f_res_f_res_f_res_38752 = sin_res_38753; - } else { - float cos_res_38754; + // read input for in-block scan + { + if (ltid_in_bounds_129294) { + x_114107 = ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)]; + if ((local_tid_129284 - squot32(local_tid_129284, 32) * 32) == 0) { + x_114106 = x_114107; + } + } + } + // in-block scan (hopefully no barriers needed) + 
{ + skip_threads_129295 = 1; + while (slt32(skip_threads_129295, 32)) { + if (sle32(skip_threads_129295, local_tid_129284 - + squot32(local_tid_129284, 32) * 32) && + ltid_in_bounds_129294) { + // read operands + { + x_114106 = ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284) - + sext_i32_i64(skip_threads_129295)]; + } + // perform operation + { + bool inactive_129296 = + slt64(srem64((sext_i32_i64(local_tid_129284) + + (int64_t) 1) * + (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1, N_75135), + (sext_i32_i64(local_tid_129284) + (int64_t) 1) * + (segscan_group_sizze_114101 * sdiv_up64(m_75136 * + N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_129284 - + skip_threads_129295) + + (int64_t) 1) * + (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1)); - cos_res_38754 = futrts_cos32(angle_38749); - defunc_0_f_res_f_res_f_res_38752 = cos_res_38754; + if (inactive_129296) { + x_114106 = x_114107; + } + if (!inactive_129296) { + int64_t defunc_1_op_res_114108 = add64(x_114106, + x_114107); + + x_114106 = defunc_1_op_res_114108; + } + } + } + if (sle32(wave_sizze_129286, skip_threads_129295)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129295, local_tid_129284 - + squot32(local_tid_129284, 32) * 32) && + ltid_in_bounds_129294) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + x_114106; + x_114107 = x_114106; } - defunc_0_f_res_f_res_38742 = defunc_0_f_res_f_res_f_res_38752; } - defunc_0_f_res_38740 = defunc_0_f_res_f_res_38742; + if (sle32(wave_sizze_129286, skip_threads_129295)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129295 *= 2; } - ((__global float *) mem_44385)[gtid_38659 * N_29165 + gtid_38660] = - defunc_0_f_res_38740; } - - error_0: - return; - #undef 
segmap_group_sizze_38734 -} -__kernel void mainzisegmap_38839(__global int *global_failure, int64_t N_29165, - float freq_29170, int64_t i32_res_29181, - __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44389) -{ - #define segmap_group_sizze_38908 (mainzisegmap_group_sizze_38842) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45684; - int32_t local_tid_45685; - int64_t group_sizze_45688; - int32_t wave_sizze_45687; - int32_t group_tid_45686; - - global_tid_45684 = get_global_id(0); - local_tid_45685 = get_local_id(0); - group_sizze_45688 = get_local_size(0); - wave_sizze_45687 = LOCKSTEP_WIDTH; - group_tid_45686 = get_group_id(0); - - int32_t phys_tid_38839; - - phys_tid_38839 = global_tid_45684; - - int64_t gtid_38837; - - gtid_38837 = squot64(sext_i32_i64(group_tid_45686) * - segmap_group_sizze_38908 + - sext_i32_i64(local_tid_45685), N_29165); - - int64_t gtid_38838; - - gtid_38838 = sext_i32_i64(group_tid_45686) * segmap_group_sizze_38908 + - sext_i32_i64(local_tid_45685) - squot64(sext_i32_i64(group_tid_45686) * - segmap_group_sizze_38908 + - sext_i32_i64(local_tid_45685), - N_29165) * N_29165; - if (slt64(gtid_38837, i32_res_29181) && slt64(gtid_38838, N_29165)) { - int32_t index_primexp_42349 = sext_i64_i32(gtid_38837); - bool index_primexp_42346 = index_primexp_42349 == 0; - float defunc_0_f_res_38914; - - if (index_primexp_42346) { - defunc_0_f_res_38914 = 1.0F; - } else { - int32_t x_38913 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_38838]; - int32_t i_38915 = add32(1, index_primexp_42349); - int32_t r32_arg_38916 = sdiv32(i_38915, 2); - float i32_res_38917 = sitofp_i32_f32(r32_arg_38916); - float i32_res_38918 = sitofp_i32_f32(x_38913); - float x_38919 = 6.2831855F * i32_res_38917; - float x_38920 = i32_res_38918 * x_38919; - float angle_38921 = x_38920 / freq_29170; - int32_t x_38922 = smod32(i_38915, 2); - 
bool cond_38923 = x_38922 == 0; - float defunc_0_f_res_f_res_38924; - - if (cond_38923) { - float sin_res_38925; - - sin_res_38925 = futrts_sin32(angle_38921); - defunc_0_f_res_f_res_38924 = sin_res_38925; - } else { - float cos_res_38926; - - cos_res_38926 = futrts_cos32(angle_38921); - defunc_0_f_res_f_res_38924 = cos_res_38926; - } - defunc_0_f_res_38914 = defunc_0_f_res_f_res_38924; + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129284 - squot32(local_tid_129284, 32) * 32) == 31 && + ltid_in_bounds_129294) { + ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(squot32(local_tid_129284, + 32))] = + x_114106; } - ((__global float *) mem_44389)[gtid_38837 * N_29165 + gtid_38838] = - defunc_0_f_res_38914; } - - error_0: - return; - #undef segmap_group_sizze_38908 -} -__kernel void mainzisegmap_38967(__global int *global_failure, int64_t N_29165, - int64_t i32_res_29181, float i32_res_29246, - __global unsigned char *mem_44393, __global - unsigned char *mem_44397) -{ - #define segmap_group_sizze_38991 (mainzisegmap_group_sizze_38970) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45689; - int32_t local_tid_45690; - int64_t group_sizze_45693; - int32_t wave_sizze_45692; - int32_t group_tid_45691; - - global_tid_45689 = get_global_id(0); - local_tid_45690 = get_local_id(0); - group_sizze_45693 = get_local_size(0); - wave_sizze_45692 = LOCKSTEP_WIDTH; - group_tid_45691 = get_group_id(0); - - int32_t phys_tid_38967; - - phys_tid_38967 = global_tid_45689; - - int64_t gtid_38965; - - gtid_38965 = squot64(sext_i32_i64(group_tid_45691) * - segmap_group_sizze_38991 + - sext_i32_i64(local_tid_45690), i32_res_29181); - - int64_t gtid_38966; - - gtid_38966 = sext_i32_i64(group_tid_45691) * segmap_group_sizze_38991 + - sext_i32_i64(local_tid_45690) - squot64(sext_i32_i64(group_tid_45691) * - 
segmap_group_sizze_38991 + - sext_i32_i64(local_tid_45690), - i32_res_29181) * i32_res_29181; - if (slt64(gtid_38965, N_29165) && slt64(gtid_38966, i32_res_29181)) { - float x_38994 = ((__global float *) mem_44393)[gtid_38965 * - i32_res_29181 + - gtid_38966]; - float defunc_0_f_res_38995 = i32_res_29246 + x_38994; - - ((__global float *) mem_44397)[gtid_38965 * i32_res_29181 + - gtid_38966] = defunc_0_f_res_38995; - } - - error_0: - return; - #undef segmap_group_sizze_38991 -} -__kernel void mainzisegmap_39000(__global int *global_failure, int64_t N_29165, - int64_t m_29166, int32_t n_29169, - int32_t k2p2zq_29179, int64_t i32_res_29181, - int64_t num_groups_39025, __global - unsigned char *binop_p_mem_44390, __global - unsigned char *mem_44397, __global - unsigned char *mem_44400, __global - unsigned char *mem_44404, __global - unsigned char *mem_44446) -{ - #define segmap_group_sizze_39024 (mainzisegmap_group_sizze_39002) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45697; - int32_t local_tid_45698; - int64_t group_sizze_45701; - int32_t wave_sizze_45700; - int32_t group_tid_45699; - - global_tid_45697 = get_global_id(0); - local_tid_45698 = get_local_id(0); - group_sizze_45701 = get_local_size(0); - wave_sizze_45700 = LOCKSTEP_WIDTH; - group_tid_45699 = get_group_id(0); - - int32_t phys_tid_39000; - - phys_tid_39000 = global_tid_45697; - - int32_t phys_group_id_45702; - - phys_group_id_45702 = get_group_id(0); - for (int32_t i_45703 = 0; i_45703 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_39024)) - - phys_group_id_45702, sext_i64_i32(num_groups_39025)); - i_45703++) { - int32_t virt_group_id_45704 = phys_group_id_45702 + i_45703 * - sext_i64_i32(num_groups_39025); - int64_t gtid_38999 = sext_i32_i64(virt_group_id_45704) * - segmap_group_sizze_39024 + sext_i32_i64(local_tid_45698); + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, 
after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129297; - if (slt64(gtid_38999, m_29166)) { - for (int32_t i_44360 = 0; i_44360 < k2p2zq_29179; i_44360++) { - int64_t i_44289 = sext_i32_i64(i_44360); - - for (int32_t i_44359 = 0; i_44359 < k2p2zq_29179; i_44359++) { - int64_t i_44293 = sext_i32_i64(i_44359); - float defunc_2_reduce_res_39033; - float redout_44295 = 0.0F; - - for (int32_t i_44358 = 0; i_44358 < n_29169; i_44358++) { - int64_t i_44296 = sext_i32_i64(i_44358); - float x_39037 = ((__global float *) mem_44400)[i_44296 * - m_29166 + - gtid_38999]; - float x_39038 = ((__global - float *) binop_p_mem_44390)[i_44289 * - N_29165 + - i_44296]; - float x_39039 = ((__global float *) mem_44397)[i_44296 * - i32_res_29181 + - i_44293]; - float x_39040 = x_39038 * x_39039; - bool isnan_res_39041; - - isnan_res_39041 = futrts_isnan32(x_39037); - - float y_39042; + // read input for in-block scan + { + if (squot32(local_tid_129284, 32) == 0 && ltid_in_bounds_129294) { + x_129292 = ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)]; + if ((local_tid_129284 - squot32(local_tid_129284, 32) * 32) == + 0) { + x_129291 = x_129292; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129297 = 1; + while (slt32(skip_threads_129297, 32)) { + if (sle32(skip_threads_129297, local_tid_129284 - + squot32(local_tid_129284, 32) * 32) && + (squot32(local_tid_129284, 32) == 0 && + ltid_in_bounds_129294)) { + // read operands + { + x_129291 = ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284) - + sext_i32_i64(skip_threads_129297)]; + } + // perform operation + { + bool inactive_129298 = + slt64(srem64((sext_i32_i64(local_tid_129284 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1, N_75135), + (sext_i32_i64(local_tid_129284 * 32 + 32 - + 1) + (int64_t) 1) * 
+ (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129284 - + skip_threads_129297) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1)); - if (isnan_res_39041) { - y_39042 = 0.0F; - } else { - y_39042 = 1.0F; + if (inactive_129298) { + x_129291 = x_129292; + } + if (!inactive_129298) { + int64_t defunc_1_op_res_129293 = add64(x_129291, + x_129292); + + x_129291 = defunc_1_op_res_129293; } - - float defunc_2_f_res_39043 = x_39040 * y_39042; - float defunc_1_op_res_39036 = defunc_2_f_res_39043 + - redout_44295; - float redout_tmp_45707 = defunc_1_op_res_39036; - - redout_44295 = redout_tmp_45707; } - defunc_2_reduce_res_39033 = redout_44295; - ((__global float *) mem_44404)[phys_tid_39000 + (i_44289 * - (num_groups_39025 * - segmap_group_sizze_39024 * - i32_res_29181) + - i_44293 * - (num_groups_39025 * - segmap_group_sizze_39024))] = - defunc_2_reduce_res_39033; } - } - for (int64_t i_45708 = 0; i_45708 < i32_res_29181; i_45708++) { - for (int64_t i_45709 = 0; i_45709 < i32_res_29181; i_45709++) { - ((__global float *) mem_44446)[i_45708 * (m_29166 * - i32_res_29181) + - i_45709 * m_29166 + - gtid_38999] = ((__global - float *) mem_44404)[phys_tid_39000 + - (i_45708 * - (num_groups_39025 * - segmap_group_sizze_39024 * - i32_res_29181) + - i_45709 * - (num_groups_39025 * - segmap_group_sizze_39024))]; + if (sle32(wave_sizze_129286, skip_threads_129297)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129297, local_tid_129284 - + squot32(local_tid_129284, 32) * 32) && + (squot32(local_tid_129284, 32) == 0 && + ltid_in_bounds_129294)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + x_129291; + x_129292 = x_129291; + } } + if (sle32(wave_sizze_129286, skip_threads_129297)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129297 *= 2; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - - error_0: - return; - #undef segmap_group_sizze_39024 -} -__kernel void mainzisegmap_39046(__global int *global_failure, int64_t N_29165, - int64_t m_29166, int32_t n_29169, - int32_t k2p2zq_29179, int64_t i32_res_29181, - int64_t num_groups_39202, __global - unsigned char *images_mem_44381, __global - unsigned char *mem_44393, __global - unsigned char *mem_44397, __global - unsigned char *mem_44449, __global - unsigned char *mem_44465) -{ - #define segmap_group_sizze_39201 (mainzisegmap_group_sizze_39049) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45710; - int32_t local_tid_45711; - int64_t group_sizze_45714; - int32_t wave_sizze_45713; - int32_t group_tid_45712; - - global_tid_45710 = get_global_id(0); - local_tid_45711 = get_local_id(0); - group_sizze_45714 = get_local_size(0); - wave_sizze_45713 = LOCKSTEP_WIDTH; - group_tid_45712 = get_group_id(0); - - int32_t phys_tid_39046; - - phys_tid_39046 = global_tid_45710; - - int32_t phys_group_id_45715; - - phys_group_id_45715 = get_group_id(0); - for (int32_t i_45716 = 0; i_45716 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181, - segmap_group_sizze_39201)) - - phys_group_id_45715, sext_i64_i32(num_groups_39202)); - i_45716++) { - int32_t virt_group_id_45717 = phys_group_id_45715 + i_45716 * - sext_i64_i32(num_groups_39202); - int64_t gtid_39044 = squot64(sext_i32_i64(virt_group_id_45717) * - segmap_group_sizze_39201 + - sext_i32_i64(local_tid_45711), - i32_res_29181); - int64_t gtid_39045 = sext_i32_i64(virt_group_id_45717) * - segmap_group_sizze_39201 + sext_i32_i64(local_tid_45711) - - squot64(sext_i32_i64(virt_group_id_45717) * - segmap_group_sizze_39201 + - sext_i32_i64(local_tid_45711), i32_res_29181) * - i32_res_29181; - - if (slt64(gtid_39044, m_29166) && 
slt64(gtid_39045, i32_res_29181)) { - for (int32_t i_44362 = 0; i_44362 < k2p2zq_29179; i_44362++) { - int64_t i_44299 = sext_i32_i64(i_44362); - float defunc_2_reduce_res_39213; - float redout_44301 = 0.0F; - - for (int32_t i_44361 = 0; i_44361 < n_29169; i_44361++) { - int64_t i_44302 = sext_i32_i64(i_44361); - float x_39217 = ((__global - float *) images_mem_44381)[gtid_39044 * - N_29165 + - i_44302]; - float x_39218 = ((__global float *) mem_44393)[i_44302 * - i32_res_29181 + - gtid_39045]; - float x_39219 = ((__global float *) mem_44397)[i_44302 * - i32_res_29181 + - i_44299]; - float x_39220 = x_39218 * x_39219; - bool isnan_res_39221; - - isnan_res_39221 = futrts_isnan32(x_39217); - - float y_39222; - - if (isnan_res_39221) { - y_39222 = 0.0F; - } else { - y_39222 = 1.0F; - } - - float defunc_2_f_res_39223 = x_39220 * y_39222; - float defunc_1_op_res_39216 = defunc_2_f_res_39223 + - redout_44301; - float redout_tmp_45719 = defunc_1_op_res_39216; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129284, 32) == 0 || !ltid_in_bounds_129294)) { + // read operands + { + x_114107 = x_114106; + x_114106 = ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(squot32(local_tid_129284, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129299 = + slt64(srem64((sext_i32_i64(local_tid_129284) + + (int64_t) 1) * (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1, N_75135), + (sext_i32_i64(local_tid_129284) + (int64_t) 1) * + (segscan_group_sizze_114101 * sdiv_up64(m_75136 * + N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129284, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1)); + + if (inactive_129299) { + x_114106 = x_114107; + } + if (!inactive_129299) { + int64_t 
defunc_1_op_res_114108 = add64(x_114106, x_114107); - redout_44301 = redout_tmp_45719; + x_114106 = defunc_1_op_res_114108; } - defunc_2_reduce_res_39213 = redout_44301; - ((__global float *) mem_44449)[phys_tid_39046 + i_44299 * - (num_groups_39202 * - segmap_group_sizze_39201)] = - defunc_2_reduce_res_39213; } - for (int64_t i_45720 = 0; i_45720 < i32_res_29181; i_45720++) { - ((__global float *) mem_44465)[i_45720 * (i32_res_29181 * - m_29166) + - gtid_39044 * i32_res_29181 + - gtid_39045] = ((__global - float *) mem_44449)[phys_tid_39046 + - i_45720 * - (num_groups_39202 * - segmap_group_sizze_39201)]; + // write final result + { + ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + x_114106; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129284, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)] = + x_114107; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_114075, m_75136) && slt64(gtid_114083, N_75135)) { + ((__global int64_t *) mem_124906)[gtid_114075 * N_75135 + + gtid_114083] = ((__local + int64_t *) scan_arr_mem_129288)[sext_i32_i64(local_tid_129284)]; + } } error_0: return; - #undef segmap_group_sizze_39201 + #undef segscan_group_sizze_114101 } -__kernel void mainzisegmap_39638(__global int *global_failure, int64_t m_29166, - int64_t i32_res_29181, int64_t nm_29314, - int64_t i32_res_29329, int64_t x_29330, - int64_t j_m_i_29333, - int64_t gauss_jordan_res_r_ixfn_44617, - int64_t gauss_jordan_res_r_ixfn_44618, - int64_t gauss_jordan_res_r_ixfn_44620, __global - unsigned char *gauss_jordan_res_r_mem_44622, - __global unsigned char *mem_44627) +__kernel void mainziscan_stage2_114793(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129645_backing_aligned_0, + int64_t m_75136, int64_t 
iota_arg_77024, + int64_t stage1_num_groups_129614, + int32_t num_threads_129615, __global + unsigned char *mem_124991) { - #define segmap_group_sizze_40385 (mainzisegmap_group_sizze_39642) + #define segscan_group_sizze_114877 (mainzisegscan_group_sizze_114787) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129645_backing_0 = + (__local volatile + char *) scan_arr_mem_129645_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45877; - int32_t local_tid_45878; - int64_t group_sizze_45881; - int32_t wave_sizze_45880; - int32_t group_tid_45879; - - global_tid_45877 = get_global_id(0); - local_tid_45878 = get_local_id(0); - group_sizze_45881 = get_local_size(0); - wave_sizze_45880 = LOCKSTEP_WIDTH; - group_tid_45879 = get_group_id(0); - - int32_t phys_tid_39638; - - phys_tid_39638 = global_tid_45877; - - int64_t gtid_39635; - - gtid_39635 = squot64(sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878), i32_res_29181 * - j_m_i_29333); + int32_t global_tid_129640; + int32_t local_tid_129641; + int64_t group_sizze_129644; + int32_t wave_sizze_129643; + int32_t group_tid_129642; - int64_t gtid_slice_39633; - - gtid_slice_39633 = squot64(sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878) - - squot64(sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878), - i32_res_29181 * j_m_i_29333) * - (i32_res_29181 * j_m_i_29333), j_m_i_29333); - - int64_t gtid_slice_39634; - - gtid_slice_39634 = sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + sext_i32_i64(local_tid_45878) - - squot64(sext_i32_i64(group_tid_45879) * segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878), i32_res_29181 * j_m_i_29333) * - (i32_res_29181 * j_m_i_29333) - squot64(sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878) - - 
squot64(sext_i32_i64(group_tid_45879) * - segmap_group_sizze_40385 + - sext_i32_i64(local_tid_45878), - i32_res_29181 * - j_m_i_29333) * - (i32_res_29181 * j_m_i_29333), - j_m_i_29333) * j_m_i_29333; - if ((slt64(gtid_39635, m_29166) && slt64(gtid_slice_39633, - i32_res_29181)) && - slt64(gtid_slice_39634, j_m_i_29333)) { - int64_t slice_40389 = i32_res_29181 + gtid_slice_39634; - int64_t binop_x_42407 = x_29330 * gtid_39635; - int64_t binop_y_42408 = i32_res_29329 * gtid_slice_39633; - int64_t binop_x_42409 = binop_x_42407 + binop_y_42408; - int64_t binop_x_42410 = slice_40389 + binop_x_42409; - int64_t new_index_42411 = squot64(binop_x_42410, nm_29314); - int64_t binop_y_42423 = nm_29314 * new_index_42411; - int64_t new_index_42424 = binop_x_42410 - binop_y_42423; - float v_40390 = ((__global - float *) gauss_jordan_res_r_mem_44622)[gauss_jordan_res_r_ixfn_44617 + - (new_index_42411 * - gauss_jordan_res_r_ixfn_44618 + - new_index_42424 * - gauss_jordan_res_r_ixfn_44620)]; - - ((__global float *) mem_44627)[gtid_39635 * (j_m_i_29333 * - i32_res_29181) + - gtid_slice_39633 * j_m_i_29333 + - gtid_slice_39634] = v_40390; - } - - error_0: - return; - #undef segmap_group_sizze_40385 -} -__kernel void mainzisegmap_39868(__global int *global_failure, int64_t m_29166, - int64_t nm_29314, int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44605) -{ - #define segmap_group_sizze_40373 (mainzisegmap_group_sizze_39871) + global_tid_129640 = get_global_id(0); + local_tid_129641 = get_local_id(0); + group_sizze_129644 = get_local_size(0); + wave_sizze_129643 = LOCKSTEP_WIDTH; + group_tid_129642 = get_group_id(0); - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t phys_tid_114793; - if (*global_failure >= 0) - return; + phys_tid_114793 = global_tid_129640; - int32_t global_tid_45871; - int32_t local_tid_45872; - int64_t 
group_sizze_45875; - int32_t wave_sizze_45874; - int32_t group_tid_45873; + __local char *scan_arr_mem_129645; - global_tid_45871 = get_global_id(0); - local_tid_45872 = get_local_id(0); - group_sizze_45875 = get_local_size(0); - wave_sizze_45874 = LOCKSTEP_WIDTH; - group_tid_45873 = get_group_id(0); + scan_arr_mem_129645 = (__local char *) scan_arr_mem_129645_backing_0; - int32_t phys_tid_39868; + int64_t flat_idx_129647; - phys_tid_39868 = global_tid_45871; + flat_idx_129647 = (sext_i32_i64(local_tid_129641) + (int64_t) 1) * + (segscan_group_sizze_114877 * sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1; - int64_t gtid_39866; + int64_t gtid_114784; - gtid_39866 = squot64(sext_i32_i64(group_tid_45873) * - segmap_group_sizze_40373 + - sext_i32_i64(local_tid_45872), nm_29314); + gtid_114784 = squot64(flat_idx_129647, iota_arg_77024); - int64_t gtid_39867; + int64_t gtid_114792; - gtid_39867 = sext_i32_i64(group_tid_45873) * segmap_group_sizze_40373 + - sext_i32_i64(local_tid_45872) - squot64(sext_i32_i64(group_tid_45873) * - segmap_group_sizze_40373 + - sext_i32_i64(local_tid_45872), - nm_29314) * nm_29314; - if (slt64(gtid_39866, m_29166) && slt64(gtid_39867, nm_29314)) { - float write_value_40379 = ((__global float *) mem_44605)[gtid_39866 * - nm_29314 + - gtid_39867]; - - if ((sle64((int64_t) 0, gtid_39866) && slt64(gtid_39866, m_29166)) && - (sle64((int64_t) 0, gtid_39867) && slt64(gtid_39867, nm_29314))) { - ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39866 * - ctx_param_ext_44581 + - gtid_39867 * - ctx_param_ext_44583)] = - write_value_40379; + gtid_114792 = flat_idx_129647 - squot64(flat_idx_129647, iota_arg_77024) * + iota_arg_77024; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_114784, m_75136) && slt64(gtid_114792, iota_arg_77024)) { + ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + ((__global double *) 
mem_124991)[gtid_114784 * iota_arg_77024 + + gtid_114792]; + } else { + ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + 0.0; } } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_40373 -} -__kernel void mainzisegmap_39938(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, int64_t m_29166, - int32_t k2p2zq_29179, int32_t m_29312, - int64_t nm_29314, int32_t i_40240, - int64_t i32_res_40242, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44601, __global - unsigned char *mem_44605) -{ - #define segmap_group_sizze_40323 (mainzisegmap_group_sizze_39941) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45866; - int32_t local_tid_45867; - int64_t group_sizze_45870; - int32_t wave_sizze_45869; - int32_t group_tid_45868; - - global_tid_45866 = get_global_id(0); - local_tid_45867 = get_local_id(0); - group_sizze_45870 = get_local_size(0); - wave_sizze_45869 = LOCKSTEP_WIDTH; - group_tid_45868 = get_group_id(0); - - int32_t phys_tid_39938; - - phys_tid_39938 = global_tid_45866; - - int64_t gtid_39936; + double x_114881; + double x_114882; + double x_129648; + double x_129649; + bool ltid_in_bounds_129651; - gtid_39936 = squot64(sext_i32_i64(group_tid_45868) * - segmap_group_sizze_40323 + - sext_i32_i64(local_tid_45867), nm_29314); + ltid_in_bounds_129651 = slt64(sext_i32_i64(local_tid_129641), + stage1_num_groups_129614); - int64_t gtid_39937; + int32_t skip_threads_129652; - gtid_39937 = sext_i32_i64(group_tid_45868) * segmap_group_sizze_40323 + - sext_i32_i64(local_tid_45867) - squot64(sext_i32_i64(group_tid_45868) * - segmap_group_sizze_40323 + - sext_i32_i64(local_tid_45867), - nm_29314) * nm_29314; - if (slt64(gtid_39936, m_29166) && 
slt64(gtid_39937, nm_29314)) { - bool cond_40328 = ((__global bool *) mem_44601)[gtid_39936]; - int32_t defunc_0_f_res_40330 = sext_i64_i32(gtid_39937); - int32_t defunc_0_f_res_40331 = sdiv32(defunc_0_f_res_40330, m_29312); - int32_t defunc_0_f_res_40332 = smod32(defunc_0_f_res_40330, m_29312); - float defunc_0_f_res_40333; - - if (cond_40328) { - int32_t x_40334 = mul32(m_29312, defunc_0_f_res_40331); - int32_t i32_arg_40335 = add32(defunc_0_f_res_40332, x_40334); - int64_t i32_res_40336 = sext_i32_i64(i32_arg_40335); - bool x_40337 = sle64((int64_t) 0, i32_res_40336); - bool y_40338 = slt64(i32_res_40336, nm_29314); - bool bounds_check_40339 = x_40337 && y_40338; - bool index_certs_40340; - - if (!bounds_check_40339) { + // read input for in-block scan + { + if (ltid_in_bounds_129651) { + x_114882 = ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)]; + if ((local_tid_129641 - squot32(local_tid_129641, 32) * 32) == 0) { + x_114881 = x_114882; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129652 = 1; + while (slt32(skip_threads_129652, 32)) { + if (sle32(skip_threads_129652, local_tid_129641 - + squot32(local_tid_129641, 32) * 32) && + ltid_in_bounds_129651) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, 11) == - -1) { - global_failure_args[0] = i32_res_40336; - global_failure_args[1] = nm_29314; - ; + x_114881 = ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641) - + sext_i32_i64(skip_threads_129652)]; + } + // perform operation + { + bool inactive_129653 = + slt64(srem64((sext_i32_i64(local_tid_129641) + + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1, iota_arg_77024), + (sext_i32_i64(local_tid_129641) + (int64_t) 1) * + (segscan_group_sizze_114877 * sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1 - 
((sext_i32_i64(local_tid_129641 - + skip_threads_129652) + + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1)); + + if (inactive_129653) { + x_114881 = x_114882; + } + if (!inactive_129653) { + double defunc_1_op_res_114883 = x_114881 + x_114882; + + x_114881 = defunc_1_op_res_114883; } - return; } } - - float defunc_0_f_res_t_res_40341 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39936 * - ctx_param_ext_44581 + - i32_res_40336 * - ctx_param_ext_44583)]; - - defunc_0_f_res_40333 = defunc_0_f_res_t_res_40341; - } else { - float v1_40327 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39936 * - ctx_param_ext_44581 + - i32_res_40242 * - ctx_param_ext_44583)]; - int64_t i32_res_40342 = sext_i32_i64(defunc_0_f_res_40332); - bool x_40343 = sle64((int64_t) 0, i32_res_40342); - bool y_40344 = slt64(i32_res_40342, nm_29314); - bool bounds_check_40345 = x_40343 && y_40344; - bool index_certs_40346; - - if (!bounds_check_40345) { + if (sle32(wave_sizze_129643, skip_threads_129652)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129652, local_tid_129641 - + squot32(local_tid_129641, 32) * 32) && + ltid_in_bounds_129651) { + // write result { - if (atomic_cmpxchg_i32_global(global_failure, -1, 12) == - -1) { - global_failure_args[0] = i32_res_40342; - global_failure_args[1] = nm_29314; - ; - } - return; + ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + x_114881; + x_114882 = x_114881; } } - - float x_40347 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39936 * - ctx_param_ext_44581 + - i32_res_40342 * - ctx_param_ext_44583)]; - float x_40348 = x_40347 / v1_40327; - int32_t y_40349 = sub32(k2p2zq_29179, 1); - bool cond_40350 = slt32(defunc_0_f_res_40331, y_40349); - float defunc_0_f_res_f_res_40351; - - if (cond_40350) { - int32_t x_40352 = add32(1, 
defunc_0_f_res_40331); - int32_t x_40353 = mul32(m_29312, x_40352); - int32_t i32_arg_40354 = add32(defunc_0_f_res_40332, x_40353); - int64_t i32_res_40355 = sext_i32_i64(i32_arg_40354); - bool x_40356 = sle64((int64_t) 0, i32_res_40355); - bool y_40357 = slt64(i32_res_40355, nm_29314); - bool bounds_check_40358 = x_40356 && y_40357; - bool index_certs_40359; - - if (!bounds_check_40358) { + if (sle32(wave_sizze_129643, skip_threads_129652)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129652 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129641 - squot32(local_tid_129641, 32) * 32) == 31 && + ltid_in_bounds_129651) { + ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(squot32(local_tid_129641, + 32))] = + x_114881; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129654; + + // read input for in-block scan + { + if (squot32(local_tid_129641, 32) == 0 && ltid_in_bounds_129651) { + x_129649 = ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)]; + if ((local_tid_129641 - squot32(local_tid_129641, 32) * 32) == + 0) { + x_129648 = x_129649; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129654 = 1; + while (slt32(skip_threads_129654, 32)) { + if (sle32(skip_threads_129654, local_tid_129641 - + squot32(local_tid_129641, 32) * 32) && + (squot32(local_tid_129641, 32) == 0 && + ltid_in_bounds_129651)) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, 13) == - -1) { - global_failure_args[0] = i32_res_40355; - global_failure_args[1] = nm_29314; - ; + x_129648 = ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641) - + sext_i32_i64(skip_threads_129654)]; + } + // perform operation + { + bool inactive_129655 = + slt64(srem64((sext_i32_i64(local_tid_129641 
* 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1, iota_arg_77024), + (sext_i32_i64(local_tid_129641 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129641 - + skip_threads_129654) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1)); + + if (inactive_129655) { + x_129648 = x_129649; + } + if (!inactive_129655) { + double defunc_1_op_res_129650 = x_129648 + x_129649; + + x_129648 = defunc_1_op_res_129650; } - return; } } - - float x_40360 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39936 * - ctx_param_ext_44581 + - i32_res_40355 * - ctx_param_ext_44583)]; - int32_t i32_arg_40361 = add32(i_40240, x_40353); - int64_t i32_res_40362 = sext_i32_i64(i32_arg_40361); - bool x_40363 = sle64((int64_t) 0, i32_res_40362); - bool y_40364 = slt64(i32_res_40362, nm_29314); - bool bounds_check_40365 = x_40363 && y_40364; - bool index_certs_40366; - - if (!bounds_check_40365) { + if (sle32(wave_sizze_129643, skip_threads_129654)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129654, local_tid_129641 - + squot32(local_tid_129641, 32) * 32) && + (squot32(local_tid_129641, 32) == 0 && + ltid_in_bounds_129651)) { + // write result { - if (atomic_cmpxchg_i32_global(global_failure, -1, 14) == - -1) { - global_failure_args[0] = i32_res_40362; - global_failure_args[1] = nm_29314; - ; - } - return; + ((volatile __local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + x_129648; + x_129649 = x_129648; } } - - float x_40367 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39936 * - ctx_param_ext_44581 + - i32_res_40362 * - ctx_param_ext_44583)]; - float y_40368 = 
x_40348 * x_40367; - float defunc_0_f_res_f_res_t_res_40369 = x_40360 - y_40368; - - defunc_0_f_res_f_res_40351 = defunc_0_f_res_f_res_t_res_40369; - } else { - defunc_0_f_res_f_res_40351 = x_40348; + if (sle32(wave_sizze_129643, skip_threads_129654)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129654 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129641, 32) == 0 || !ltid_in_bounds_129651)) { + // read operands + { + x_114882 = x_114881; + x_114881 = ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(squot32(local_tid_129641, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129656 = + slt64(srem64((sext_i32_i64(local_tid_129641) + + (int64_t) 1) * (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1, iota_arg_77024), + (sext_i32_i64(local_tid_129641) + (int64_t) 1) * + (segscan_group_sizze_114877 * sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129641, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1)); + + if (inactive_129656) { + x_114881 = x_114882; + } + if (!inactive_129656) { + double defunc_1_op_res_114883 = x_114881 + x_114882; + + x_114881 = defunc_1_op_res_114883; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + x_114881; } - defunc_0_f_res_40333 = defunc_0_f_res_f_res_40351; } - ((__global float *) mem_44605)[gtid_39936 * nm_29314 + gtid_39937] = - defunc_0_f_res_40333; + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129641, 32) == 0) { + ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)] = + x_114882; + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_114784, m_75136) && slt64(gtid_114792, iota_arg_77024)) { + ((__global double *) mem_124991)[gtid_114784 * iota_arg_77024 + + gtid_114792] = ((__local + double *) scan_arr_mem_129645)[sext_i32_i64(local_tid_129641)]; + } } error_0: return; - #undef segmap_group_sizze_40323 + #undef segscan_group_sizze_114877 } -__kernel void mainzisegmap_40038(__global int *global_failure, int64_t m_29166, - int64_t i32_res_40242, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44601) +__kernel void mainziscan_stage3_103083(__global int *global_failure, + int64_t m_75136, int64_t n_75139, + int64_t num_groups_103198, + int32_t num_threads_126412, + int32_t required_groups_126454, __global + unsigned char *mem_120201) { - #define segmap_group_sizze_40307 (mainzisegmap_group_sizze_40040) + #define segscan_group_sizze_103197 (mainzisegscan_group_sizze_103077) const int block_dim0 = 0; const int block_dim1 = 1; @@ -4947,51 +7007,95 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_45861; - int32_t local_tid_45862; - int64_t group_sizze_45865; - int32_t wave_sizze_45864; - int32_t group_tid_45863; - - global_tid_45861 = get_global_id(0); - local_tid_45862 = get_local_id(0); - group_sizze_45865 = get_local_size(0); - wave_sizze_45864 = LOCKSTEP_WIDTH; - group_tid_45863 = get_group_id(0); - - int32_t phys_tid_40038; - - phys_tid_40038 = global_tid_45861; - - int64_t gtid_40037; - - gtid_40037 = sext_i32_i64(group_tid_45863) * segmap_group_sizze_40307 + - sext_i32_i64(local_tid_45862); - if (slt64(gtid_40037, m_29166)) { - float v1_40312 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_40037 * - ctx_param_ext_44581 + - i32_res_40242 * - ctx_param_ext_44583)]; - bool cond_40313 = v1_40312 == 0.0F; + int32_t global_tid_126455; + int32_t 
local_tid_126456; + int64_t group_sizze_126459; + int32_t wave_sizze_126458; + int32_t group_tid_126457; + + global_tid_126455 = get_global_id(0); + local_tid_126456 = get_local_id(0); + group_sizze_126459 = get_local_size(0); + wave_sizze_126458 = LOCKSTEP_WIDTH; + group_tid_126457 = get_group_id(0); + + int32_t phys_tid_103083; + + phys_tid_103083 = global_tid_126455; + + int32_t phys_group_id_126460; + + phys_group_id_126460 = get_group_id(0); + for (int32_t i_126461 = 0; i_126461 < sdiv_up32(required_groups_126454 - + phys_group_id_126460, + sext_i64_i32(num_groups_103198)); + i_126461++) { + int32_t virt_group_id_126462 = phys_group_id_126460 + i_126461 * + sext_i64_i32(num_groups_103198); + int64_t flat_idx_126463 = sext_i32_i64(virt_group_id_126462) * + segscan_group_sizze_103197 + sext_i32_i64(local_tid_126456); + int64_t gtid_103074 = squot64(flat_idx_126463, n_75139); + int64_t gtid_103082 = flat_idx_126463 - squot64(flat_idx_126463, + n_75139) * n_75139; + int64_t orig_group_126464 = squot64(flat_idx_126463, + segscan_group_sizze_103197 * + sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))); + int64_t carry_in_flat_idx_126465 = orig_group_126464 * + (segscan_group_sizze_103197 * sdiv_up64(m_75136 * n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1; - ((__global bool *) mem_44601)[gtid_40037] = cond_40313; + if (slt64(gtid_103074, m_75136) && slt64(gtid_103082, n_75139)) { + if (!(orig_group_126464 == (int64_t) 0 || (flat_idx_126463 == + (orig_group_126464 + + (int64_t) 1) * + (segscan_group_sizze_103197 * + sdiv_up64(m_75136 * + n_75139, + sext_i32_i64(num_threads_126412))) - + (int64_t) 1 || + slt64(srem64(flat_idx_126463, + n_75139), + flat_idx_126463 - + carry_in_flat_idx_126465)))) { + int64_t x_103201; + int64_t x_103202; + + x_103201 = ((__global + int64_t *) mem_120201)[squot64(carry_in_flat_idx_126465, + n_75139) * n_75139 + + (carry_in_flat_idx_126465 - + squot64(carry_in_flat_idx_126465, + n_75139) * + n_75139)]; + 
x_103202 = ((__global int64_t *) mem_120201)[gtid_103074 * + n_75139 + + gtid_103082]; + + int64_t defunc_1_op_res_103203; + + defunc_1_op_res_103203 = add64(x_103201, x_103202); + x_103201 = defunc_1_op_res_103203; + ((__global int64_t *) mem_120201)[gtid_103074 * n_75139 + + gtid_103082] = x_103201; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_0: return; - #undef segmap_group_sizze_40307 + #undef segscan_group_sizze_103197 } -__kernel void mainzisegmap_40155(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, int64_t m_29166, - int32_t k2p2zq_29179, int64_t i32_res_29181, - int32_t m_29312, int64_t nm_29314, __global - unsigned char *defunc_3_map_res_mem_44549, - __global unsigned char *mem_44577) +__kernel void mainziscan_stage3_111562(__global int *global_failure, + int64_t m_75136, int64_t Nmk_76536, + int64_t num_groups_111712, + int32_t num_threads_128469, + int32_t required_groups_128511, __global + unsigned char *mem_124061) { - #define segmap_group_sizze_40215 (mainzisegmap_group_sizze_40158) + #define segscan_group_sizze_111711 (mainzisegscan_group_sizze_111556) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5000,107 +7104,96 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_45828; - int32_t local_tid_45829; - int64_t group_sizze_45832; - int32_t wave_sizze_45831; - int32_t group_tid_45830; - - global_tid_45828 = get_global_id(0); - local_tid_45829 = get_local_id(0); - group_sizze_45832 = get_local_size(0); - wave_sizze_45831 = LOCKSTEP_WIDTH; - group_tid_45830 = get_group_id(0); - - int32_t phys_tid_40155; - - phys_tid_40155 = global_tid_45828; - - int64_t gtid_40153; - - gtid_40153 = squot64(sext_i32_i64(group_tid_45830) * - segmap_group_sizze_40215 + - sext_i32_i64(local_tid_45829), nm_29314); - - int64_t gtid_40154; - - gtid_40154 = sext_i32_i64(group_tid_45830) * segmap_group_sizze_40215 + - sext_i32_i64(local_tid_45829) - 
squot64(sext_i32_i64(group_tid_45830) * - segmap_group_sizze_40215 + - sext_i32_i64(local_tid_45829), - nm_29314) * nm_29314; - if (slt64(gtid_40153, m_29166) && slt64(gtid_40154, nm_29314)) { - int32_t index_primexp_42361 = sext_i64_i32(gtid_40154); - int32_t defunc_0_f_res_40220 = sdiv32(index_primexp_42361, m_29312); - int32_t defunc_0_f_res_40221 = smod32(index_primexp_42361, m_29312); - bool cond_40222 = slt32(defunc_0_f_res_40221, k2p2zq_29179); - float defunc_0_f_res_40223; + int32_t global_tid_128512; + int32_t local_tid_128513; + int64_t group_sizze_128516; + int32_t wave_sizze_128515; + int32_t group_tid_128514; + + global_tid_128512 = get_global_id(0); + local_tid_128513 = get_local_id(0); + group_sizze_128516 = get_local_size(0); + wave_sizze_128515 = LOCKSTEP_WIDTH; + group_tid_128514 = get_group_id(0); + + int32_t phys_tid_111562; + + phys_tid_111562 = global_tid_128512; + + int32_t phys_group_id_128517; + + phys_group_id_128517 = get_group_id(0); + for (int32_t i_128518 = 0; i_128518 < sdiv_up32(required_groups_128511 - + phys_group_id_128517, + sext_i64_i32(num_groups_111712)); + i_128518++) { + int32_t virt_group_id_128519 = phys_group_id_128517 + i_128518 * + sext_i64_i32(num_groups_111712); + int64_t flat_idx_128520 = sext_i32_i64(virt_group_id_128519) * + segscan_group_sizze_111711 + sext_i32_i64(local_tid_128513); + int64_t gtid_111553 = squot64(flat_idx_128520, Nmk_76536); + int64_t gtid_111561 = flat_idx_128520 - squot64(flat_idx_128520, + Nmk_76536) * Nmk_76536; + int64_t orig_group_128521 = squot64(flat_idx_128520, + segscan_group_sizze_111711 * + sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))); + int64_t carry_in_flat_idx_128522 = orig_group_128521 * + (segscan_group_sizze_111711 * sdiv_up64(m_75136 * Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1; - if (cond_40222) { - int64_t i_40224 = sext_i32_i64(defunc_0_f_res_40220); - bool x_40225 = sle64((int64_t) 0, i_40224); - bool y_40226 = slt64(i_40224, 
i32_res_29181); - bool bounds_check_40227 = x_40225 && y_40226; - int64_t j_40228 = sext_i32_i64(defunc_0_f_res_40221); - bool x_40229 = sle64((int64_t) 0, j_40228); - bool y_40230 = slt64(j_40228, i32_res_29181); - bool bounds_check_40231 = x_40229 && y_40230; - bool index_ok_40232 = bounds_check_40227 && bounds_check_40231; - bool index_certs_40233; - - if (!index_ok_40232) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 6) == - -1) { - global_failure_args[0] = i_40224; - global_failure_args[1] = j_40228; - global_failure_args[2] = i32_res_29181; - global_failure_args[3] = i32_res_29181; - ; - } - return; - } - } - - float defunc_0_f_res_t_res_40234 = ((__global - float *) defunc_3_map_res_mem_44549)[gtid_40153 * - (i32_res_29181 * - i32_res_29181) + - i_40224 * - i32_res_29181 + - j_40228]; - - defunc_0_f_res_40223 = defunc_0_f_res_t_res_40234; - } else { - int32_t y_40235 = add32(k2p2zq_29179, defunc_0_f_res_40220); - bool cond_40236 = defunc_0_f_res_40221 == y_40235; - float defunc_0_f_res_f_res_40237; - - if (cond_40236) { - defunc_0_f_res_f_res_40237 = 1.0F; - } else { - defunc_0_f_res_f_res_40237 = 0.0F; + if (slt64(gtid_111553, m_75136) && slt64(gtid_111561, Nmk_76536)) { + if (!(orig_group_128521 == (int64_t) 0 || (flat_idx_128520 == + (orig_group_128521 + + (int64_t) 1) * + (segscan_group_sizze_111711 * + sdiv_up64(m_75136 * + Nmk_76536, + sext_i32_i64(num_threads_128469))) - + (int64_t) 1 || + slt64(srem64(flat_idx_128520, + Nmk_76536), + flat_idx_128520 - + carry_in_flat_idx_128522)))) { + double x_111715; + double x_111716; + + x_111715 = ((__global + double *) mem_124061)[squot64(carry_in_flat_idx_128522, + Nmk_76536) * + Nmk_76536 + + (carry_in_flat_idx_128522 - + squot64(carry_in_flat_idx_128522, + Nmk_76536) * + Nmk_76536)]; + x_111716 = ((__global double *) mem_124061)[gtid_111553 * + Nmk_76536 + + gtid_111561]; + + double defunc_1_op_res_111717; + + defunc_1_op_res_111717 = x_111715 + x_111716; + x_111715 = defunc_1_op_res_111717; + 
((__global double *) mem_124061)[gtid_111553 * Nmk_76536 + + gtid_111561] = x_111715; } - defunc_0_f_res_40223 = defunc_0_f_res_f_res_40237; } - ((__global float *) mem_44577)[gtid_40153 * nm_29314 + gtid_40154] = - defunc_0_f_res_40223; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_0: return; - #undef segmap_group_sizze_40215 + #undef segscan_group_sizze_111711 } -__kernel void mainzisegmap_40396(__global int *global_failure, int64_t N_29165, - int64_t m_29166, int32_t n_29169, - int32_t k2p2zq_29179, int64_t i32_res_29181, - int64_t num_groups_40417, __global - unsigned char *binop_p_mem_44390, __global - unsigned char *mem_44632, __global - unsigned char *mem_44635, __global - unsigned char *mem_44650) +__kernel void mainziscan_stage3_114084(__global int *global_failure, + int64_t N_75135, int64_t m_75136, + int64_t num_groups_114102, + int32_t num_threads_129258, + int32_t required_groups_129300, __global + unsigned char *mem_124906) { - #define segmap_group_sizze_40416 (mainzisegmap_group_sizze_40398) + #define segscan_group_sizze_114101 (mainzisegscan_group_sizze_114078) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5109,80 +7202,78 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_45885; - int32_t local_tid_45886; - int64_t group_sizze_45889; - int32_t wave_sizze_45888; - int32_t group_tid_45887; - - global_tid_45885 = get_global_id(0); - local_tid_45886 = get_local_id(0); - group_sizze_45889 = get_local_size(0); - wave_sizze_45888 = LOCKSTEP_WIDTH; - group_tid_45887 = get_group_id(0); - - int32_t phys_tid_40396; - - phys_tid_40396 = global_tid_45885; - - int32_t phys_group_id_45890; - - phys_group_id_45890 = get_group_id(0); - for (int32_t i_45891 = 0; i_45891 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40416)) - - phys_group_id_45890, sext_i64_i32(num_groups_40417)); - i_45891++) { - int32_t virt_group_id_45892 = phys_group_id_45890 + i_45891 * - sext_i64_i32(num_groups_40417); - 
int64_t gtid_40395 = sext_i32_i64(virt_group_id_45892) * - segmap_group_sizze_40416 + sext_i32_i64(local_tid_45886); + int32_t global_tid_129301; + int32_t local_tid_129302; + int64_t group_sizze_129305; + int32_t wave_sizze_129304; + int32_t group_tid_129303; + + global_tid_129301 = get_global_id(0); + local_tid_129302 = get_local_id(0); + group_sizze_129305 = get_local_size(0); + wave_sizze_129304 = LOCKSTEP_WIDTH; + group_tid_129303 = get_group_id(0); + + int32_t phys_tid_114084; + + phys_tid_114084 = global_tid_129301; + + int32_t phys_group_id_129306; + + phys_group_id_129306 = get_group_id(0); + for (int32_t i_129307 = 0; i_129307 < sdiv_up32(required_groups_129300 - + phys_group_id_129306, + sext_i64_i32(num_groups_114102)); + i_129307++) { + int32_t virt_group_id_129308 = phys_group_id_129306 + i_129307 * + sext_i64_i32(num_groups_114102); + int64_t flat_idx_129309 = sext_i32_i64(virt_group_id_129308) * + segscan_group_sizze_114101 + sext_i32_i64(local_tid_129302); + int64_t gtid_114075 = squot64(flat_idx_129309, N_75135); + int64_t gtid_114083 = flat_idx_129309 - squot64(flat_idx_129309, + N_75135) * N_75135; + int64_t orig_group_129310 = squot64(flat_idx_129309, + segscan_group_sizze_114101 * + sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))); + int64_t carry_in_flat_idx_129311 = orig_group_129310 * + (segscan_group_sizze_114101 * sdiv_up64(m_75136 * N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1; - if (slt64(gtid_40395, m_29166)) { - for (int32_t i_44364 = 0; i_44364 < k2p2zq_29179; i_44364++) { - int64_t i_44305 = sext_i32_i64(i_44364); - float defunc_2_reduce_res_40423; - float redout_44307 = 0.0F; - - for (int32_t i_44363 = 0; i_44363 < n_29169; i_44363++) { - int64_t i_44308 = sext_i32_i64(i_44363); - float x_40428 = ((__global float *) mem_44632)[i_44308 * - m_29166 + - gtid_40395]; - bool isnan_res_40429; - - isnan_res_40429 = futrts_isnan32(x_40428); - - float defunc_1_f_res_40430; - - if (isnan_res_40429) { - 
defunc_1_f_res_40430 = 0.0F; - } else { - float x_40427 = ((__global - float *) binop_p_mem_44390)[i_44305 * - N_29165 + - i_44308]; - float defunc_1_f_res_f_res_40431 = x_40427 * x_40428; - - defunc_1_f_res_40430 = defunc_1_f_res_f_res_40431; - } - - float defunc_1_op_res_40426 = defunc_1_f_res_40430 + - redout_44307; - float redout_tmp_45894 = defunc_1_op_res_40426; - - redout_44307 = redout_tmp_45894; - } - defunc_2_reduce_res_40423 = redout_44307; - ((__global float *) mem_44635)[phys_tid_40396 + i_44305 * - (num_groups_40417 * - segmap_group_sizze_40416)] = - defunc_2_reduce_res_40423; - } - for (int64_t i_45895 = 0; i_45895 < i32_res_29181; i_45895++) { - ((__global float *) mem_44650)[i_45895 * m_29166 + gtid_40395] = - ((__global float *) mem_44635)[phys_tid_40396 + i_45895 * - (num_groups_40417 * - segmap_group_sizze_40416)]; + if (slt64(gtid_114075, m_75136) && slt64(gtid_114083, N_75135)) { + if (!(orig_group_129310 == (int64_t) 0 || (flat_idx_129309 == + (orig_group_129310 + + (int64_t) 1) * + (segscan_group_sizze_114101 * + sdiv_up64(m_75136 * + N_75135, + sext_i32_i64(num_threads_129258))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129309, + N_75135), + flat_idx_129309 - + carry_in_flat_idx_129311)))) { + int64_t x_114106; + int64_t x_114107; + + x_114106 = ((__global + int64_t *) mem_124906)[squot64(carry_in_flat_idx_129311, + N_75135) * N_75135 + + (carry_in_flat_idx_129311 - + squot64(carry_in_flat_idx_129311, + N_75135) * + N_75135)]; + x_114107 = ((__global int64_t *) mem_124906)[gtid_114075 * + N_75135 + + gtid_114083]; + + int64_t defunc_1_op_res_114108; + + defunc_1_op_res_114108 = add64(x_114106, x_114107); + x_114106 = defunc_1_op_res_114108; + ((__global int64_t *) mem_124906)[gtid_114075 * N_75135 + + gtid_114083] = x_114106; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); @@ -5190,17 +7281,16 @@ def sync(self): error_0: return; - #undef segmap_group_sizze_40416 + #undef segscan_group_sizze_114101 } -__kernel void 
mainzisegmap_40537(__global int *global_failure, int64_t m_29166, - int32_t k2p2zq_29179, int64_t i32_res_29181, - int64_t num_groups_40557, __global - unsigned char *mem_44854, __global - unsigned char *mem_44857, __global - unsigned char *mem_44860, __global - unsigned char *mem_44875) +__kernel void mainziscan_stage3_114793(__global int *global_failure, + int64_t m_75136, int64_t iota_arg_77024, + int64_t num_groups_114878, + int32_t num_threads_129615, + int32_t required_groups_129657, __global + unsigned char *mem_124991) { - #define segmap_group_sizze_40556 (mainzisegmap_group_sizze_40539) + #define segscan_group_sizze_114877 (mainzisegscan_group_sizze_114787) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5209,69 +7299,81 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46017; - int32_t local_tid_46018; - int64_t group_sizze_46021; - int32_t wave_sizze_46020; - int32_t group_tid_46019; - - global_tid_46017 = get_global_id(0); - local_tid_46018 = get_local_id(0); - group_sizze_46021 = get_local_size(0); - wave_sizze_46020 = LOCKSTEP_WIDTH; - group_tid_46019 = get_group_id(0); - - int32_t phys_tid_40537; - - phys_tid_40537 = global_tid_46017; - - int32_t phys_group_id_46022; - - phys_group_id_46022 = get_group_id(0); - for (int32_t i_46023 = 0; i_46023 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40556)) - - phys_group_id_46022, sext_i64_i32(num_groups_40557)); - i_46023++) { - int32_t virt_group_id_46024 = phys_group_id_46022 + i_46023 * - sext_i64_i32(num_groups_40557); - int64_t gtid_40536 = sext_i32_i64(virt_group_id_46024) * - segmap_group_sizze_40556 + sext_i32_i64(local_tid_46018); + int32_t global_tid_129658; + int32_t local_tid_129659; + int64_t group_sizze_129662; + int32_t wave_sizze_129661; + int32_t group_tid_129660; + + global_tid_129658 = get_global_id(0); + local_tid_129659 = get_local_id(0); + group_sizze_129662 = get_local_size(0); + wave_sizze_129661 = LOCKSTEP_WIDTH; + group_tid_129660 = 
get_group_id(0); + + int32_t phys_tid_114793; + + phys_tid_114793 = global_tid_129658; + + int32_t phys_group_id_129663; + + phys_group_id_129663 = get_group_id(0); + for (int32_t i_129664 = 0; i_129664 < sdiv_up32(required_groups_129657 - + phys_group_id_129663, + sext_i64_i32(num_groups_114878)); + i_129664++) { + int32_t virt_group_id_129665 = phys_group_id_129663 + i_129664 * + sext_i64_i32(num_groups_114878); + int64_t flat_idx_129666 = sext_i32_i64(virt_group_id_129665) * + segscan_group_sizze_114877 + sext_i32_i64(local_tid_129659); + int64_t gtid_114784 = squot64(flat_idx_129666, iota_arg_77024); + int64_t gtid_114792 = flat_idx_129666 - squot64(flat_idx_129666, + iota_arg_77024) * + iota_arg_77024; + int64_t orig_group_129667 = squot64(flat_idx_129666, + segscan_group_sizze_114877 * + sdiv_up64(m_75136 * iota_arg_77024, + sext_i32_i64(num_threads_129615))); + int64_t carry_in_flat_idx_129668 = orig_group_129667 * + (segscan_group_sizze_114877 * sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1; - if (slt64(gtid_40536, m_29166)) { - for (int32_t i_44366 = 0; i_44366 < k2p2zq_29179; i_44366++) { - int64_t i_44311 = sext_i32_i64(i_44366); - float defunc_0_f_res_40564; - float redout_44313 = 0.0F; - - for (int32_t i_44365 = 0; i_44365 < k2p2zq_29179; i_44365++) { - int64_t i_44314 = sext_i32_i64(i_44365); - float x_40568 = ((__global float *) mem_44857)[i_44314 * - m_29166 + - gtid_40536]; - float x_40569 = ((__global float *) mem_44854)[i_44311 * - (m_29166 * - i32_res_29181) + - i_44314 * - m_29166 + - gtid_40536]; - float defunc_1_f_res_40570 = x_40568 * x_40569; - float defunc_1_op_res_40567 = defunc_1_f_res_40570 + - redout_44313; - float redout_tmp_46026 = defunc_1_op_res_40567; - - redout_44313 = redout_tmp_46026; - } - defunc_0_f_res_40564 = redout_44313; - ((__global float *) mem_44860)[phys_tid_40537 + i_44311 * - (num_groups_40557 * - segmap_group_sizze_40556)] = - defunc_0_f_res_40564; - } - for (int64_t 
i_46027 = 0; i_46027 < i32_res_29181; i_46027++) { - ((__global float *) mem_44875)[i_46027 * m_29166 + gtid_40536] = - ((__global float *) mem_44860)[phys_tid_40537 + i_46027 * - (num_groups_40557 * - segmap_group_sizze_40556)]; + if (slt64(gtid_114784, m_75136) && slt64(gtid_114792, iota_arg_77024)) { + if (!(orig_group_129667 == (int64_t) 0 || (flat_idx_129666 == + (orig_group_129667 + + (int64_t) 1) * + (segscan_group_sizze_114877 * + sdiv_up64(m_75136 * + iota_arg_77024, + sext_i32_i64(num_threads_129615))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129666, + iota_arg_77024), + flat_idx_129666 - + carry_in_flat_idx_129668)))) { + double x_114881; + double x_114882; + + x_114881 = ((__global + double *) mem_124991)[squot64(carry_in_flat_idx_129668, + iota_arg_77024) * + iota_arg_77024 + + (carry_in_flat_idx_129668 - + squot64(carry_in_flat_idx_129668, + iota_arg_77024) * + iota_arg_77024)]; + x_114882 = ((__global double *) mem_124991)[gtid_114784 * + iota_arg_77024 + + gtid_114792]; + + double defunc_1_op_res_114883; + + defunc_1_op_res_114883 = x_114881 + x_114882; + x_114881 = defunc_1_op_res_114883; + ((__global double *) mem_124991)[gtid_114784 * iota_arg_77024 + + gtid_114792] = x_114881; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); @@ -5279,18 +7381,15 @@ def sync(self): error_0: return; - #undef segmap_group_sizze_40556 + #undef segscan_group_sizze_114877 } -__kernel void mainzisegmap_40669(__global int *global_failure, int64_t N_29165, - int64_t m_29166, int32_t k2p2zq_29179, - int64_t i32_res_29181, - int64_t num_groups_40688, __global - unsigned char *mem_44397, __global - unsigned char *mem_44919, __global - unsigned char *mem_44922, __global - unsigned char *mem_44937) +__kernel void mainzisegmap_102688(__global int *global_failure, int64_t N_75135, + double freq_75140, int64_t k2p2zq_75151, + __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120112) { - #define segmap_group_sizze_40687 
(mainzisegmap_group_sizze_40671) + #define segmap_group_sizze_102755 (mainzisegmap_group_sizze_102691) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5299,82 +7398,91 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46105; - int32_t local_tid_46106; - int64_t group_sizze_46109; - int32_t wave_sizze_46108; - int32_t group_tid_46107; + int32_t global_tid_126324; + int32_t local_tid_126325; + int64_t group_sizze_126328; + int32_t wave_sizze_126327; + int32_t group_tid_126326; + + global_tid_126324 = get_global_id(0); + local_tid_126325 = get_local_id(0); + group_sizze_126328 = get_local_size(0); + wave_sizze_126327 = LOCKSTEP_WIDTH; + group_tid_126326 = get_group_id(0); - global_tid_46105 = get_global_id(0); - local_tid_46106 = get_local_id(0); - group_sizze_46109 = get_local_size(0); - wave_sizze_46108 = LOCKSTEP_WIDTH; - group_tid_46107 = get_group_id(0); + int32_t phys_tid_102688; - int32_t phys_tid_40669; + phys_tid_102688 = global_tid_126324; - phys_tid_40669 = global_tid_46105; + int64_t gtid_102686; - int32_t phys_group_id_46110; + gtid_102686 = squot64(sext_i32_i64(group_tid_126326) * + segmap_group_sizze_102755 + + sext_i32_i64(local_tid_126325), N_75135); - phys_group_id_46110 = get_group_id(0); - for (int32_t i_46111 = 0; i_46111 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_40687)) - - phys_group_id_46110, sext_i64_i32(num_groups_40688)); - i_46111++) { - int32_t virt_group_id_46112 = phys_group_id_46110 + i_46111 * - sext_i64_i32(num_groups_40688); - int64_t gtid_40668 = sext_i32_i64(virt_group_id_46112) * - segmap_group_sizze_40687 + sext_i32_i64(local_tid_46106); + int64_t gtid_102687; + + gtid_102687 = sext_i32_i64(group_tid_126326) * segmap_group_sizze_102755 + + sext_i32_i64(local_tid_126325) - + squot64(sext_i32_i64(group_tid_126326) * segmap_group_sizze_102755 + + sext_i32_i64(local_tid_126325), N_75135) * N_75135; + if (slt64(gtid_102686, k2p2zq_75151) && slt64(gtid_102687, N_75135)) { + bool 
index_primexp_115010 = gtid_102686 == (int64_t) 0; + double defunc_0_f_res_102761; - if (slt64(gtid_40668, m_29166)) { - for (int64_t i_44321 = 0; i_44321 < N_29165; i_44321++) { - float defunc_0_f_res_40694; - float redout_44323 = 0.0F; + if (index_primexp_115010) { + defunc_0_f_res_102761 = 1.0; + } else { + int64_t x_102760 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_102687]; + bool cond_102762 = gtid_102686 == (int64_t) 1; + double defunc_0_f_res_f_res_102763; + + if (cond_102762) { + double i64_res_102764 = sitofp_i64_f64(x_102760); - for (int32_t i_44369 = 0; i_44369 < k2p2zq_29179; i_44369++) { - int64_t i_44324 = sext_i32_i64(i_44369); - float x_40698 = ((__global float *) mem_44919)[i_44324 * - m_29166 + - gtid_40668]; - float x_40699 = ((__global float *) mem_44397)[i_44321 * - i32_res_29181 + - i_44324]; - float defunc_1_f_res_40700 = x_40698 * x_40699; - float defunc_1_op_res_40697 = defunc_1_f_res_40700 + - redout_44323; - float redout_tmp_46114 = defunc_1_op_res_40697; + defunc_0_f_res_f_res_102763 = i64_res_102764; + } else { + int64_t i64_arg_102765 = sdiv64(gtid_102686, (int64_t) 2); + double i64_res_102766 = sitofp_i64_f64(i64_arg_102765); + double i64_res_102767 = sitofp_i64_f64(x_102760); + double x_102768 = 6.283185307179586 * i64_res_102766; + double x_102769 = i64_res_102767 * x_102768; + double angle_102770 = x_102769 / freq_75140; + int64_t x_102771 = smod64(gtid_102686, (int64_t) 2); + bool cond_102772 = x_102771 == (int64_t) 0; + double defunc_0_f_res_f_res_f_res_102773; + + if (cond_102772) { + double sin_res_102774; - redout_44323 = redout_tmp_46114; + sin_res_102774 = futrts_sin64(angle_102770); + defunc_0_f_res_f_res_f_res_102773 = sin_res_102774; + } else { + double cos_res_102775; + + cos_res_102775 = futrts_cos64(angle_102770); + defunc_0_f_res_f_res_f_res_102773 = cos_res_102775; } - defunc_0_f_res_40694 = redout_44323; - ((__global float *) mem_44922)[phys_tid_40669 + i_44321 * - (num_groups_40688 * - 
segmap_group_sizze_40687)] = - defunc_0_f_res_40694; - } - for (int64_t i_46115 = 0; i_46115 < N_29165; i_46115++) { - ((__global float *) mem_44937)[i_46115 * m_29166 + gtid_40668] = - ((__global float *) mem_44922)[phys_tid_40669 + i_46115 * - (num_groups_40688 * - segmap_group_sizze_40687)]; + defunc_0_f_res_f_res_102763 = defunc_0_f_res_f_res_f_res_102773; } + defunc_0_f_res_102761 = defunc_0_f_res_f_res_102763; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + ((__global double *) mem_120112)[gtid_102686 * N_75135 + gtid_102687] = + defunc_0_f_res_102761; } error_0: return; - #undef segmap_group_sizze_40687 + #undef segmap_group_sizze_102755 } -__kernel void mainzisegmap_40949(__global int *global_failure, int64_t N_29165, - int64_t m_29166, __global - unsigned char *mem_45163, __global - unsigned char *mem_45166, __global - unsigned char *mem_45172, __global - unsigned char *mem_45175) +__kernel void mainzisegmap_102788(__global int *global_failure, int64_t N_75135, + double freq_75140, int64_t k2p2zq_75151, + __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120116) { - #define segmap_group_sizze_41115 (mainzisegmap_group_sizze_40952) + #define segmap_group_sizze_102851 (mainzisegmap_group_sizze_102791) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5383,81 +7491,82 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46332; - int32_t local_tid_46333; - int64_t group_sizze_46336; - int32_t wave_sizze_46335; - int32_t group_tid_46334; + int32_t global_tid_126329; + int32_t local_tid_126330; + int64_t group_sizze_126333; + int32_t wave_sizze_126332; + int32_t group_tid_126331; - global_tid_46332 = get_global_id(0); - local_tid_46333 = get_local_id(0); - group_sizze_46336 = get_local_size(0); - wave_sizze_46335 = LOCKSTEP_WIDTH; - group_tid_46334 = get_group_id(0); + global_tid_126329 = get_global_id(0); + local_tid_126330 = get_local_id(0); + group_sizze_126333 = get_local_size(0); + 
wave_sizze_126332 = LOCKSTEP_WIDTH; + group_tid_126331 = get_group_id(0); - int32_t phys_tid_40949; + int32_t phys_tid_102788; - phys_tid_40949 = global_tid_46332; + phys_tid_102788 = global_tid_126329; - int64_t gtid_40947; + int64_t gtid_102786; - gtid_40947 = squot64(sext_i32_i64(group_tid_46334) * - segmap_group_sizze_41115 + - sext_i32_i64(local_tid_46333), N_29165); + gtid_102786 = squot64(sext_i32_i64(group_tid_126331) * + segmap_group_sizze_102851 + + sext_i32_i64(local_tid_126330), N_75135); - int64_t gtid_40948; + int64_t gtid_102787; - gtid_40948 = sext_i32_i64(group_tid_46334) * segmap_group_sizze_41115 + - sext_i32_i64(local_tid_46333) - squot64(sext_i32_i64(group_tid_46334) * - segmap_group_sizze_41115 + - sext_i32_i64(local_tid_46333), - N_29165) * N_29165; - if (slt64(gtid_40947, m_29166) && slt64(gtid_40948, N_29165)) { - float x_41123 = ((__global float *) mem_45166)[gtid_40947 * N_29165 + - gtid_40948]; - int32_t index_primexp_42377 = sext_i64_i32(gtid_40948); - bool isnan_res_41126; + gtid_102787 = sext_i32_i64(group_tid_126331) * segmap_group_sizze_102851 + + sext_i32_i64(local_tid_126330) - + squot64(sext_i32_i64(group_tid_126331) * segmap_group_sizze_102851 + + sext_i32_i64(local_tid_126330), N_75135) * N_75135; + if (slt64(gtid_102786, k2p2zq_75151) && slt64(gtid_102787, N_75135)) { + bool index_primexp_115017 = gtid_102786 == (int64_t) 0; + double defunc_0_f_res_102857; - isnan_res_41126 = futrts_isnan32(x_41123); - - bool defunc_0_p_res_41127 = !isnan_res_41126; - int64_t defunc_1_f_res_41128; - - if (defunc_0_p_res_41127) { - int64_t x_41124 = ((__global int64_t *) mem_45163)[gtid_40947 * - N_29165 + - gtid_40948]; - int64_t defunc_1_f_res_t_res_41129 = sub64(x_41124, (int64_t) 1); - - defunc_1_f_res_41128 = defunc_1_f_res_t_res_41129; + if (index_primexp_115017) { + defunc_0_f_res_102857 = 1.0; } else { - defunc_1_f_res_41128 = (int64_t) -1; - } - if ((sle64((int64_t) 0, gtid_40947) && slt64(gtid_40947, m_29166)) && - (sle64((int64_t) 0, 
defunc_1_f_res_41128) && - slt64(defunc_1_f_res_41128, N_29165))) { - ((__global int32_t *) mem_45175)[gtid_40947 * N_29165 + - defunc_1_f_res_41128] = - index_primexp_42377; - } - if ((sle64((int64_t) 0, gtid_40947) && slt64(gtid_40947, m_29166)) && - (sle64((int64_t) 0, defunc_1_f_res_41128) && - slt64(defunc_1_f_res_41128, N_29165))) { - ((__global float *) mem_45172)[gtid_40947 * N_29165 + - defunc_1_f_res_41128] = x_41123; + int64_t x_102856 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_102787]; + int64_t i_102858 = add64((int64_t) 1, gtid_102786); + int64_t i64_arg_102859 = sdiv64(i_102858, (int64_t) 2); + double i64_res_102860 = sitofp_i64_f64(i64_arg_102859); + double i64_res_102861 = sitofp_i64_f64(x_102856); + double x_102862 = 6.283185307179586 * i64_res_102860; + double x_102863 = i64_res_102861 * x_102862; + double angle_102864 = x_102863 / freq_75140; + int64_t x_102865 = smod64(i_102858, (int64_t) 2); + bool cond_102866 = x_102865 == (int64_t) 0; + double defunc_0_f_res_f_res_102867; + + if (cond_102866) { + double sin_res_102868; + + sin_res_102868 = futrts_sin64(angle_102864); + defunc_0_f_res_f_res_102867 = sin_res_102868; + } else { + double cos_res_102869; + + cos_res_102869 = futrts_cos64(angle_102864); + defunc_0_f_res_f_res_102867 = cos_res_102869; + } + defunc_0_f_res_102857 = defunc_0_f_res_f_res_102867; } + ((__global double *) mem_120116)[gtid_102786 * N_75135 + gtid_102787] = + defunc_0_f_res_102857; } error_0: return; - #undef segmap_group_sizze_41115 + #undef segmap_group_sizze_102851 } -__kernel void mainzisegmap_41025(__global int *global_failure, int64_t N_29165, - int64_t m_29166, int64_t i_29469, __global - unsigned char *mem_45163, __global - unsigned char *mem_45169) +__kernel void mainzisegmap_102881(__global int *global_failure, int64_t N_75135, + int64_t k2p2zq_75151, + double defunc_0_f_res_75214, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124) { - #define segmap_group_sizze_41079 
(mainzisegmap_group_sizze_41027) + #define segmap_group_sizze_102902 (mainzisegmap_group_sizze_102884) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5466,109 +7575,57 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46309; - int32_t local_tid_46310; - int64_t group_sizze_46313; - int32_t wave_sizze_46312; - int32_t group_tid_46311; - - global_tid_46309 = get_global_id(0); - local_tid_46310 = get_local_id(0); - group_sizze_46313 = get_local_size(0); - wave_sizze_46312 = LOCKSTEP_WIDTH; - group_tid_46311 = get_group_id(0); - - int32_t phys_tid_41025; - - phys_tid_41025 = global_tid_46309; - - int64_t gtid_41024; - - gtid_41024 = sext_i32_i64(group_tid_46311) * segmap_group_sizze_41079 + - sext_i32_i64(local_tid_46310); - if (slt64(gtid_41024, m_29166)) { - int64_t last_res_41083 = ((__global int64_t *) mem_45163)[gtid_41024 * - N_29165 + - i_29469]; - int32_t defunc_0_f_res_41084 = sext_i64_i32(last_res_41083); - - ((__global int32_t *) mem_45169)[gtid_41024] = defunc_0_f_res_41084; - } - - error_0: - return; - #undef segmap_group_sizze_41079 -} -__kernel void mainzisegmap_41288(__global int *global_failure, int64_t m_29166, - float hfrac_29171, int32_t k2p2_29177, __global - unsigned char *mem_45232, __global - unsigned char *mem_45235, __global - unsigned char *mem_45238, __global - unsigned char *mem_45240) -{ - #define segmap_group_sizze_41381 (mainzisegmap_group_sizze_41290) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t global_tid_126334; + int32_t local_tid_126335; + int64_t group_sizze_126338; + int32_t wave_sizze_126337; + int32_t group_tid_126336; - if (*global_failure >= 0) - return; + global_tid_126334 = get_global_id(0); + local_tid_126335 = get_local_id(0); + group_sizze_126338 = get_local_size(0); + wave_sizze_126337 = LOCKSTEP_WIDTH; + group_tid_126336 = get_group_id(0); - int32_t global_tid_46485; - int32_t local_tid_46486; - int64_t group_sizze_46489; - int32_t 
wave_sizze_46488; - int32_t group_tid_46487; + int32_t phys_tid_102881; - global_tid_46485 = get_global_id(0); - local_tid_46486 = get_local_id(0); - group_sizze_46489 = get_local_size(0); - wave_sizze_46488 = LOCKSTEP_WIDTH; - group_tid_46487 = get_group_id(0); + phys_tid_102881 = global_tid_126334; - int32_t phys_tid_41288; + int64_t gtid_102879; - phys_tid_41288 = global_tid_46485; + gtid_102879 = squot64(sext_i32_i64(group_tid_126336) * + segmap_group_sizze_102902 + + sext_i32_i64(local_tid_126335), k2p2zq_75151); - int64_t gtid_41287; + int64_t gtid_102880; - gtid_41287 = sext_i32_i64(group_tid_46487) * segmap_group_sizze_41381 + - sext_i32_i64(local_tid_46486); - if (slt64(gtid_41287, m_29166)) { - int32_t defunc_0_f_res_41385 = ((__global - int32_t *) mem_45232)[gtid_41287]; - float defunc_0_f_res_41386 = ((__global float *) mem_45235)[gtid_41287]; - int32_t r32_arg_41387 = sub32(defunc_0_f_res_41385, k2p2_29177); - float i32_res_41388 = sitofp_i32_f32(r32_arg_41387); - float sqrt_arg_41389 = defunc_0_f_res_41386 / i32_res_41388; - float sqrt_res_41390; + gtid_102880 = sext_i32_i64(group_tid_126336) * segmap_group_sizze_102902 + + sext_i32_i64(local_tid_126335) - + squot64(sext_i32_i64(group_tid_126336) * segmap_group_sizze_102902 + + sext_i32_i64(local_tid_126335), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_102879, N_75135) && slt64(gtid_102880, k2p2zq_75151)) { + double x_102905 = ((__global double *) mem_120120)[gtid_102879 * + k2p2zq_75151 + + gtid_102880]; + double defunc_0_f_res_102906 = defunc_0_f_res_75214 + x_102905; - sqrt_res_41390 = futrts_sqrt32(sqrt_arg_41389); - - float i32_res_41391 = sitofp_i32_f32(defunc_0_f_res_41385); - float t32_arg_41392 = hfrac_29171 * i32_res_41391; - int32_t f32_res_41393 = fptosi_f32_i32(t32_arg_41392); - - ((__global int32_t *) mem_45238)[gtid_41287] = f32_res_41393; - ((__global float *) mem_45240)[gtid_41287] = sqrt_res_41390; + ((__global double *) mem_120124)[gtid_102879 * k2p2zq_75151 + + gtid_102880] = 
defunc_0_f_res_102906; } error_0: return; - #undef segmap_group_sizze_41381 + #undef segmap_group_sizze_102902 } -__kernel void mainzisegmap_41589(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, int64_t N_29165, - int32_t n_29169, float lam_29172, - int64_t iota32_arg_29597, float i32_res_29609, - __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_45282, __global - unsigned char *mem_45284) +__kernel void mainzisegmap_103005(__global int *global_failure, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + int64_t m_75231, __global + unsigned char *images_mem_120108, __global + unsigned char *mem_120201, __global + unsigned char *mem_120206, __global + unsigned char *mem_120209) { - #define segmap_group_sizze_41611 (mainzisegmap_group_sizze_41591) + #define segmap_group_sizze_103239 (mainzisegmap_group_sizze_103008) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5577,221 +7634,233 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46595; - int32_t local_tid_46596; - int64_t group_sizze_46599; - int32_t wave_sizze_46598; - int32_t group_tid_46597; + int32_t global_tid_126489; + int32_t local_tid_126490; + int64_t group_sizze_126493; + int32_t wave_sizze_126492; + int32_t group_tid_126491; + + global_tid_126489 = get_global_id(0); + local_tid_126490 = get_local_id(0); + group_sizze_126493 = get_local_size(0); + wave_sizze_126492 = LOCKSTEP_WIDTH; + group_tid_126491 = get_group_id(0); + + int32_t phys_tid_103005; - global_tid_46595 = get_global_id(0); - local_tid_46596 = get_local_id(0); - group_sizze_46599 = get_local_size(0); - wave_sizze_46598 = LOCKSTEP_WIDTH; - group_tid_46597 = get_group_id(0); + phys_tid_103005 = global_tid_126489; - int32_t phys_tid_41589; + int64_t gtid_103003; - phys_tid_41589 = global_tid_46595; + gtid_103003 = squot64(sext_i32_i64(group_tid_126491) * + segmap_group_sizze_103239 + + sext_i32_i64(local_tid_126490), n_75139); - 
int64_t gtid_41588; + int64_t gtid_103004; - gtid_41588 = sext_i32_i64(group_tid_46597) * segmap_group_sizze_41611 + - sext_i32_i64(local_tid_46596); - if (slt64(gtid_41588, iota32_arg_29597)) { - int32_t defunc_0_f_res_41616 = sext_i64_i32(gtid_41588); - int32_t i_41617 = add32(n_29169, defunc_0_f_res_41616); - int64_t i_41618 = sext_i32_i64(i_41617); - bool x_41619 = sle64((int64_t) 0, i_41618); - bool y_41620 = slt64(i_41618, N_29165); - bool bounds_check_41621 = x_41619 && y_41620; - bool index_certs_41622; + gtid_103004 = sext_i32_i64(group_tid_126491) * segmap_group_sizze_103239 + + sext_i32_i64(local_tid_126490) - + squot64(sext_i32_i64(group_tid_126491) * segmap_group_sizze_103239 + + sext_i32_i64(local_tid_126490), n_75139) * n_75139; + if (slt64(gtid_103003, m_75136) && slt64(gtid_103004, n_75139)) { + int64_t binop_y_115030 = (int64_t) -1 * gtid_103004; + int64_t slice_115031 = m_75231 + binop_y_115030; + double x_103243 = ((__global double *) images_mem_120108)[gtid_103003 * + N_75135 + + slice_115031]; + bool defunc_0_f_res_103246; - if (!bounds_check_41621) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 24) == -1) { - global_failure_args[0] = i_41618; - global_failure_args[1] = N_29165; - ; - } - return; - } - } + defunc_0_f_res_103246 = futrts_isnan64(x_103243); - int32_t time_41623 = ((__global - int32_t *) mappingindices_mem_44380)[i_41618]; - float i32_res_41624 = sitofp_i32_f32(time_41623); - float logplus_arg_41625 = i32_res_41624 / i32_res_29609; - bool cond_41626 = 2.7182817F < logplus_arg_41625; - float logplus_res_41627; + bool defunc_0_g_res_103247 = !defunc_0_f_res_103246; + int64_t defunc_1_f_res_103248; - if (cond_41626) { - float log_res_41628; + if (defunc_0_g_res_103247) { + int64_t x_103244 = ((__global int64_t *) mem_120201)[gtid_103003 * + n_75139 + + gtid_103004]; + int64_t defunc_1_f_res_t_res_103249 = sub64(x_103244, (int64_t) 1); - log_res_41628 = futrts_log32(logplus_arg_41625); - logplus_res_41627 = log_res_41628; 
+ defunc_1_f_res_103248 = defunc_1_f_res_t_res_103249; } else { - logplus_res_41627 = 1.0F; + defunc_1_f_res_103248 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_103003) && slt64(gtid_103003, m_75136)) && + (sle64((int64_t) 0, defunc_1_f_res_103248) && + slt64(defunc_1_f_res_103248, n_75139))) { + ((__global int64_t *) mem_120209)[gtid_103003 * n_75139 + + defunc_1_f_res_103248] = + gtid_103004; + } + if ((sle64((int64_t) 0, gtid_103003) && slt64(gtid_103003, m_75136)) && + (sle64((int64_t) 0, defunc_1_f_res_103248) && + slt64(defunc_1_f_res_103248, n_75139))) { + ((__global double *) mem_120206)[gtid_103003 * n_75139 + + defunc_1_f_res_103248] = x_103243; } - - float sqrt_res_41629; - - sqrt_res_41629 = futrts_sqrt32(logplus_res_41627); - - float defunc_0_f_res_41630 = lam_29172 * sqrt_res_41629; - - ((__global int32_t *) mem_45282)[gtid_41588] = defunc_0_f_res_41616; - ((__global float *) mem_45284)[gtid_41588] = defunc_0_f_res_41630; } error_0: return; - #undef segmap_group_sizze_41611 + #undef segmap_group_sizze_103239 } -__kernel void mainzisegmap_42001(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, int64_t N_29165, - int64_t m_29166, int32_t n_29169, __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45298, __global - unsigned char *mem_45305, __global - unsigned char *mem_45307, __global - unsigned char *mem_45309, __global - unsigned char *mem_45312, __global - unsigned char *mem_45314) +__kernel void mainzisegmap_103283(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, int64_t m_75231, + int64_t defunc_2_reduce_res_75260, + int64_t num_groups_103386, + int64_t num_threads_126105, __global + unsigned char *defunc_3_map_res_mem_120232, + __global unsigned char *mem_120235, __global + 
unsigned char *mem_120238, __global + unsigned char *mem_120246, __global + unsigned char *mem_125145) { - #define segmap_group_sizze_42285 (mainzisegmap_group_sizze_42003) + #define segmap_group_sizze_103385 (mainzisegmap_group_sizze_103286) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; - - int32_t global_tid_46790; - int32_t local_tid_46791; - int64_t group_sizze_46794; - int32_t wave_sizze_46793; - int32_t group_tid_46792; - - global_tid_46790 = get_global_id(0); - local_tid_46791 = get_local_id(0); - group_sizze_46794 = get_local_size(0); - wave_sizze_46793 = LOCKSTEP_WIDTH; - group_tid_46792 = get_group_id(0); - - int32_t phys_tid_42001; - - phys_tid_42001 = global_tid_46790; - - int64_t gtid_42000; - - gtid_42000 = sext_i32_i64(group_tid_46792) * segmap_group_sizze_42285 + - sext_i32_i64(local_tid_46791); - if (slt64(gtid_42000, m_29166)) { - int32_t x_42289 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_42000]; - int32_t y_42291 = ((__global int32_t *) mem_45298)[gtid_42000]; - bool acc0_42293 = ((__global bool *) mem_45305)[gtid_42000]; - bool x_42298 = acc0_42293 && acc0_42293; - int32_t defunc_1_op_res_f_res_42302; - - if (acc0_42293) { - int32_t acc0_42294 = ((__global int32_t *) mem_45307)[gtid_42000]; - - defunc_1_op_res_f_res_42302 = acc0_42294; - } else { - defunc_1_op_res_f_res_42302 = -1; - } - - bool cond_42308 = y_42291 == 0; - float defunc_0_f_res_42309; - - if (cond_42308) { - defunc_0_f_res_42309 = 0.0F; - } else { - float acc0_42295 = ((__global float *) mem_45309)[gtid_42000]; - float i32_res_42310 = sitofp_i32_f32(y_42291); - float defunc_0_f_res_f_res_42311 = acc0_42295 / i32_res_42310; - - defunc_0_f_res_42309 = defunc_0_f_res_f_res_42311; - } - - bool cond_42312 = !x_42298; - int32_t fst_breakzq_42313; + if (failure_is_an_option) { + int failed = *global_failure >= 0; - if (cond_42312) { - fst_breakzq_42313 = -1; - } 
else { - bool cond_42314 = slt32(defunc_1_op_res_f_res_42302, y_42291); - int32_t adjustValInds_res_42315; + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126527; + int32_t local_tid_126528; + int64_t group_sizze_126531; + int32_t wave_sizze_126530; + int32_t group_tid_126529; + + global_tid_126527 = get_global_id(0); + local_tid_126528 = get_local_id(0); + group_sizze_126531 = get_local_size(0); + wave_sizze_126530 = LOCKSTEP_WIDTH; + group_tid_126529 = get_group_id(0); + + int32_t phys_tid_103283; + + phys_tid_103283 = global_tid_126527; + + int32_t phys_group_id_126532; + + phys_group_id_126532 = get_group_id(0); + for (int32_t i_126533 = 0; i_126533 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * defunc_2_reduce_res_75260, + segmap_group_sizze_103385)) - + phys_group_id_126532, sext_i64_i32(num_groups_103386)); + i_126533++) { + int32_t virt_group_id_126534 = phys_group_id_126532 + i_126533 * + sext_i64_i32(num_groups_103386); + int64_t gtid_103281 = squot64(sext_i32_i64(virt_group_id_126534) * + segmap_group_sizze_103385 + + sext_i32_i64(local_tid_126528), + defunc_2_reduce_res_75260); + int64_t gtid_103282 = sext_i32_i64(virt_group_id_126534) * + segmap_group_sizze_103385 + sext_i32_i64(local_tid_126528) - + squot64(sext_i32_i64(virt_group_id_126534) * + segmap_group_sizze_103385 + + sext_i32_i64(local_tid_126528), + defunc_2_reduce_res_75260) * defunc_2_reduce_res_75260; + + if (slt64(gtid_103281, m_75136) && slt64(gtid_103282, + defunc_2_reduce_res_75260)) { + int64_t x_103389 = ((__global + int64_t *) defunc_3_map_res_mem_120232)[gtid_103281 * + n_75139 + + gtid_103282]; + bool cond_103390 = sle64((int64_t) 0, x_103389); - if (cond_42314) { - int32_t i_42316 = add32(x_42289, defunc_1_op_res_f_res_42302); - int64_t i_42317 = sext_i32_i64(i_42316); - bool x_42318 = sle64((int64_t) 0, i_42317); - bool y_42319 = slt64(i_42317, N_29165); - bool bounds_check_42320 = x_42318 && y_42319; - bool 
index_certs_42321; - - if (!bounds_check_42320) { + if (cond_103390) { + bool y_103392 = slt64(x_103389, n_75139); + bool bounds_check_103393 = cond_103390 && y_103392; + bool empty_slice_103394 = k2p2zq_75151 == (int64_t) 0; + int64_t m_103395 = sub64(k2p2zq_75151, (int64_t) 1); + bool zzero_leq_i_p_m_t_s_103396 = sle64((int64_t) 0, m_103395); + bool i_p_m_t_s_leq_w_103397 = slt64(m_103395, k2p2zq_75151); + bool i_lte_j_103398 = sle64((int64_t) 0, k2p2zq_75151); + bool y_103399 = zzero_leq_i_p_m_t_s_103396 && + i_p_m_t_s_leq_w_103397; + bool y_103400 = i_lte_j_103398 && y_103399; + bool ok_or_empty_103401 = empty_slice_103394 || y_103400; + bool index_ok_103402 = bounds_check_103393 && + ok_or_empty_103401; + bool index_certs_103403; + + if (!index_ok_103402) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 30) == + if (atomic_cmpxchg_i32_global(global_failure, -1, 0) == -1) { - global_failure_args[0] = i_42317; - global_failure_args[1] = N_29165; + global_failure_args[0] = x_103389; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = n_75139; + global_failure_args[3] = k2p2zq_75151; ; } - return; + local_failure = true; + goto error_0; } } - int32_t x_42322 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_42000 * - N_29165 + - i_42317]; - int32_t adjustValInds_res_t_res_42323 = sub32(x_42322, n_29169); + int64_t binop_y_103404 = (int64_t) -1 * x_103389; + int64_t slice_103405 = m_75231 + binop_y_103404; - adjustValInds_res_42315 = adjustValInds_res_t_res_42323; + for (int64_t i_126535 = 0; i_126535 < k2p2zq_75151; + i_126535++) { + ((__global double *) mem_125145)[phys_tid_103283 + + i_126535 * + num_threads_126105] = + ((__global double *) mem_120235)[slice_103405 + + i_126535 * N_75135]; + } } else { - adjustValInds_res_42315 = -1; + for (int64_t i_126536 = 0; i_126536 < k2p2zq_75151; + i_126536++) { + ((__global double *) mem_120238)[phys_tid_103283 + + i_126536 * + num_threads_126105] = NAN; + } + for (int64_t i_126537 = 0; 
i_126537 < k2p2zq_75151; + i_126537++) { + ((__global double *) mem_125145)[phys_tid_103283 + + i_126537 * + num_threads_126105] = + ((__global double *) mem_120238)[phys_tid_103283 + + i_126537 * + num_threads_126105]; + } + } + for (int64_t i_126538 = 0; i_126538 < k2p2zq_75151; i_126538++) { + ((__global double *) mem_120246)[i_126538 * + (defunc_2_reduce_res_75260 * + m_75136) + gtid_103281 * + defunc_2_reduce_res_75260 + + gtid_103282] = ((__global + double *) mem_125145)[phys_tid_103283 + + i_126538 * + num_threads_126105]; } - fst_breakzq_42313 = adjustValInds_res_42315; - } - - bool cond_42324 = sle32(x_42289, 5); - bool cond_f_res_42325 = sle32(y_42291, 5); - bool x_42326 = !cond_42324; - bool y_42327 = cond_f_res_42325 && x_42326; - bool cond_42328 = cond_42324 || y_42327; - int32_t fst_breakzq_42329; - - if (cond_42328) { - fst_breakzq_42329 = -2; - } else { - fst_breakzq_42329 = fst_breakzq_42313; } - ((__global int32_t *) mem_45312)[gtid_42000] = fst_breakzq_42329; - ((__global float *) mem_45314)[gtid_42000] = defunc_0_f_res_42309; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_0: return; - #undef segmap_group_sizze_42285 + #undef segmap_group_sizze_103385 } -__kernel void mainzisegmap_42155(__global int *global_failure, int64_t m_29166, - int64_t num_groups_42178, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global unsigned char *mem_45296, __global - unsigned char *mem_45298) +__kernel void mainzisegmap_103419(__global int *global_failure, + int64_t k2p2zq_75151, __global + unsigned char *mem_120252) { - #define segmap_group_sizze_42177 (mainzisegmap_group_sizze_42157) + #define segmap_group_sizze_103444 (mainzisegmap_group_sizze_103422) const int block_dim0 = 0; const int block_dim1 = 1; @@ -5800,80 +7869,101 @@ def sync(self): if (*global_failure >= 0) return; - int32_t global_tid_46628; - int32_t 
local_tid_46629; - int64_t group_sizze_46632; - int32_t wave_sizze_46631; - int32_t group_tid_46630; + int32_t global_tid_126549; + int32_t local_tid_126550; + int64_t group_sizze_126553; + int32_t wave_sizze_126552; + int32_t group_tid_126551; + + global_tid_126549 = get_global_id(0); + local_tid_126550 = get_local_id(0); + group_sizze_126553 = get_local_size(0); + wave_sizze_126552 = LOCKSTEP_WIDTH; + group_tid_126551 = get_group_id(0); + + int32_t phys_tid_103419; - global_tid_46628 = get_global_id(0); - local_tid_46629 = get_local_id(0); - group_sizze_46632 = get_local_size(0); - wave_sizze_46631 = LOCKSTEP_WIDTH; - group_tid_46630 = get_group_id(0); + phys_tid_103419 = global_tid_126549; - int32_t phys_tid_42155; + int64_t gtid_103417; - phys_tid_42155 = global_tid_46628; + gtid_103417 = squot64(sext_i32_i64(group_tid_126551) * + segmap_group_sizze_103444 + + sext_i32_i64(local_tid_126550), k2p2zq_75151); - int32_t phys_group_id_46633; + int64_t gtid_103418; - phys_group_id_46633 = get_group_id(0); - for (int32_t i_46634 = 0; i_46634 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, segmap_group_sizze_42177)) - - phys_group_id_46633, sext_i64_i32(num_groups_42178)); - i_46634++) { - int32_t virt_group_id_46635 = phys_group_id_46633 + i_46634 * - sext_i64_i32(num_groups_42178); - int64_t gtid_42154 = sext_i32_i64(virt_group_id_46635) * - segmap_group_sizze_42177 + sext_i32_i64(local_tid_46629); + gtid_103418 = sext_i32_i64(group_tid_126551) * segmap_group_sizze_103444 + + sext_i32_i64(local_tid_126550) - + squot64(sext_i32_i64(group_tid_126551) * segmap_group_sizze_103444 + + sext_i32_i64(local_tid_126550), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_103417, k2p2zq_75151) && slt64(gtid_103418, k2p2zq_75151)) { + bool cond_103449 = gtid_103418 == gtid_103417; + double defunc_0_f_res_103450; - if (slt64(gtid_42154, m_29166)) { - int32_t x_42184 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_42154]; - int32_t x_42185 = ((__global - int32_t *) 
defunc_3_map_res_mem_45245)[gtid_42154]; - float x_42186 = ((__global - float *) defunc_3_map_res_mem_45246)[gtid_42154]; - int32_t y_42187 = sub32(x_42184, x_42185); - float i32_res_42188 = sitofp_i32_f32(x_42185); - float sqrt_res_42189; - - sqrt_res_42189 = futrts_sqrt32(i32_res_42188); - - float y_42190 = x_42186 * sqrt_res_42189; - - ((__global float *) mem_45296)[gtid_42154] = y_42190; - ((__global int32_t *) mem_45298)[gtid_42154] = y_42187; + if (cond_103449) { + defunc_0_f_res_103450 = 1.0; + } else { + defunc_0_f_res_103450 = 0.0; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + ((__global double *) mem_120252)[gtid_103417 * k2p2zq_75151 + + gtid_103418] = defunc_0_f_res_103450; } error_0: return; - #undef segmap_group_sizze_42177 + #undef segmap_group_sizze_103444 } -__kernel void mainzisegmap_intragroup_39374(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_44563_backing_aligned_0, - __local volatile - int64_t *mem_44553_backing_aligned_1, - int32_t k2p2zq_29179, - int64_t i32_res_29181, - int32_t m_29312, int64_t nm_29314, - int64_t i32_res_29329, __global - unsigned char *defunc_3_map_res_mem_44549, - __global unsigned char *mem_44573) +__kernel void mainzisegmap_103550(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_75223, + unsigned char y_75227, + unsigned char ok_or_empty_75229, + int64_t min_res_75341, int64_t k_75342, + int64_t num_groups_104006, + int64_t binop_x_120251, + int64_t num_threads_126109, __global + unsigned char *mem_120252, __global + unsigned char *mem_120257, __global + unsigned char *mem_120261, __global + unsigned char *mem_120265, __global + unsigned char *mem_120268, __global + unsigned char *mem_120271, __global + unsigned char *mem_120273, __global + unsigned char *mem_120608, __global + unsigned char *mem_120649, __global + 
unsigned char *mem_120661, __global + unsigned char *mem_120690, __global + unsigned char *mem_120763, __global + unsigned char *mem_120778, __global + unsigned char *mem_120790, __global + unsigned char *mem_120801, __global + unsigned char *mem_120821, __global + unsigned char *mem_120824, __global + unsigned char *mem_120878, __global + unsigned char *mem_120881, __global + unsigned char *mem_120883, __global + unsigned char *mem_125150, __global + unsigned char *mem_125152, __global + unsigned char *mem_125160, __global + unsigned char *mem_125421, __global + unsigned char *mem_125429, __global + unsigned char *mem_125431, __global + unsigned char *mem_125491, __global + unsigned char *double_buffer_mem_125535, + __global + unsigned char *double_buffer_mem_125536, + __global + unsigned char *double_buffer_mem_125537, + __global + unsigned char *double_buffer_mem_125548) { + #define segmap_group_sizze_104005 (mainzisegmap_group_sizze_103552) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44563_backing_1 = (__local volatile - char *) mem_44563_backing_aligned_0; - __local volatile char *restrict mem_44553_backing_0 = (__local volatile - char *) mem_44553_backing_aligned_1; volatile __local bool local_failure; if (failure_is_an_option) { @@ -5885,1248 +7975,2613 @@ def sync(self): local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_45820; - int32_t local_tid_45821; - int64_t group_sizze_45824; - int32_t wave_sizze_45823; - int32_t group_tid_45822; - - global_tid_45820 = get_global_id(0); - local_tid_45821 = get_local_id(0); - group_sizze_45824 = get_local_size(0); - wave_sizze_45823 = LOCKSTEP_WIDTH; - group_tid_45822 = get_group_id(0); - - int32_t phys_tid_39374; - - phys_tid_39374 = group_tid_45822; - - int32_t ltid_pre_45825; - - ltid_pre_45825 = local_tid_45821; - - int64_t gtid_39305; - - gtid_39305 = sext_i32_i64(group_tid_45822); - - __local char *mem_44553; - - 
mem_44553 = (__local char *) mem_44553_backing_0; - - int64_t gtid_39308 = sext_i32_i64(ltid_pre_45825); - int32_t phys_tid_39309 = local_tid_45821; - int32_t index_primexp_42354 = sext_i64_i32(gtid_39308); - int32_t defunc_0_f_res_39555 = sdiv32(index_primexp_42354, m_29312); - int32_t defunc_0_f_res_39556 = smod32(index_primexp_42354, m_29312); - bool cond_39557 = slt32(defunc_0_f_res_39556, k2p2zq_29179); - float defunc_0_f_res_39558; - - if (cond_39557) { - int64_t i_39559 = sext_i32_i64(defunc_0_f_res_39555); - bool x_39560 = sle64((int64_t) 0, i_39559); - bool y_39561 = slt64(i_39559, i32_res_29181); - bool bounds_check_39562 = x_39560 && y_39561; - int64_t j_39563 = sext_i32_i64(defunc_0_f_res_39556); - bool x_39564 = sle64((int64_t) 0, j_39563); - bool y_39565 = slt64(j_39563, i32_res_29181); - bool bounds_check_39566 = x_39564 && y_39565; - bool index_ok_39567 = bounds_check_39562 && bounds_check_39566; - bool index_certs_39568; - - if (!index_ok_39567) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 0) == -1) { - global_failure_args[0] = i_39559; - global_failure_args[1] = j_39563; - global_failure_args[2] = i32_res_29181; - global_failure_args[3] = i32_res_29181; - ; + int32_t global_tid_126577; + int32_t local_tid_126578; + int64_t group_sizze_126581; + int32_t wave_sizze_126580; + int32_t group_tid_126579; + + global_tid_126577 = get_global_id(0); + local_tid_126578 = get_local_id(0); + group_sizze_126581 = get_local_size(0); + wave_sizze_126580 = LOCKSTEP_WIDTH; + group_tid_126579 = get_group_id(0); + + int32_t phys_tid_103550; + + phys_tid_103550 = global_tid_126577; + + int32_t phys_group_id_126582; + + phys_group_id_126582 = get_group_id(0); + for (int32_t i_126583 = 0; i_126583 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_104005)) - + phys_group_id_126582, sext_i64_i32(num_groups_104006)); + i_126583++) { + int32_t virt_group_id_126584 = phys_group_id_126582 + i_126583 * + sext_i64_i32(num_groups_104006); + int64_t 
gtid_103549 = sext_i32_i64(virt_group_id_126584) * + segmap_group_sizze_104005 + sext_i32_i64(local_tid_126578); + + if (slt64(gtid_103549, m_75136)) { + for (int64_t i_126585 = 0; i_126585 < k2p2zq_75151; i_126585++) { + ((__global double *) mem_120268)[phys_tid_103550 + i_126585 * + num_threads_126109] = 0.0; + } + for (int64_t i_126586 = 0; i_126586 < (int64_t) 2; i_126586++) { + for (int64_t i_126587 = 0; i_126587 < k2p2zq_75151; + i_126587++) { + ((__global double *) mem_120271)[phys_tid_103550 + + (i_126586 * + (num_threads_126109 * + k2p2zq_75151) + + i_126587 * + num_threads_126109)] = + 0.0; + } + } + for (int64_t i_126588 = 0; i_126588 < k2p2zq_75151; i_126588++) { + int64_t x_126589 = (int64_t) 0 + i_126588 * (int64_t) 1; + + ((__global int64_t *) mem_120273)[phys_tid_103550 + i_126588 * + num_threads_126109] = + x_126589; + } + for (int64_t j_104018 = 0; j_104018 < k2p2zq_75151; j_104018++) { + bool index_certs_104021; + + if (!ok_or_empty_75229) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 1) == + -1) { + global_failure_args[0] = j_104018; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - local_failure = true; - goto error_0; + + double defunc_2_reduce_res_104023; + double redout_119641 = 0.0; + + for (int64_t i_119642 = 0; i_119642 < k2p2zq_75151; + i_119642++) { + double x_104027 = ((__global + double *) mem_120265)[i_119642 * + (k2p2zq_75151 * + m_75136) + + gtid_103549 * + k2p2zq_75151 + + j_104018]; + double defunc_1_f_res_104028 = x_104027 * x_104027; + double defunc_1_op_res_104026 = defunc_1_f_res_104028 + + redout_119641; + double redout_tmp_126592 = defunc_1_op_res_104026; + + redout_119641 = redout_tmp_126592; + } + defunc_2_reduce_res_104023 = redout_119641; + + double sqrt_res_104029; + + sqrt_res_104029 = futrts_sqrt64(defunc_2_reduce_res_104023); + ((__global double *) mem_120268)[phys_tid_103550 + 
j_104018 * + num_threads_126109] = + sqrt_res_104029; + ((__global double *) mem_120271)[phys_tid_103550 + j_104018 * + num_threads_126109] = + sqrt_res_104029; + + bool zeze_res_104032 = sqrt_res_104029 == 0.0; + double lw_val_104033; + + if (zeze_res_104032) { + lw_val_104033 = 1.0; + } else { + lw_val_104033 = sqrt_res_104029; + } + ((__global double *) mem_120271)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + j_104018 * + num_threads_126109)] = + lw_val_104033; } - } - - float defunc_0_f_res_t_res_39569 = ((__global - float *) defunc_3_map_res_mem_44549)[gtid_39305 * - (i32_res_29181 * - i32_res_29181) + - i_39559 * - i32_res_29181 + - j_39563]; - - defunc_0_f_res_39558 = defunc_0_f_res_t_res_39569; - } else { - int32_t y_39570 = add32(k2p2zq_29179, defunc_0_f_res_39555); - bool cond_39571 = defunc_0_f_res_39556 == y_39570; - float defunc_0_f_res_f_res_39572; - - if (cond_39571) { - defunc_0_f_res_f_res_39572 = 1.0F; - } else { - defunc_0_f_res_f_res_39572 = 0.0F; - } - defunc_0_f_res_39558 = defunc_0_f_res_f_res_39572; - } - ((__local float *) mem_44553)[gtid_39308] = defunc_0_f_res_39558; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44563; - - mem_44563 = (__local char *) mem_44563_backing_1; - for (int32_t i_39574 = 0; i_39574 < k2p2zq_29179; i_39574++) { - int64_t i32_res_39576 = sext_i32_i64(i_39574); - bool x_39577 = sle64((int64_t) 0, i32_res_39576); - bool y_39578 = slt64(i32_res_39576, nm_29314); - bool bounds_check_39579 = x_39577 && y_39578; - bool index_certs_39580; - - if (!bounds_check_39579) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 1) == -1) { - global_failure_args[0] = i32_res_39576; - global_failure_args[1] = nm_29314; - ; + for (int64_t i_126593 = 0; i_126593 < k2p2zq_75151; i_126593++) { + for (int64_t i_126594 = 0; i_126594 < k2p2zq_75151; + i_126594++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + 
(i_126593 * + (num_threads_126109 * + k2p2zq_75151) + + i_126594 * + num_threads_126109)] = + ((__global double *) mem_120261)[gtid_103549 + + (i_126593 * (m_75136 * + k2p2zq_75151) + + i_126594 * m_75136)]; + } + } + for (int64_t i_126595 = 0; i_126595 < k2p2zq_75151; i_126595++) { + ((__global double *) double_buffer_mem_125536)[phys_tid_103550 + + i_126595 * + num_threads_126109] = + ((__global double *) mem_120268)[phys_tid_103550 + + i_126595 * + num_threads_126109]; + } + for (int64_t i_126596 = 0; i_126596 < (int64_t) 2; i_126596++) { + for (int64_t i_126597 = 0; i_126597 < k2p2zq_75151; + i_126597++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (i_126596 * + (num_threads_126109 * + k2p2zq_75151) + + i_126597 * + num_threads_126109)] = + ((__global double *) mem_120271)[phys_tid_103550 + + (i_126596 * + (num_threads_126109 * + k2p2zq_75151) + + i_126597 * + num_threads_126109)]; } - local_failure = true; - goto error_1; } - } - - float v1_39581 = ((__local float *) mem_44553)[i32_res_39576]; - bool cond_39582 = v1_39581 == 0.0F; - int64_t gtid_39329 = sext_i32_i64(ltid_pre_45825); - int32_t phys_tid_39330 = local_tid_45821; - int32_t defunc_0_f_res_39585 = sext_i64_i32(gtid_39329); - int32_t defunc_0_f_res_39586 = sdiv32(defunc_0_f_res_39585, m_29312); - int32_t defunc_0_f_res_39587 = smod32(defunc_0_f_res_39585, m_29312); - float defunc_0_f_res_39588; - - if (cond_39582) { - int32_t x_39589 = mul32(m_29312, defunc_0_f_res_39586); - int32_t i32_arg_39590 = add32(defunc_0_f_res_39587, x_39589); - int64_t i32_res_39591 = sext_i32_i64(i32_arg_39590); - bool x_39592 = sle64((int64_t) 0, i32_res_39591); - bool y_39593 = slt64(i32_res_39591, nm_29314); - bool bounds_check_39594 = x_39592 && y_39593; - bool index_certs_39595; - if (!bounds_check_39594) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 2) == - -1) { - global_failure_args[0] = i32_res_39591; - global_failure_args[1] = nm_29314; - ; - } - local_failure = true; - 
goto error_1; - } - } - - float defunc_0_f_res_t_res_39596 = ((__local - float *) mem_44553)[i32_res_39591]; - - defunc_0_f_res_39588 = defunc_0_f_res_t_res_39596; - } else { - int64_t i32_res_39597 = sext_i32_i64(defunc_0_f_res_39587); - bool x_39598 = sle64((int64_t) 0, i32_res_39597); - bool y_39599 = slt64(i32_res_39597, nm_29314); - bool bounds_check_39600 = x_39598 && y_39599; - bool index_certs_39601; + int64_t dqrdc2_res_104039; + int64_t k_104045 = k_75342; - if (!bounds_check_39600) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 3) == - -1) { - global_failure_args[0] = i32_res_39597; - global_failure_args[1] = nm_29314; - ; + for (int64_t l_104040 = 0; l_104040 < min_res_75341; l_104040++) { + int64_t x_104046 = add64((int64_t) 1, l_104040); + bool cond_104047 = slt64(x_104046, k_104045); + bool loop_cond_104048; + + if (cond_104047) { + bool y_104049 = slt64(l_104040, k2p2zq_75151); + bool index_certs_104050; + + if (!y_104049) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 2) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_104040; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - local_failure = true; - goto error_1; + + double zt_arg_104051 = ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + l_104040 * + num_threads_126109)]; + double zt_res_104052 = 1.0e-7 * zt_arg_104051; + bool index_certs_104053; + + if (!y_104049) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 3) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_104054 = ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + l_104040 * + num_threads_126109]; + bool zl_res_104055 = zl_arg_104054 < zt_res_104052; + + loop_cond_104048 = zl_res_104055; + } else { + 
loop_cond_104048 = 0; } - } - - float x_39602 = ((__local float *) mem_44553)[i32_res_39597]; - float x_39603 = x_39602 / v1_39581; - int32_t y_39604 = sub32(k2p2zq_29179, 1); - bool cond_39605 = slt32(defunc_0_f_res_39586, y_39604); - float defunc_0_f_res_f_res_39606; - - if (cond_39605) { - int32_t x_39607 = add32(1, defunc_0_f_res_39586); - int32_t x_39608 = mul32(m_29312, x_39607); - int32_t i32_arg_39609 = add32(defunc_0_f_res_39587, x_39608); - int64_t i32_res_39610 = sext_i32_i64(i32_arg_39609); - bool x_39611 = sle64((int64_t) 0, i32_res_39610); - bool y_39612 = slt64(i32_res_39610, nm_29314); - bool bounds_check_39613 = x_39611 && y_39612; - bool index_certs_39614; - if (!bounds_check_39613) { + bool y_104056 = slt64(l_104040, k2p2zq_75151); + int64_t upper_bound_104057 = sub64(k2p2zq_75151, x_104046); + bool loop_not_taken_104058 = !loop_cond_104048; + bool protect_assert_disj_104059 = y_104056 || + loop_not_taken_104058; + bool index_certs_104060; + + if (!protect_assert_disj_104059) { { if (atomic_cmpxchg_i32_global(global_failure, -1, 4) == -1) { - global_failure_args[0] = i32_res_39610; - global_failure_args[1] = nm_29314; + global_failure_args[0] = l_104040; + global_failure_args[1] = k2p2zq_75151; ; } local_failure = true; - goto error_1; + goto error_0; } } - float x_39615 = ((__local float *) mem_44553)[i32_res_39610]; - int32_t i32_arg_39616 = add32(i_39574, x_39608); - int64_t i32_res_39617 = sext_i32_i64(i32_arg_39616); - bool x_39618 = sle64((int64_t) 0, i32_res_39617); - bool y_39619 = slt64(i32_res_39617, nm_29314); - bool bounds_check_39620 = x_39618 && y_39619; - bool index_certs_39621; + bool index_certs_104061; - if (!bounds_check_39620) { + if (!protect_assert_disj_104059) { { if (atomic_cmpxchg_i32_global(global_failure, -1, 5) == -1) { - global_failure_args[0] = i32_res_39617; - global_failure_args[1] = nm_29314; + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_104040; + global_failure_args[2] = (int64_t) 2; + 
global_failure_args[3] = k2p2zq_75151; ; } local_failure = true; - goto error_1; + goto error_0; } } - float x_39622 = ((__local float *) mem_44553)[i32_res_39617]; - float y_39623 = x_39603 * x_39622; - float defunc_0_f_res_f_res_t_res_39624 = x_39615 - y_39623; + bool index_certs_104062; - defunc_0_f_res_f_res_39606 = defunc_0_f_res_f_res_t_res_39624; - } else { - defunc_0_f_res_f_res_39606 = x_39603; - } - defunc_0_f_res_39588 = defunc_0_f_res_f_res_39606; - } - ((__local float *) mem_44563)[gtid_39329] = defunc_0_f_res_39588; - - error_1: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_39372 = sext_i32_i64(ltid_pre_45825); - int32_t phys_tid_39373 = local_tid_45821; - float write_value_39627 = ((__local float *) mem_44563)[write_i_39372]; - - if (sle64((int64_t) 0, write_i_39372) && slt64(write_i_39372, - nm_29314)) { - ((__local float *) mem_44553)[write_i_39372] = write_value_39627; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - for (int64_t i_45827 = 0; i_45827 < sdiv_up64(i32_res_29181 * - i32_res_29181 - - sext_i32_i64(local_tid_45821), - nm_29314); i_45827++) { - ((__global float *) mem_44573)[gtid_39305 * (i32_res_29181 * - i32_res_29181) + - squot64(i_45827 * nm_29314 + - sext_i32_i64(local_tid_45821), - i32_res_29181) * i32_res_29181 + - (i_45827 * nm_29314 + - sext_i32_i64(local_tid_45821) - - squot64(i_45827 * nm_29314 + - sext_i32_i64(local_tid_45821), - i32_res_29181) * - i32_res_29181)] = ((__local - float *) mem_44553)[i32_res_29181 + - (squot64(i_45827 * - nm_29314 + - sext_i32_i64(local_tid_45821), - i32_res_29181) * - i32_res_29329 + - (i_45827 * - nm_29314 + - sext_i32_i64(local_tid_45821) - - squot64(i_45827 * - nm_29314 + - sext_i32_i64(local_tid_45821), - i32_res_29181) * - i32_res_29181))]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - error_3: - return; -} -__kernel void mainzisegmap_intragroup_39701(__global int *global_failure, - int failure_is_an_option, __global - int64_t 
*global_failure_args, - __local volatile - int64_t *mem_44594_backing_aligned_0, - int64_t m_29166, - int32_t k2p2zq_29179, - int32_t m_29312, int64_t nm_29314, - int32_t i_40240, - int64_t i32_res_40242, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, - __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44590, - __global unsigned char *mem_44598) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_44594_backing_0 = (__local volatile - char *) mem_44594_backing_aligned_0; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_45854; - int32_t local_tid_45855; - int64_t group_sizze_45858; - int32_t wave_sizze_45857; - int32_t group_tid_45856; - - global_tid_45854 = get_global_id(0); - local_tid_45855 = get_local_id(0); - group_sizze_45858 = get_local_size(0); - wave_sizze_45857 = LOCKSTEP_WIDTH; - group_tid_45856 = get_group_id(0); - - int32_t phys_tid_39701; - - phys_tid_39701 = group_tid_45856; - - int32_t ltid_pre_45859; - - ltid_pre_45859 = local_tid_45855; - - int64_t gtid_39653; - - gtid_39653 = sext_i32_i64(group_tid_45856); - - float v1_40259 = ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39653 * - ctx_param_ext_44581 + - i32_res_40242 * - ctx_param_ext_44583)]; - bool cond_40260 = v1_40259 == 0.0F; - __local char *mem_44594; - - mem_44594 = (__local char *) mem_44594_backing_0; - - int64_t gtid_39656 = sext_i32_i64(ltid_pre_45859); - int32_t phys_tid_39657 = local_tid_45855; - int32_t defunc_0_f_res_40263 = sext_i64_i32(gtid_39656); - int32_t defunc_0_f_res_40264 = sdiv32(defunc_0_f_res_40263, m_29312); - int32_t defunc_0_f_res_40265 = smod32(defunc_0_f_res_40263, m_29312); - float defunc_0_f_res_40266; - - if (cond_40260) { - int32_t 
x_40267 = mul32(m_29312, defunc_0_f_res_40264); - int32_t i32_arg_40268 = add32(defunc_0_f_res_40265, x_40267); - int64_t i32_res_40269 = sext_i32_i64(i32_arg_40268); - bool x_40270 = sle64((int64_t) 0, i32_res_40269); - bool y_40271 = slt64(i32_res_40269, nm_29314); - bool bounds_check_40272 = x_40270 && y_40271; - bool index_certs_40273; - - if (!bounds_check_40272) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 7) == -1) { - global_failure_args[0] = i32_res_40269; - global_failure_args[1] = nm_29314; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_40274 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39653 * - ctx_param_ext_44581 + - i32_res_40269 * - ctx_param_ext_44583)]; - - defunc_0_f_res_40266 = defunc_0_f_res_t_res_40274; - } else { - int64_t i32_res_40275 = sext_i32_i64(defunc_0_f_res_40265); - bool x_40276 = sle64((int64_t) 0, i32_res_40275); - bool y_40277 = slt64(i32_res_40275, nm_29314); - bool bounds_check_40278 = x_40276 && y_40277; - bool index_certs_40279; - - if (!bounds_check_40278) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 8) == -1) { - global_failure_args[0] = i32_res_40275; - global_failure_args[1] = nm_29314; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_40280 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39653 * - ctx_param_ext_44581 + - i32_res_40275 * - ctx_param_ext_44583)]; - float x_40281 = x_40280 / v1_40259; - int32_t y_40282 = sub32(k2p2zq_29179, 1); - bool cond_40283 = slt32(defunc_0_f_res_40264, y_40282); - float defunc_0_f_res_f_res_40284; - - if (cond_40283) { - int32_t x_40285 = add32(1, defunc_0_f_res_40264); - int32_t x_40286 = mul32(m_29312, x_40285); - int32_t i32_arg_40287 = add32(defunc_0_f_res_40265, x_40286); - int64_t i32_res_40288 = sext_i32_i64(i32_arg_40287); - bool x_40289 = sle64((int64_t) 0, i32_res_40288); - bool y_40290 = slt64(i32_res_40288, nm_29314); - bool 
bounds_check_40291 = x_40289 && y_40290; - bool index_certs_40292; - - if (!bounds_check_40291) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 9) == - -1) { - global_failure_args[0] = i32_res_40288; - global_failure_args[1] = nm_29314; - ; + if (!protect_assert_disj_104059) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 6) == + -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_104040; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - local_failure = true; - goto error_0; } - } - - float x_40293 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39653 * - ctx_param_ext_44581 + - i32_res_40288 * - ctx_param_ext_44583)]; - int32_t i32_arg_40294 = add32(i_40240, x_40286); - int64_t i32_res_40295 = sext_i32_i64(i32_arg_40294); - bool x_40296 = sle64((int64_t) 0, i32_res_40295); - bool y_40297 = slt64(i32_res_40295, nm_29314); - bool bounds_check_40298 = x_40296 && y_40297; - bool index_certs_40299; - - if (!bounds_check_40298) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 10) == - -1) { - global_failure_args[0] = i32_res_40295; - global_failure_args[1] = nm_29314; - ; + + bool protect_assert_disj_104063 = y_75227 || + loop_not_taken_104058; + bool index_certs_104064; + + if (!protect_assert_disj_104063) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 7) == + -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - local_failure = true; - goto error_0; - } - } - - float x_40300 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_39653 * - ctx_param_ext_44581 + - i32_res_40295 * - ctx_param_ext_44583)]; - float y_40301 = x_40281 * x_40300; - float defunc_0_f_res_f_res_t_res_40302 = x_40293 - y_40301; - - defunc_0_f_res_f_res_40284 = defunc_0_f_res_f_res_t_res_40302; - } else { - defunc_0_f_res_f_res_40284 = 
x_40281; - } - defunc_0_f_res_40266 = defunc_0_f_res_f_res_40284; - } - ((__local float *) mem_44594)[gtid_39656] = defunc_0_f_res_40266; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_39699 = sext_i32_i64(ltid_pre_45859); - int32_t phys_tid_39700 = local_tid_45855; - float write_value_40305 = ((__local float *) mem_44594)[write_i_39699]; - - if (sle64((int64_t) 0, write_i_39699) && slt64(write_i_39699, nm_29314)) { - ((__global float *) mem_44590)[gtid_39653 + write_i_39699 * m_29166] = - write_value_40305; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_45855 == 0) { - for (int64_t i_45860 = 0; i_45860 < nm_29314; i_45860++) { - ((__global float *) mem_44598)[gtid_39653 * nm_29314 + i_45860] = - ((__global float *) mem_44590)[gtid_39653 + i_45860 * m_29166]; - } - } - - error_2: - return; -} -__kernel void mainzisegmap_intragroup_40832(__global int *global_failure, - __local volatile - int64_t *mem_45150_backing_aligned_0, - __local volatile - int64_t *mem_45148_backing_aligned_1, - __local volatile - int64_t *mem_45146_backing_aligned_2, - __local volatile - int64_t *mem_45144_backing_aligned_3, - int64_t N_29165, int64_t i_29469, - __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_3_map_res_mem_45140, - __global unsigned char *mem_45153, - __global unsigned char *mem_45156, - __global unsigned char *mem_45159) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_45150_backing_3 = (__local volatile - char *) mem_45150_backing_aligned_0; - __local volatile char *restrict mem_45148_backing_2 = (__local volatile - char *) mem_45148_backing_aligned_1; - __local volatile char *restrict mem_45146_backing_1 = (__local volatile - char *) mem_45146_backing_aligned_2; - __local volatile char *restrict mem_45144_backing_0 = (__local volatile - char *) mem_45144_backing_aligned_3; - - if 
(*global_failure >= 0) - return; - - int32_t global_tid_46236; - int32_t local_tid_46237; - int64_t group_sizze_46240; - int32_t wave_sizze_46239; - int32_t group_tid_46238; - - global_tid_46236 = get_global_id(0); - local_tid_46237 = get_local_id(0); - group_sizze_46240 = get_local_size(0); - wave_sizze_46239 = LOCKSTEP_WIDTH; - group_tid_46238 = get_group_id(0); - - int32_t phys_tid_40832; - - phys_tid_40832 = group_tid_46238; - - int32_t ltid_pre_46241; - - ltid_pre_46241 = local_tid_46237; - - int64_t gtid_40825; - - gtid_40825 = sext_i32_i64(group_tid_46238); - - __local char *mem_45144; - - mem_45144 = (__local char *) mem_45144_backing_0; - - __local char *mem_45146; - - mem_45146 = (__local char *) mem_45146_backing_1; - - int64_t gtid_40828 = sext_i32_i64(ltid_pre_46241); - int32_t phys_tid_40829 = local_tid_46237; - float x_40921 = ((__global float *) images_mem_44381)[gtid_40825 * N_29165 + - gtid_40828]; - bool isnan_res_40923; - - isnan_res_40923 = futrts_isnan32(x_40921); - - bool cond_40924 = !isnan_res_40923; - float defunc_1_f_res_40925; - - if (cond_40924) { - float x_40922 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_40825 * - N_29165 + - gtid_40828]; - float defunc_1_f_res_t_res_40926 = x_40921 - x_40922; - - defunc_1_f_res_40925 = defunc_1_f_res_t_res_40926; - } else { - defunc_1_f_res_40925 = NAN; - } - - bool isnan_res_40927; - - isnan_res_40927 = futrts_isnan32(defunc_1_f_res_40925); - - bool defunc_0_p_res_40928 = !isnan_res_40927; - int64_t defunc_0_f_res_40929 = btoi_bool_i64(defunc_0_p_res_40928); - - ((__local int64_t *) mem_45144)[gtid_40828] = defunc_0_f_res_40929; - ((__local float *) mem_45146)[gtid_40828] = defunc_1_f_res_40925; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t dims_flat_46242; - - dims_flat_46242 = N_29165; - - int64_t x_40918; - int64_t x_40919; - int64_t x_46244; - int64_t x_46245; - bool ltid_in_bounds_46247; - - ltid_in_bounds_46247 = slt64(sext_i32_i64(local_tid_46237), N_29165); - - int32_t 
skip_threads_46248; - - // read input for in-block scan - { - if (ltid_in_bounds_46247) { - x_40919 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)]; - if ((local_tid_46237 - squot32(local_tid_46237, 32) * 32) == 0) { - x_40918 = x_40919; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46248 = 1; - while (slt32(skip_threads_46248, 32)) { - if (sle32(skip_threads_46248, local_tid_46237 - - squot32(local_tid_46237, 32) * 32) && - ltid_in_bounds_46247) { - // read operands - { - x_40918 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237) - - sext_i32_i64(skip_threads_46248)]; } - // perform operation - { - bool inactive_46249 = - slt64(srem64(sext_i32_i64(local_tid_46237), N_29165), - sext_i32_i64(local_tid_46237) - - sext_i32_i64(local_tid_46237 - - skip_threads_46248)); - - if (inactive_46249) { - x_40918 = x_40919; - } - if (!inactive_46249) { - int64_t defunc_1_op_res_40920 = add64(x_40918, x_40919); - - x_40918 = defunc_1_op_res_40920; + + bool index_certs_104065; + + if (!protect_assert_disj_104063) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 8) == + -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - } - if (sle32(wave_sizze_46239, skip_threads_46248)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46248, local_tid_46237 - - squot32(local_tid_46237, 32) * 32) && - ltid_in_bounds_46247) { - // write result - { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] = - x_40918; - x_40919 = x_40918; - } - } - if (sle32(wave_sizze_46239, skip_threads_46248)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46248 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46237 - 
squot32(local_tid_46237, 32) * 32) == 31 && - ltid_in_bounds_46247) { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46237, - 32))] = x_40918; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46250; - - // read input for in-block scan - { - if (squot32(local_tid_46237, 32) == 0 && ltid_in_bounds_46247) { - x_46245 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)]; - if ((local_tid_46237 - squot32(local_tid_46237, 32) * 32) == - 0) { - x_46244 = x_46245; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46250 = 1; - while (slt32(skip_threads_46250, 32)) { - if (sle32(skip_threads_46250, local_tid_46237 - - squot32(local_tid_46237, 32) * 32) && - (squot32(local_tid_46237, 32) == 0 && - ltid_in_bounds_46247)) { - // read operands + + bool index_certs_104066; + + if (!protect_assert_disj_104063) { { - x_46244 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237) - - sext_i32_i64(skip_threads_46250)]; + if (atomic_cmpxchg_i32_global(global_failure, -1, 9) == + -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // perform operation - { - bool inactive_46251 = - slt64(srem64(sext_i32_i64(local_tid_46237 * 32 + - 32 - 1), N_29165), - sext_i32_i64(local_tid_46237 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46237 - - skip_threads_46250) * 32 + 32 - - 1)); + } + + bool loopres_104067; + int64_t loopres_104072; + bool loop_while_104073; + int64_t k_104078; + + loop_while_104073 = loop_cond_104048; + k_104078 = k_104045; + while (loop_while_104073) { + for (int64_t i_104080 = 0; i_104080 < k2p2zq_75151; + i_104080++) { + bool index_certs_104082; - if (inactive_46251) { - x_46244 = x_46245; + if (!y_104056) { + { 
+ if (atomic_cmpxchg_i32_global(global_failure, + -1, 10) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = i_104080; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - if (!inactive_46251) { - int64_t defunc_1_op_res_46246 = add64(x_46244, - x_46245); + + double t_104083 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + i_104080 * + num_threads_126109)]; + + for (int64_t j0_104085 = 0; j0_104085 < + upper_bound_104057; j0_104085++) { + int64_t j_104087 = add64(x_104046, j0_104085); + bool x_104088 = sle64((int64_t) 0, j_104087); + bool y_104089 = slt64(j_104087, k2p2zq_75151); + bool bounds_check_104090 = x_104088 && y_104089; + bool index_certs_104091; + + if (!bounds_check_104090) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 11) == + -1) { + global_failure_args[0] = j_104087; + global_failure_args[1] = i_104080; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } - x_46244 = defunc_1_op_res_46246; + double lw_val_104092 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (j_104087 * + (num_threads_126109 * + k2p2zq_75151) + + i_104080 * + num_threads_126109)]; + int64_t i_104093 = sub64(j_104087, (int64_t) 1); + bool x_104094 = sle64((int64_t) 0, i_104093); + bool y_104095 = slt64(i_104093, k2p2zq_75151); + bool bounds_check_104096 = x_104094 && y_104095; + bool index_certs_104097; + + if (!bounds_check_104096) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 12) == + -1) { + global_failure_args[0] = i_104093; + global_failure_args[1] = i_104080; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + 
+ (i_104093 * + (num_threads_126109 * + k2p2zq_75151) + + i_104080 * + num_threads_126109)] = + lw_val_104092; + } + + bool index_certs_104099; + + if (!y_75227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 13) == -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = i_104080; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (m_75223 * + (num_threads_126109 * + k2p2zq_75151) + + i_104080 * + num_threads_126109)] = + t_104083; } - } - if (sle32(wave_sizze_46239, skip_threads_46250)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46250, local_tid_46237 - - squot32(local_tid_46237, 32) * 32) && - (squot32(local_tid_46237, 32) == 0 && - ltid_in_bounds_46247)) { - // write result - { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] = - x_46244; - x_46245 = x_46244; + + int64_t i_104101 = ((__global + int64_t *) mem_120273)[phys_tid_103550 + + l_104040 * + num_threads_126109]; + double t_104102 = ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + l_104040 * + num_threads_126109]; + double tt_104103 = ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + l_104040 * + num_threads_126109]; + double ttt_104104 = ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + l_104040 * + num_threads_126109)]; + + for (int64_t j0_104108 = 0; j0_104108 < upper_bound_104057; + j0_104108++) { + int64_t j_104112 = add64(x_104046, j0_104108); + bool x_104113 = sle64((int64_t) 0, j_104112); + bool y_104114 = slt64(j_104112, k2p2zq_75151); + bool bounds_check_104115 = x_104113 && y_104114; + bool index_certs_104116; + + if (!bounds_check_104115) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 14) == -1) { + global_failure_args[0] = j_104112; + global_failure_args[1] 
= k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_104117 = ((__global + int64_t *) mem_120273)[phys_tid_103550 + + j_104112 * + num_threads_126109]; + int64_t i_104118 = sub64(j_104112, (int64_t) 1); + bool x_104119 = sle64((int64_t) 0, i_104118); + bool y_104120 = slt64(i_104118, k2p2zq_75151); + bool bounds_check_104121 = x_104119 && y_104120; + bool index_certs_104122; + + if (!bounds_check_104121) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 15) == -1) { + global_failure_args[0] = i_104118; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_120273)[phys_tid_103550 + + i_104118 * + num_threads_126109] = + lw_val_104117; + + double lw_val_104124 = ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + j_104112 * + num_threads_126109]; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + i_104118 * + num_threads_126109] = + lw_val_104124; + + bool index_certs_104126; + + if (!bounds_check_104115) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 16) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_104112; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_104127 = ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + j_104112 * + num_threads_126109]; + bool index_certs_104128; + + if (!bounds_check_104121) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 17) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_104118; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + i_104118 * + num_threads_126109] = + lw_val_104127; + + bool index_certs_104130; + + if 
(!bounds_check_104115) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 18) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_104112; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_104131 = ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + j_104112 * + num_threads_126109)]; + bool index_certs_104132; + + if (!bounds_check_104121) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 19) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_104118; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + i_104118 * + num_threads_126109)] = + lw_val_104131; + } + ((__global int64_t *) mem_120273)[phys_tid_103550 + + m_75223 * + num_threads_126109] = + i_104101; + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + m_75223 * + num_threads_126109] = + t_104102; + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + m_75223 * + num_threads_126109] = + tt_104103; + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + m_75223 * + num_threads_126109)] = + ttt_104104; + + int64_t k_104138 = sub64(k_104078, (int64_t) 1); + bool cond_104139 = slt64(x_104046, k_104138); + bool loop_cond_104140; + + if (cond_104139) { + bool index_certs_104141; + + if (!y_104056) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 20) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_104040; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_104142 
= ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (num_threads_126109 * + k2p2zq_75151 + + l_104040 * + num_threads_126109)]; + double zt_res_104143 = 1.0e-7 * zt_arg_104142; + bool index_certs_104144; + + if (!y_104056) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 21) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_104145 = ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + l_104040 * + num_threads_126109]; + bool zl_res_104146 = zl_arg_104145 < zt_res_104143; + + loop_cond_104140 = zl_res_104146; + } else { + loop_cond_104140 = 0; } - } - if (sle32(wave_sizze_46239, skip_threads_46250)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46250 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46237, 32) == 0 || !ltid_in_bounds_46247)) { - // read operands - { - x_40919 = x_40918; - x_40918 = ((__local - int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46237, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46252 = - slt64(srem64(sext_i32_i64(local_tid_46237), N_29165), - sext_i32_i64(local_tid_46237) - - sext_i32_i64(squot32(local_tid_46237, 32) * 32 - 1)); - - if (inactive_46252) { - x_40918 = x_40919; - } - if (!inactive_46252) { - int64_t defunc_1_op_res_40920 = add64(x_40918, x_40919); - x_40918 = defunc_1_op_res_40920; - } - } - // write final result - { - ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] = - x_40918; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46237, 32) == 0) { - ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46237)] = - x_40919; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t last_res_40930 = ((__local int64_t *) mem_45144)[i_29469]; - int32_t defunc_0_f_res_40931 = 
sext_i64_i32(last_res_40930); - __local char *mem_45148; - - mem_45148 = (__local char *) mem_45148_backing_2; - ((__local float *) mem_45148)[sext_i32_i64(local_tid_46237)] = NAN; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45150; - - mem_45150 = (__local char *) mem_45150_backing_3; - ((__local int32_t *) mem_45150)[sext_i32_i64(local_tid_46237)] = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_40830 = sext_i32_i64(ltid_pre_46241); - int32_t phys_tid_40831 = local_tid_46237; - float x_40936 = ((__local float *) mem_45146)[write_i_40830]; - int32_t index_primexp_42374 = sext_i64_i32(write_i_40830); - bool isnan_res_40939; - - isnan_res_40939 = futrts_isnan32(x_40936); - - bool defunc_0_p_res_40940 = !isnan_res_40939; - int64_t defunc_1_f_res_40941; - - if (defunc_0_p_res_40940) { - int64_t x_40937 = ((__local int64_t *) mem_45144)[write_i_40830]; - int64_t defunc_1_f_res_t_res_40942 = sub64(x_40937, (int64_t) 1); - - defunc_1_f_res_40941 = defunc_1_f_res_t_res_40942; - } else { - defunc_1_f_res_40941 = (int64_t) -1; - } - if (sle64((int64_t) 0, defunc_1_f_res_40941) && slt64(defunc_1_f_res_40941, - N_29165)) { - ((__local int32_t *) mem_45150)[defunc_1_f_res_40941] = - index_primexp_42374; - } - if (sle64((int64_t) 0, defunc_1_f_res_40941) && slt64(defunc_1_f_res_40941, - N_29165)) { - ((__local float *) mem_45148)[defunc_1_f_res_40941] = x_40936; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_46237 == 0) { - ((__global int32_t *) mem_45153)[gtid_40825] = defunc_0_f_res_40931; - } - ((__global float *) mem_45156)[gtid_40825 * N_29165 + - sext_i32_i64(local_tid_46237)] = ((__local - float *) mem_45148)[sext_i32_i64(local_tid_46237)]; - barrier(CLK_LOCAL_MEM_FENCE); - ((__global int32_t *) mem_45159)[gtid_40825 * N_29165 + - sext_i32_i64(local_tid_46237)] = ((__local - int32_t *) mem_45150)[sext_i32_i64(local_tid_46237)]; - barrier(CLK_LOCAL_MEM_FENCE); - - error_2: - return; -} -__kernel void mainzisegmap_intragroup_41172(__global int 
*global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46361_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46357_backing_aligned_1, - int64_t N_29165, float hfrac_29171, - int64_t i32_res_29175, - int32_t k2p2_29177, __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global unsigned char *mem_45225, - __global unsigned char *mem_45227, - __global unsigned char *mem_45229) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46361_backing_1 = - (__local volatile - char *) red_arr_mem_46361_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46357_backing_0 = - (__local volatile - char *) red_arr_mem_46357_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46351; - int32_t local_tid_46352; - int64_t group_sizze_46355; - int32_t wave_sizze_46354; - int32_t group_tid_46353; - - global_tid_46351 = get_global_id(0); - local_tid_46352 = get_local_id(0); - group_sizze_46355 = get_local_size(0); - wave_sizze_46354 = LOCKSTEP_WIDTH; - group_tid_46353 = get_group_id(0); - - int32_t phys_tid_41172; - - phys_tid_41172 = group_tid_46353; - - int32_t ltid_pre_46356; - - ltid_pre_46356 = local_tid_46352; - - int64_t gtid_41165; - - gtid_41165 = sext_i32_i64(group_tid_46353); - - int32_t defunc_0_f_res_41257; - int64_t gtid_41168 = sext_i32_i64(ltid_pre_46356); - int32_t phys_tid_41169 = local_tid_46352; - __local char *red_arr_mem_46357; - - red_arr_mem_46357 = (__local char *) red_arr_mem_46357_backing_0; - - float x_41261; - - x_41261 = ((__global float *) images_mem_44381)[gtid_41165 * N_29165 + - gtid_41168]; - - bool isnan_res_41262; - - isnan_res_41262 = 
futrts_isnan32(x_41261); - - bool cond_41263 = !isnan_res_41262; - int32_t defunc_0_f_res_41264 = btoi_bool_i32(cond_41263); - - ((__local int32_t *) red_arr_mem_46357)[gtid_41168] = defunc_0_f_res_41264; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46359; - int32_t skip_waves_46360; - - skip_waves_46360 = 1; - - int32_t x_41258; - int32_t x_41259; - - offset_46359 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46352, sext_i64_i32(i32_res_29175))) { - x_41258 = ((__local - int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 + - offset_46359)]; - } - } - offset_46359 = 1; - while (slt32(offset_46359, wave_sizze_46354)) { - if (slt32(local_tid_46352 + offset_46359, - sext_i64_i32(i32_res_29175)) && ((local_tid_46352 - - squot32(local_tid_46352, - wave_sizze_46354) * - wave_sizze_46354) & (2 * - offset_46359 - - 1)) == - 0) { - // read array element - { - x_41259 = ((volatile __local - int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 + - offset_46359)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_41260 = add32(x_41258, x_41259); - - x_41258 = defunc_1_op_res_41260; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352)] = - x_41258; - } - } - offset_46359 *= 2; - } - while (slt32(skip_waves_46360, squot32(sext_i64_i32(i32_res_29175) + - wave_sizze_46354 - 1, - wave_sizze_46354))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46359 = skip_waves_46360 * wave_sizze_46354; - if (slt32(local_tid_46352 + offset_46359, - sext_i64_i32(i32_res_29175)) && ((local_tid_46352 - - squot32(local_tid_46352, - wave_sizze_46354) * - wave_sizze_46354) == 0 && - (squot32(local_tid_46352, - wave_sizze_46354) & - (2 * skip_waves_46360 - - 1)) == 0)) { - // read array element - { - x_41259 = ((__local - int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352 + - offset_46359)]; - } - // apply reduction operation - { - int32_t 
defunc_1_op_res_41260 = add32(x_41258, x_41259); - - x_41258 = defunc_1_op_res_41260; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46357)[sext_i32_i64(local_tid_46352)] = - x_41258; - } - } - skip_waves_46360 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_41257 = ((__local int32_t *) red_arr_mem_46357)[(int64_t) 0]; - - float defunc_0_f_res_41265; - int64_t gtid_41170 = sext_i32_i64(ltid_pre_46356); - int32_t phys_tid_41171 = local_tid_46352; - __local char *red_arr_mem_46361; - - red_arr_mem_46361 = (__local char *) red_arr_mem_46361_backing_1; - - int32_t index_primexp_42382; - - index_primexp_42382 = sext_i64_i32(gtid_41170); - - bool cond_41270 = slt32(index_primexp_42382, defunc_0_f_res_41257); - float defunc_0_f_res_41271; - - if (cond_41270) { - int64_t i_41272 = sext_i32_i64(index_primexp_42382); - bool x_41273 = sle64((int64_t) 0, i_41272); - bool y_41274 = slt64(i_41272, N_29165); - bool bounds_check_41275 = x_41273 && y_41274; - bool index_certs_41276; - - if (!bounds_check_41275) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 17) == -1) { - global_failure_args[0] = i_41272; - global_failure_args[1] = N_29165; - ; + bool loop_while_tmp_126603 = loop_cond_104140; + int64_t k_tmp_126608 = k_104138; + + loop_while_104073 = loop_while_tmp_126603; + k_104078 = k_tmp_126608; + } + loopres_104067 = loop_while_104073; + loopres_104072 = k_104078; + + bool cond_104147 = x_104046 == k2p2zq_75151; + int64_t j_m_i_104148 = sub64(k2p2zq_75151, l_104040); + bool empty_slice_104152 = j_m_i_104148 == (int64_t) 0; + int64_t m_104153 = sub64(j_m_i_104148, (int64_t) 1); + int64_t i_p_m_t_s_104154 = add64(l_104040, m_104153); + bool zzero_leq_i_p_m_t_s_104155 = sle64((int64_t) 0, + i_p_m_t_s_104154); + bool i_p_m_t_s_leq_w_104156 = slt64(i_p_m_t_s_104154, + k2p2zq_75151); + bool i_lte_j_104157 = sle64(l_104040, k2p2zq_75151); + bool y_104158 = zzero_leq_i_p_m_t_s_104155 && + i_p_m_t_s_leq_w_104156; + bool y_104159 = 
i_lte_j_104157 && y_104158; + bool ok_or_empty_104160 = empty_slice_104152 || y_104159; + bool index_ok_104161 = y_104056 && ok_or_empty_104160; + + if (cond_104147) { + for (int64_t i_126614 = 0; i_126614 < k2p2zq_75151; + i_126614++) { + ((__global double *) mem_125431)[phys_tid_103550 + + i_126614 * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + i_126614 * + num_threads_126109]; + } + for (int64_t i_126615 = 0; i_126615 < (int64_t) 2; + i_126615++) { + for (int64_t i_126616 = 0; i_126616 < k2p2zq_75151; + i_126616++) { + ((__global double *) mem_125429)[phys_tid_103550 + + (i_126615 * + (num_threads_126109 * + k2p2zq_75151) + + i_126616 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (i_126615 * + (num_threads_126109 * + k2p2zq_75151) + + i_126616 * + num_threads_126109)]; + } + } + for (int64_t i_126617 = 0; i_126617 < k2p2zq_75151; + i_126617++) { + for (int64_t i_126618 = 0; i_126618 < k2p2zq_75151; + i_126618++) { + ((__global double *) mem_125491)[phys_tid_103550 + + (i_126617 * + (num_threads_126109 * + k2p2zq_75151) + + i_126618 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (i_126617 * + (num_threads_126109 * + k2p2zq_75151) + + i_126618 * + num_threads_126109)]; + } + } + } else { + bool index_certs_104162; + + if (!index_ok_104161) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 22) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = l_104040; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_104164; + double redout_119643 = 0.0; + + for (int64_t i_119644 = 0; i_119644 < j_m_i_104148; + i_119644++) { + int64_t slice_119985 = l_104040 + i_119644; + double x_104168 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + 
(num_threads_126109 * + k2p2zq_75151) + + slice_119985 * + num_threads_126109)]; + double defunc_1_f_res_104169 = x_104168 * x_104168; + double defunc_1_op_res_104167 = defunc_1_f_res_104169 + + redout_119643; + double redout_tmp_126619 = defunc_1_op_res_104167; + + redout_119643 = redout_tmp_126619; + } + defunc_2_reduce_res_104164 = redout_119643; + + double sqrt_res_104170; + + sqrt_res_104170 = futrts_sqrt64(defunc_2_reduce_res_104164); + + bool zeze_res_104171 = sqrt_res_104170 == 0.0; + + if (zeze_res_104171) { + for (int64_t i_126620 = 0; i_126620 < k2p2zq_75151; + i_126620++) { + ((__global double *) mem_125152)[phys_tid_103550 + + i_126620 * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + i_126620 * + num_threads_126109]; + } + for (int64_t i_126621 = 0; i_126621 < (int64_t) 2; + i_126621++) { + for (int64_t i_126622 = 0; i_126622 < k2p2zq_75151; + i_126622++) { + ((__global + double *) mem_125150)[phys_tid_103550 + + (i_126621 * + (num_threads_126109 * + k2p2zq_75151) + + i_126622 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (i_126621 * + (num_threads_126109 * + k2p2zq_75151) + + i_126622 * + num_threads_126109)]; + } + } + for (int64_t i_126623 = 0; i_126623 < k2p2zq_75151; + i_126623++) { + for (int64_t i_126624 = 0; i_126624 < k2p2zq_75151; + i_126624++) { + ((__global + double *) mem_125421)[phys_tid_103550 + + (i_126623 * + (num_threads_126109 * + k2p2zq_75151) + + i_126624 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (i_126623 * + (num_threads_126109 * + k2p2zq_75151) + + i_126624 * + num_threads_126109)]; + } + } + } else { + bool index_ok_104175 = y_104056 && y_104056; + bool index_certs_104176; + + if (!index_ok_104175) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 23) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = l_104040; + global_failure_args[2] = 
k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_104177 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)]; + bool zeze_res_104178 = znze_arg_104177 == 0.0; + bool znze_res_104179 = !zeze_res_104178; + double nrmxl_104180; + + if (znze_res_104179) { + double abs_res_104181 = fabs(sqrt_res_104170); + double sgn_res_104182 = fsignum32(znze_arg_104177); + double zt_res_104183 = abs_res_104181 * + sgn_res_104182; + + nrmxl_104180 = zt_res_104183; + } else { + nrmxl_104180 = sqrt_res_104170; + } + for (int64_t i0_104185 = 0; i0_104185 < j_m_i_104148; + i0_104185++) { + int64_t i_104187 = add64(l_104040, i0_104185); + bool x_104188 = sle64((int64_t) 0, i_104187); + bool y_104189 = slt64(i_104187, k2p2zq_75151); + bool bounds_check_104190 = x_104188 && y_104189; + bool index_ok_104191 = y_104056 && + bounds_check_104190; + bool index_certs_104192; + + if (!index_ok_104191) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 24) == + -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = i_104187; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_104193 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + i_104187 * + num_threads_126109)]; + double lw_val_104194 = x_104193 / nrmxl_104180; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + i_104187 * + num_threads_126109)] = + lw_val_104194; + } + + double zp_arg_104196 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)]; + double zp_res_104197 = 1.0 + 
zp_arg_104196; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)] = + zp_res_104197; + + bool bounds_invalid_upwards_104199 = slt64(k2p2zq_75151, + x_104046); + bool valid_104200 = !bounds_invalid_upwards_104199; + bool range_valid_c_104201; + + if (!valid_104200) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 25) == -1) { + global_failure_args[0] = x_104046; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_104202 = slt64((int64_t) 0, + upper_bound_104057); + bool loop_not_taken_104203 = !loop_nonempty_104202; + bool protect_assert_disj_104204 = index_ok_104175 || + loop_not_taken_104203; + bool index_certs_104205; + + if (!protect_assert_disj_104204) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 26) == -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = l_104040; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_104209 = 0; i_104209 < + upper_bound_104057; i_104209++) { + int64_t index_primexp_104213 = add64(x_104046, + i_104209); + bool x_104214 = sle64((int64_t) 0, + index_primexp_104213); + bool y_104215 = slt64(index_primexp_104213, + k2p2zq_75151); + bool bounds_check_104216 = x_104214 && y_104215; + double t_104217; + double t_104219 = 0.0; + + for (int64_t i0_104218 = 0; i0_104218 < + j_m_i_104148; i0_104218++) { + int64_t i_104220 = add64(l_104040, i0_104218); + bool x_104221 = sle64((int64_t) 0, i_104220); + bool y_104222 = slt64(i_104220, k2p2zq_75151); + bool bounds_check_104223 = x_104221 && y_104222; + bool index_ok_104224 = y_104056 && + bounds_check_104223; + bool index_certs_104225; + + if (!index_ok_104224) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 27) == + -1) { + global_failure_args[0] = l_104040; + 
global_failure_args[1] = i_104220; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_104226 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + i_104220 * + num_threads_126109)]; + bool index_ok_104227 = bounds_check_104216 && + bounds_check_104223; + bool index_certs_104228; + + if (!index_ok_104227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 28) == + -1) { + global_failure_args[0] = + index_primexp_104213; + global_failure_args[1] = i_104220; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_104229 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (index_primexp_104213 * + (num_threads_126109 * + k2p2zq_75151) + + i_104220 * + num_threads_126109)]; + double y_104230 = x_104226 * y_104229; + double loopres_104231 = t_104219 - y_104230; + double t_tmp_126629 = loopres_104231; + + t_104219 = t_tmp_126629; + } + t_104217 = t_104219; + + double y_104232 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)]; + double t_104233 = t_104217 / y_104232; + + for (int64_t i0_104235 = 0; i0_104235 < + j_m_i_104148; i0_104235++) { + int64_t i_104237 = add64(l_104040, i0_104235); + bool x_104238 = sle64((int64_t) 0, i_104237); + bool y_104239 = slt64(i_104237, k2p2zq_75151); + bool bounds_check_104240 = x_104238 && y_104239; + bool index_ok_104241 = bounds_check_104216 && + bounds_check_104240; + bool index_certs_104242; + + if (!index_ok_104241) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 29) == + -1) { + global_failure_args[0] = + index_primexp_104213; + global_failure_args[1] = i_104237; + global_failure_args[2] = + k2p2zq_75151; + 
global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_104243 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (index_primexp_104213 * + (num_threads_126109 * + k2p2zq_75151) + + i_104237 * + num_threads_126109)]; + bool index_ok_104244 = y_104056 && + bounds_check_104240; + bool index_certs_104245; + + if (!index_ok_104244) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 30) == + -1) { + global_failure_args[0] = l_104040; + global_failure_args[1] = i_104237; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_104246 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + i_104237 * + num_threads_126109)]; + double y_104247 = t_104233 * y_104246; + double lw_val_104248 = x_104243 + y_104247; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (index_primexp_104213 * + (num_threads_126109 * + k2p2zq_75151) + + i_104237 * + num_threads_126109)] = + lw_val_104248; + } + + bool index_certs_104250; + + if (!bounds_check_104216) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 31) == + -1) { + global_failure_args[0] = + index_primexp_104213; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_104251 = ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + index_primexp_104213 * + num_threads_126109]; + bool zeze_res_104252 = zeze_arg_104251 == 0.0; + + if (!zeze_res_104252) { + bool index_ok_104255 = y_104056 && + bounds_check_104216; + bool index_certs_104256; + + if (!index_ok_104255) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 32) == + -1) { + global_failure_args[0] = + index_primexp_104213; + global_failure_args[1] = l_104040; + global_failure_args[2] = + k2p2zq_75151; + 
global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_104257 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (index_primexp_104213 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)]; + double abs_res_104258 = fabs(abs_arg_104257); + double zs_res_104259 = abs_res_104258 / + zeze_arg_104251; + double ztzt_res_104260 = fpow64(zs_res_104259, + 2.0); + double zm_res_104261 = 1.0 - ztzt_res_104260; + double max_res_104262 = fmax64(0.0, + zm_res_104261); + double abs_res_104263 = fabs(max_res_104262); + bool zgze_res_104264 = 1.0e-6 <= abs_res_104263; + + if (zgze_res_104264) { + double sqrt_res_104267; + + sqrt_res_104267 = + futrts_sqrt64(max_res_104262); + + double zt_res_104268 = zeze_arg_104251 * + sqrt_res_104267; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + index_primexp_104213 * + num_threads_126109] = + zt_res_104268; + } else { + bool empty_slice_104270 = + upper_bound_104057 == (int64_t) 0; + int64_t m_104271 = sub64(upper_bound_104057, + (int64_t) 1); + int64_t i_p_m_t_s_104272 = add64(x_104046, + m_104271); + bool zzero_leq_i_p_m_t_s_104273 = + sle64((int64_t) 0, i_p_m_t_s_104272); + bool i_p_m_t_s_leq_w_104274 = + slt64(i_p_m_t_s_104272, k2p2zq_75151); + bool zzero_lte_i_104275 = sle64((int64_t) 0, + x_104046); + bool i_lte_j_104276 = sle64(x_104046, + k2p2zq_75151); + bool y_104277 = i_p_m_t_s_leq_w_104274 && + zzero_lte_i_104275; + bool y_104278 = + zzero_leq_i_p_m_t_s_104273 && y_104277; + bool y_104279 = i_lte_j_104276 && y_104278; + bool forwards_ok_104280 = + zzero_lte_i_104275 && y_104279; + bool ok_or_empty_104281 = + empty_slice_104270 || + forwards_ok_104280; + bool index_ok_104282 = + bounds_check_104216 && + ok_or_empty_104281; + bool index_certs_104283; + + if (!index_ok_104282) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 33) == + -1) { + global_failure_args[0] = + index_primexp_104213; 
+ global_failure_args[1] = + x_104046; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_104285; + double redout_119645 = 0.0; + + for (int64_t i_119646 = 0; i_119646 < + upper_bound_104057; i_119646++) { + int64_t slice_119986 = x_104046 + + i_119646; + double x_104289 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (index_primexp_104213 * + (num_threads_126109 * + k2p2zq_75151) + + slice_119986 * + num_threads_126109)]; + double defunc_1_f_res_104290 = + x_104289 * x_104289; + double defunc_1_op_res_104288 = + defunc_1_f_res_104290 + + redout_119645; + double redout_tmp_126631 = + defunc_1_op_res_104288; + + redout_119645 = redout_tmp_126631; + } + defunc_2_reduce_res_104285 = redout_119645; + + double sqrt_res_104291; + + sqrt_res_104291 = + futrts_sqrt64(defunc_2_reduce_res_104285); + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + index_primexp_104213 * + num_threads_126109] = + sqrt_res_104291; + + bool index_certs_104293; + + if (!bounds_check_104216) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 34) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_104213; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126632 = 0; i_126632 < + (int64_t) 1; i_126632++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (index_primexp_104213 + + i_126632) * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + num_threads_126109 * + index_primexp_104213 + + i_126632 * + num_threads_126109]; + } + } + } + } + + bool index_certs_104296; + + if (!y_104056) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 35) == -1) { + global_failure_args[0] = l_104040; + 
global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126633 = 0; i_126633 < (int64_t) 1; + i_126633++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + (l_104040 + + i_126633) * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + num_threads_126109 * + l_104040 + + i_126633 * + num_threads_126109]; + } + + double zt_res_104299 = -1.0 * nrmxl_104180; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (l_104040 * + (num_threads_126109 * + k2p2zq_75151) + + l_104040 * + num_threads_126109)] = + zt_res_104299; + for (int64_t i_126634 = 0; i_126634 < k2p2zq_75151; + i_126634++) { + ((__global double *) mem_125152)[phys_tid_103550 + + i_126634 * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + i_126634 * + num_threads_126109]; + } + for (int64_t i_126635 = 0; i_126635 < (int64_t) 2; + i_126635++) { + for (int64_t i_126636 = 0; i_126636 < k2p2zq_75151; + i_126636++) { + ((__global + double *) mem_125150)[phys_tid_103550 + + (i_126635 * + (num_threads_126109 * + k2p2zq_75151) + + i_126636 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (i_126635 * + (num_threads_126109 * + k2p2zq_75151) + + i_126636 * + num_threads_126109)]; + } + } + for (int64_t i_126637 = 0; i_126637 < k2p2zq_75151; + i_126637++) { + for (int64_t i_126638 = 0; i_126638 < k2p2zq_75151; + i_126638++) { + ((__global + double *) mem_125421)[phys_tid_103550 + + (i_126637 * + (num_threads_126109 * + k2p2zq_75151) + + i_126638 * + num_threads_126109)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (i_126637 * + (num_threads_126109 * + k2p2zq_75151) + + i_126638 * + num_threads_126109)]; + } + } + } + for (int64_t i_126639 = 0; i_126639 < k2p2zq_75151; + i_126639++) { + ((__global double *) 
mem_125431)[phys_tid_103550 + + i_126639 * + num_threads_126109] = + ((__global double *) mem_125152)[phys_tid_103550 + + i_126639 * + num_threads_126109]; + } + for (int64_t i_126640 = 0; i_126640 < (int64_t) 2; + i_126640++) { + for (int64_t i_126641 = 0; i_126641 < k2p2zq_75151; + i_126641++) { + ((__global double *) mem_125429)[phys_tid_103550 + + (i_126640 * + (num_threads_126109 * + k2p2zq_75151) + + i_126641 * + num_threads_126109)] = + ((__global + double *) mem_125150)[phys_tid_103550 + + (i_126640 * + (num_threads_126109 * + k2p2zq_75151) + + i_126641 * + num_threads_126109)]; + } + } + for (int64_t i_126642 = 0; i_126642 < k2p2zq_75151; + i_126642++) { + for (int64_t i_126643 = 0; i_126643 < k2p2zq_75151; + i_126643++) { + ((__global double *) mem_125491)[phys_tid_103550 + + (i_126642 * + (num_threads_126109 * + k2p2zq_75151) + + i_126643 * + num_threads_126109)] = + ((__global + double *) mem_125421)[phys_tid_103550 + + (i_126642 * + (num_threads_126109 * + k2p2zq_75151) + + i_126643 * + num_threads_126109)]; + } + } + } + for (int64_t i_126644 = 0; i_126644 < k2p2zq_75151; + i_126644++) { + for (int64_t i_126645 = 0; i_126645 < k2p2zq_75151; + i_126645++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (i_126644 * + (num_threads_126109 * + k2p2zq_75151) + + i_126645 * + num_threads_126109)] = + ((__global double *) mem_125491)[phys_tid_103550 + + (i_126644 * + (num_threads_126109 * + k2p2zq_75151) + + i_126645 * + num_threads_126109)]; + } + } + for (int64_t i_126646 = 0; i_126646 < k2p2zq_75151; + i_126646++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_103550 + + i_126646 * + num_threads_126109] = + ((__global double *) mem_125431)[phys_tid_103550 + + i_126646 * + num_threads_126109]; + } + for (int64_t i_126647 = 0; i_126647 < (int64_t) 2; i_126647++) { + for (int64_t i_126648 = 0; i_126648 < k2p2zq_75151; + i_126648++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_103550 + + (i_126647 * + 
(num_threads_126109 * + k2p2zq_75151) + + i_126648 * + num_threads_126109)] = + ((__global double *) mem_125429)[phys_tid_103550 + + (i_126647 * + (num_threads_126109 * + k2p2zq_75151) + + i_126648 * + num_threads_126109)]; + } + } + + int64_t k_tmp_126602 = loopres_104072; + + k_104045 = k_tmp_126602; + } + dqrdc2_res_104039 = k_104045; + + int64_t min_arg_104301 = sub64(dqrdc2_res_104039, (int64_t) 1); + int64_t min_res_104302 = smin64(k2p2zq_75151, min_arg_104301); + + for (int64_t i_119649 = 0; i_119649 < k2p2zq_75151; i_119649++) { + int64_t x_104306 = add64((int64_t) 1, i_119649); + bool cond_f_res_104307 = slt64(min_res_104302, x_104306); + + for (int64_t i_119653 = 0; i_119653 < k2p2zq_75151; + i_119653++) { + int64_t x_104311 = add64((int64_t) 1, i_119653); + bool cond_104312 = slt64(min_res_104302, x_104311); + bool x_104313 = !cond_104312; + bool y_104314 = cond_f_res_104307 && x_104313; + bool cond_104315 = cond_104312 || y_104314; + double defunc_1_f_res_104316; + + if (cond_104315) { + defunc_1_f_res_104316 = NAN; + } else { + double x_104310 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (i_119649 * + (num_threads_126109 * + k2p2zq_75151) + + i_119653 * + num_threads_126109)]; + + defunc_1_f_res_104316 = x_104310; + } + ((__global double *) mem_120608)[phys_tid_103550 + + (i_119649 * + (num_threads_126109 * + k2p2zq_75151) + + i_119653 * + num_threads_126109)] = + defunc_1_f_res_104316; + } + } + for (int64_t i_126651 = 0; i_126651 < k2p2zq_75151; i_126651++) { + ((__global double *) mem_120661)[phys_tid_103550 + i_126651 * + num_threads_126109] = 0.0; + } + for (int64_t i_119657 = 0; i_119657 < k2p2zq_75151; i_119657++) { + for (int64_t i_126653 = 0; i_126653 < k2p2zq_75151; + i_126653++) { + ((__global double *) mem_120649)[phys_tid_103550 + + (i_119657 * + (num_threads_126109 * + k2p2zq_75151) + + i_126653 * + num_threads_126109)] = + ((__global double *) mem_120661)[phys_tid_103550 + + i_126653 * + num_threads_126109]; + } 
+ for (int64_t i_104322 = 0; i_104322 < k2p2zq_75151; + i_104322++) { + int64_t x_104324 = sub64(k2p2zq_75151, i_104322); + int64_t i_104325 = sub64(x_104324, (int64_t) 1); + bool x_104326 = sle64((int64_t) 0, i_104325); + bool y_104327 = slt64(i_104325, k2p2zq_75151); + bool bounds_check_104328 = x_104326 && y_104327; + int64_t j_m_i_104329 = sub64(k2p2zq_75151, x_104324); + bool empty_slice_104330 = j_m_i_104329 == (int64_t) 0; + int64_t m_104331 = sub64(j_m_i_104329, (int64_t) 1); + int64_t i_p_m_t_s_104332 = add64(x_104324, m_104331); + bool zzero_leq_i_p_m_t_s_104333 = sle64((int64_t) 0, + i_p_m_t_s_104332); + bool i_p_m_t_s_leq_w_104334 = slt64(i_p_m_t_s_104332, + k2p2zq_75151); + bool zzero_lte_i_104335 = sle64((int64_t) 0, x_104324); + bool i_lte_j_104336 = sle64(x_104324, k2p2zq_75151); + bool y_104337 = i_p_m_t_s_leq_w_104334 && + zzero_lte_i_104335; + bool y_104338 = zzero_leq_i_p_m_t_s_104333 && y_104337; + bool y_104339 = i_lte_j_104336 && y_104338; + bool forwards_ok_104340 = zzero_lte_i_104335 && y_104339; + bool ok_or_empty_104341 = empty_slice_104330 || + forwards_ok_104340; + bool index_ok_104342 = bounds_check_104328 && + ok_or_empty_104341; + bool index_certs_104343; + + if (!index_ok_104342) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 36) == -1) { + global_failure_args[0] = i_104325; + global_failure_args[1] = x_104324; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + global_failure_args[4] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_104344; + + if (!ok_or_empty_104341) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 37) == -1) { + global_failure_args[0] = x_104324; + global_failure_args[1] = k2p2zq_75151; + global_failure_args[2] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_104347; + double redout_119659 = 0.0; + + for (int64_t i_119660 = 0; i_119660 < j_m_i_104329; + 
i_119660++) { + int64_t slice_119992 = x_104324 + i_119660; + double x_104352 = ((__global + double *) mem_120608)[phys_tid_103550 + + (slice_119992 * + (num_threads_126109 * + k2p2zq_75151) + + i_104325 * + num_threads_126109)]; + bool isnan_res_104353; + + isnan_res_104353 = futrts_isnan64(x_104352); + + double defunc_1_f_res_104354; + + if (isnan_res_104353) { + defunc_1_f_res_104354 = 0.0; + } else { + double x_104351 = ((__global + double *) mem_120649)[phys_tid_103550 + + (i_119657 * + (num_threads_126109 * + k2p2zq_75151) + + slice_119992 * + num_threads_126109)]; + double defunc_1_f_res_f_res_104355 = x_104351 * + x_104352; + + defunc_1_f_res_104354 = defunc_1_f_res_f_res_104355; + } + + double defunc_1_op_res_104350 = defunc_1_f_res_104354 + + redout_119659; + double redout_tmp_126655 = defunc_1_op_res_104350; + + redout_119659 = redout_tmp_126655; + } + defunc_2_reduce_res_104347 = redout_119659; + + bool index_ok_104356 = bounds_check_104328 && + bounds_check_104328; + bool index_certs_104357; + + if (!index_ok_104356) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 38) == -1) { + global_failure_args[0] = i_104325; + global_failure_args[1] = i_104325; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zs_arg_104358 = ((__global + double *) mem_120608)[phys_tid_103550 + + (i_104325 * + (num_threads_126109 * + k2p2zq_75151) + + i_104325 * + num_threads_126109)]; + bool index_certs_104359; + + if (!bounds_check_104328) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 39) == -1) { + global_failure_args[0] = i_104325; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zm_arg_104360 = ((__global + double *) mem_120252)[i_119657 * + k2p2zq_75151 + + i_104325]; + double zm_res_104361 = zm_arg_104360 - + defunc_2_reduce_res_104347; + double zs_res_104362 = zm_res_104361 / zs_arg_104358; + 
+ ((__global double *) mem_120649)[phys_tid_103550 + + (i_119657 * + (num_threads_126109 * + k2p2zq_75151) + + i_104325 * + num_threads_126109)] = + zs_res_104362; + } + } + for (int64_t i_119663 = 0; i_119663 < k2p2zq_75151; i_119663++) { + for (int64_t i_119667 = 0; i_119667 < k2p2zq_75151; + i_119667++) { + double defunc_2_reduce_res_104369; + double redout_119669 = 0.0; + + for (int64_t i_119670 = 0; i_119670 < k2p2zq_75151; + i_119670++) { + double x_104373 = ((__global + double *) mem_120649)[phys_tid_103550 + + (i_119670 * + (num_threads_126109 * + k2p2zq_75151) + + i_119663 * + num_threads_126109)]; + double x_104374 = ((__global + double *) mem_120649)[phys_tid_103550 + + (i_119670 * + (num_threads_126109 * + k2p2zq_75151) + + i_119667 * + num_threads_126109)]; + double defunc_1_f_res_104375 = x_104373 * x_104374; + double defunc_1_op_res_104372 = defunc_1_f_res_104375 + + redout_119669; + double redout_tmp_126658 = defunc_1_op_res_104372; + + redout_119669 = redout_tmp_126658; + } + defunc_2_reduce_res_104369 = redout_119669; + ((__global double *) mem_120690)[phys_tid_103550 + + (i_119663 * + (num_threads_126109 * + k2p2zq_75151) + + i_119667 * + num_threads_126109)] = + defunc_2_reduce_res_104369; } - local_failure = true; - goto error_2; } + + int64_t min_res_104376 = smin64(m_75223, min_res_104302); + + for (int64_t i_126659 = 0; i_126659 < k2p2zq_75151; i_126659++) { + ((__global double *) double_buffer_mem_125548)[phys_tid_103550 + + i_126659 * + num_threads_126109] = + ((__global double *) mem_120257)[gtid_103549 + i_126659 * + m_75136]; + } + for (int64_t j_104378 = 0; j_104378 < min_res_104376; j_104378++) { + bool y_104380 = slt64(j_104378, k2p2zq_75151); + bool index_certs_104381; + + if (!y_104380) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 40) == + -1) { + global_failure_args[0] = j_104378; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_104382 = ((__global + 
double *) double_buffer_mem_125536)[phys_tid_103550 + + j_104378 * + num_threads_126109]; + bool zeze_res_104383 = zeze_arg_104382 == 0.0; + + if (zeze_res_104383) { + for (int64_t i_126661 = 0; i_126661 < k2p2zq_75151; + i_126661++) { + ((__global double *) mem_125160)[phys_tid_103550 + + i_126661 * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_126661 * + num_threads_126109]; + } + } else { + double y_104385 = ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + j_104378 * + num_threads_126109]; + double negate_arg_104386 = zeze_arg_104382 * y_104385; + double t_104387 = 0.0 - negate_arg_104386; + int64_t x_104388 = sub64(k2p2zq_75151, j_104378); + int64_t upper_bound_104389 = sub64(x_104388, (int64_t) 1); + double t_104390; + double t_104392 = t_104387; + + for (int64_t i0_104391 = 0; i0_104391 < upper_bound_104389; + i0_104391++) { + int64_t x_104393 = add64(j_104378, i0_104391); + int64_t i_104394 = add64((int64_t) 1, x_104393); + bool x_104395 = sle64((int64_t) 0, i_104394); + bool y_104396 = slt64(i_104394, k2p2zq_75151); + bool bounds_check_104397 = x_104395 && y_104396; + bool index_ok_104398 = y_104380 && bounds_check_104397; + bool index_certs_104399; + + if (!index_ok_104398) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 41) == -1) { + global_failure_args[0] = j_104378; + global_failure_args[1] = i_104394; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_104400 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (j_104378 * + (num_threads_126109 * + k2p2zq_75151) + + i_104394 * + num_threads_126109)]; + bool index_certs_104401; + + if (!bounds_check_104397) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 42) == -1) { + global_failure_args[0] = i_104394; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } 
+ } + + double y_104402 = ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_104394 * + num_threads_126109]; + double y_104403 = x_104400 * y_104402; + double loopres_104404 = t_104392 - y_104403; + double t_tmp_126662 = loopres_104404; + + t_104392 = t_tmp_126662; + } + t_104390 = t_104392; + + double t_104405 = t_104390 / zeze_arg_104382; + double y_104406 = zeze_arg_104382 * t_104405; + double lw_val_104407 = y_104385 + y_104406; + + ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + j_104378 * + num_threads_126109] = + lw_val_104407; + for (int64_t i0_104410 = 0; i0_104410 < upper_bound_104389; + i0_104410++) { + int64_t x_104412 = add64(j_104378, i0_104410); + int64_t i_104413 = add64((int64_t) 1, x_104412); + bool x_104414 = sle64((int64_t) 0, i_104413); + bool y_104415 = slt64(i_104413, k2p2zq_75151); + bool bounds_check_104416 = x_104414 && y_104415; + bool index_certs_104417; + + if (!bounds_check_104416) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 43) == -1) { + global_failure_args[0] = i_104413; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_104418 = ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_104413 * + num_threads_126109]; + bool index_ok_104419 = y_104380 && bounds_check_104416; + bool index_certs_104420; + + if (!index_ok_104419) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 44) == -1) { + global_failure_args[0] = j_104378; + global_failure_args[1] = i_104413; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_104421 = ((__global + double *) double_buffer_mem_125535)[phys_tid_103550 + + (j_104378 * + (num_threads_126109 * + k2p2zq_75151) + + i_104413 * + num_threads_126109)]; + double y_104422 = t_104405 * y_104421; + double lw_val_104423 = x_104418 + y_104422; + + ((__global + double *) 
double_buffer_mem_125548)[phys_tid_103550 + + i_104413 * + num_threads_126109] = + lw_val_104423; + } + for (int64_t i_126664 = 0; i_126664 < k2p2zq_75151; + i_126664++) { + ((__global double *) mem_125160)[phys_tid_103550 + + i_126664 * + num_threads_126109] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_126664 * + num_threads_126109]; + } + } + for (int64_t i_126665 = 0; i_126665 < k2p2zq_75151; + i_126665++) { + ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_126665 * + num_threads_126109] = + ((__global double *) mem_125160)[phys_tid_103550 + + i_126665 * + num_threads_126109]; + } + } + for (int64_t i_126666 = 0; i_126666 < k2p2zq_75151; i_126666++) { + ((__global double *) mem_120763)[phys_tid_103550 + i_126666 * + num_threads_126109] = 0.0; + } + for (int64_t write_iter_119671 = 0; write_iter_119671 < + k2p2zq_75151; write_iter_119671++) { + int64_t write_iv_119674 = ((__global + int64_t *) mem_120273)[phys_tid_103550 + + write_iter_119671 * + num_threads_126109]; + double defunc_2_reduce_res_104429; + double redout_119681 = 0.0; + + for (int64_t i_119682 = 0; i_119682 < k2p2zq_75151; + i_119682++) { + double x_104433 = ((__global + double *) double_buffer_mem_125548)[phys_tid_103550 + + i_119682 * + num_threads_126109]; + double x_104434 = ((__global + double *) mem_120649)[phys_tid_103550 + + (i_119682 * + (num_threads_126109 * + k2p2zq_75151) + + write_iter_119671 * + num_threads_126109)]; + double defunc_1_f_res_104435 = x_104433 * x_104434; + double defunc_1_op_res_104432 = defunc_1_f_res_104435 + + redout_119681; + double redout_tmp_126668 = defunc_1_op_res_104432; + + redout_119681 = redout_tmp_126668; + } + defunc_2_reduce_res_104429 = redout_119681; + + bool less_than_zzero_119675 = slt64(write_iv_119674, + (int64_t) 0); + bool greater_than_sizze_119676 = sle64(k2p2zq_75151, + write_iv_119674); + bool outside_bounds_dim_119677 = less_than_zzero_119675 || + greater_than_sizze_119676; + + if 
(!outside_bounds_dim_119677) { + ((__global double *) mem_120763)[phys_tid_103550 + + write_iv_119674 * + num_threads_126109] = + defunc_2_reduce_res_104429; + } + } + for (int64_t i_119685 = 0; i_119685 < k2p2zq_75151; i_119685++) { + int64_t x_104438 = ((__global + int64_t *) mem_120273)[phys_tid_103550 + + i_119685 * + num_threads_126109]; + + for (int64_t i_126670 = 0; i_126670 < k2p2zq_75151; + i_126670++) { + ((__global int64_t *) mem_120790)[phys_tid_103550 + + i_126670 * + num_threads_126109] = + x_104438; + } + for (int64_t i_126671 = 0; i_126671 < k2p2zq_75151; + i_126671++) { + ((__global int64_t *) mem_120778)[phys_tid_103550 + + (i_119685 * + (num_threads_126109 * + k2p2zq_75151) + + i_126671 * + num_threads_126109)] = + ((__global int64_t *) mem_120790)[phys_tid_103550 + + i_126671 * + num_threads_126109]; + } + } + for (int64_t i_126672 = 0; i_126672 < k2p2zq_75151; i_126672++) { + for (int64_t i_126673 = 0; i_126673 < k2p2zq_75151; + i_126673++) { + ((__global double *) mem_120801)[phys_tid_103550 + + (i_126672 * + (num_threads_126109 * + k2p2zq_75151) + + i_126673 * + num_threads_126109)] = + 0.0; + } + } + for (int64_t write_iter_119687 = 0; write_iter_119687 < + binop_x_120251; write_iter_119687++) { + int64_t new_index_119993 = squot64(write_iter_119687, + k2p2zq_75151); + int64_t binop_y_119995 = k2p2zq_75151 * new_index_119993; + int64_t new_index_119996 = write_iter_119687 - binop_y_119995; + int64_t write_iv_119689 = ((__global + int64_t *) mem_120778)[phys_tid_103550 + + (new_index_119993 * + (num_threads_126109 * + k2p2zq_75151) + + new_index_119996 * + num_threads_126109)]; + int64_t write_iv_119690 = ((__global + int64_t *) mem_120273)[phys_tid_103550 + + new_index_119996 * + num_threads_126109]; + bool less_than_zzero_119692 = slt64(write_iv_119689, + (int64_t) 0); + bool greater_than_sizze_119693 = sle64(k2p2zq_75151, + write_iv_119689); + bool outside_bounds_dim_119694 = less_than_zzero_119692 || + greater_than_sizze_119693; + bool 
less_than_zzero_119695 = slt64(write_iv_119690, + (int64_t) 0); + bool greater_than_sizze_119696 = sle64(k2p2zq_75151, + write_iv_119690); + bool outside_bounds_dim_119697 = less_than_zzero_119695 || + greater_than_sizze_119696; + bool outside_bounds_119699 = outside_bounds_dim_119694 || + outside_bounds_dim_119697; + + if (!outside_bounds_119699) { + for (int64_t i_126675 = 0; i_126675 < (int64_t) 1; + i_126675++) { + ((__global double *) mem_120801)[phys_tid_103550 + + (write_iv_119689 * + (num_threads_126109 * + k2p2zq_75151) + + (write_iv_119690 + + i_126675) * + num_threads_126109)] = + ((__global double *) mem_120690)[phys_tid_103550 + + new_index_119993 * + (num_threads_126109 * + k2p2zq_75151) + + num_threads_126109 * + new_index_119996 + + i_126675 * + num_threads_126109]; + } + } + } + for (int64_t i_119706 = 0; i_119706 < k2p2zq_75151; i_119706++) { + double x_104451 = ((__global + double *) mem_120763)[phys_tid_103550 + + i_119706 * + num_threads_126109]; + + for (int64_t i_119711 = 0; i_119711 < k2p2zq_75151; + i_119711++) { + double x_104453 = ((__global + double *) mem_120801)[phys_tid_103550 + + (i_119706 * + (num_threads_126109 * + k2p2zq_75151) + + i_119711 * + num_threads_126109)]; + bool isnan_res_104454; + + isnan_res_104454 = futrts_isnan64(x_104453); + + double defunc_0_f_res_104455; + + if (isnan_res_104454) { + defunc_0_f_res_104455 = 0.0; + } else { + defunc_0_f_res_104455 = x_104453; + } + ((__global double *) mem_120824)[phys_tid_103550 + + (i_119706 * + (num_threads_126109 * + k2p2zq_75151) + + i_119711 * + num_threads_126109)] = + defunc_0_f_res_104455; + } + + bool isnan_res_104456; + + isnan_res_104456 = futrts_isnan64(x_104451); + + double defunc_0_f_res_104457; + + if (isnan_res_104456) { + defunc_0_f_res_104457 = 0.0; + } else { + defunc_0_f_res_104457 = x_104451; + } + ((__global double *) mem_120821)[phys_tid_103550 + i_119706 * + num_threads_126109] = + defunc_0_f_res_104457; + } + for (int64_t i_126679 = 0; i_126679 < 
k2p2zq_75151; i_126679++) { + for (int64_t i_126680 = 0; i_126680 < k2p2zq_75151; + i_126680++) { + ((__global double *) mem_120878)[i_126679 * (m_75136 * + k2p2zq_75151) + + i_126680 * m_75136 + + gtid_103549] = ((__global + double *) mem_120824)[phys_tid_103550 + + (i_126679 * + (num_threads_126109 * + k2p2zq_75151) + + i_126680 * + num_threads_126109)]; + } + } + for (int64_t i_126681 = 0; i_126681 < k2p2zq_75151; i_126681++) { + ((__global double *) mem_120881)[i_126681 * m_75136 + + gtid_103549] = ((__global + double *) mem_120821)[phys_tid_103550 + + i_126681 * + num_threads_126109]; + } + ((__global int64_t *) mem_120883)[gtid_103549] = min_res_104302; } - - float defunc_0_f_res_t_res_41277 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41165 * - N_29165 + - i_41272]; - - defunc_0_f_res_41271 = defunc_0_f_res_t_res_41277; - } else { - defunc_0_f_res_41271 = 0.0F; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - float defunc_0_f_res_41278 = defunc_0_f_res_41271 * defunc_0_f_res_41271; + error_0: + return; + #undef segmap_group_sizze_104005 +} +__kernel void mainzisegmap_104472(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_121898, __global + unsigned char *mem_121919) +{ + #define segmap_group_sizze_106879 (mainzisegmap_group_sizze_104475) - ((__local float *) red_arr_mem_46361)[gtid_41170] = defunc_0_f_res_41278; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - error_2: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) + if (*global_failure >= 0) return; - barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46363; - int32_t skip_waves_46364; + int32_t global_tid_127176; + int32_t local_tid_127177; + int64_t group_sizze_127180; + int32_t wave_sizze_127179; + int32_t group_tid_127178; - skip_waves_46364 = 1; + global_tid_127176 = get_global_id(0); + local_tid_127177 = get_local_id(0); + group_sizze_127180 = get_local_size(0); + wave_sizze_127179 = 
LOCKSTEP_WIDTH; + group_tid_127178 = get_group_id(0); - float x_41266; - float x_41267; + int32_t phys_tid_104472; - offset_46363 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46352, sext_i64_i32(i32_res_29175))) { - x_41266 = ((__local - float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 + - offset_46363)]; + phys_tid_104472 = global_tid_127176; + + int64_t gtid_104470; + + gtid_104470 = squot64(sext_i32_i64(group_tid_127178) * + segmap_group_sizze_106879 + + sext_i32_i64(local_tid_127177), k2p2zq_75151); + + int64_t gtid_104471; + + gtid_104471 = sext_i32_i64(group_tid_127178) * segmap_group_sizze_106879 + + sext_i32_i64(local_tid_127177) - + squot64(sext_i32_i64(group_tid_127178) * segmap_group_sizze_106879 + + sext_i32_i64(local_tid_127177), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_104470, m_75136) && slt64(gtid_104471, k2p2zq_75151)) { + double x_106882 = ((__global double *) mem_121898)[gtid_104470 * + k2p2zq_75151 + + gtid_104471]; + bool isnan_res_106883; + + isnan_res_106883 = futrts_isnan64(x_106882); + + double defunc_0_f_res_106884; + + if (isnan_res_106883) { + defunc_0_f_res_106884 = 0.0; + } else { + defunc_0_f_res_106884 = x_106882; } + ((__global double *) mem_121919)[gtid_104470 * k2p2zq_75151 + + gtid_104471] = defunc_0_f_res_106884; } - offset_46363 = 1; - while (slt32(offset_46363, wave_sizze_46354)) { - if (slt32(local_tid_46352 + offset_46363, - sext_i64_i32(i32_res_29175)) && ((local_tid_46352 - - squot32(local_tid_46352, - wave_sizze_46354) * - wave_sizze_46354) & (2 * - offset_46363 - - 1)) == - 0) { - // read array element - { - x_41267 = ((volatile __local - float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 + - offset_46363)]; - } - // apply reduction operation - { - float defunc_1_op_res_41268 = x_41266 + x_41267; - - x_41266 = defunc_1_op_res_41268; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352)] = - x_41266; 
- } - } - offset_46363 *= 2; - } - while (slt32(skip_waves_46364, squot32(sext_i64_i32(i32_res_29175) + - wave_sizze_46354 - 1, - wave_sizze_46354))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46363 = skip_waves_46364 * wave_sizze_46354; - if (slt32(local_tid_46352 + offset_46363, - sext_i64_i32(i32_res_29175)) && ((local_tid_46352 - - squot32(local_tid_46352, - wave_sizze_46354) * - wave_sizze_46354) == 0 && - (squot32(local_tid_46352, - wave_sizze_46354) & - (2 * skip_waves_46364 - - 1)) == 0)) { - // read array element - { - x_41267 = ((__local - float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352 + - offset_46363)]; - } - // apply reduction operation - { - float defunc_1_op_res_41268 = x_41266 + x_41267; - - x_41266 = defunc_1_op_res_41268; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46361)[sext_i32_i64(local_tid_46352)] = - x_41266; - } + + error_0: + return; + #undef segmap_group_sizze_106879 +} +__kernel void mainzisegmap_104494(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_121906, __global + unsigned char *mem_121915) +{ + #define segmap_group_sizze_106870 (mainzisegmap_group_sizze_104498) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127171; + int32_t local_tid_127172; + int64_t group_sizze_127175; + int32_t wave_sizze_127174; + int32_t group_tid_127173; + + global_tid_127171 = get_global_id(0); + local_tid_127172 = get_local_id(0); + group_sizze_127175 = get_local_size(0); + wave_sizze_127174 = LOCKSTEP_WIDTH; + group_tid_127173 = get_group_id(0); + + int32_t phys_tid_104494; + + phys_tid_104494 = global_tid_127171; + + int64_t gtid_104491; + + gtid_104491 = squot64(sext_i32_i64(group_tid_127173) * + segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172), k2p2zq_75151 * + k2p2zq_75151); + + int64_t gtid_104492; + + gtid_104492 = 
squot64(sext_i32_i64(group_tid_127173) * + segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172) - + squot64(sext_i32_i64(group_tid_127173) * + segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + + int64_t gtid_104493; + + gtid_104493 = sext_i32_i64(group_tid_127173) * segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172) - + squot64(sext_i32_i64(group_tid_127173) * segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172), k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - squot64(sext_i32_i64(group_tid_127173) * + segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172) - + squot64(sext_i32_i64(group_tid_127173) * + segmap_group_sizze_106870 + + sext_i32_i64(local_tid_127172), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + if ((slt64(gtid_104491, m_75136) && slt64(gtid_104492, k2p2zq_75151)) && + slt64(gtid_104493, k2p2zq_75151)) { + double x_106873 = ((__global double *) mem_121906)[gtid_104491 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104492 * + k2p2zq_75151 + + gtid_104493]; + bool isnan_res_106874; + + isnan_res_106874 = futrts_isnan64(x_106873); + + double defunc_0_f_res_106875; + + if (isnan_res_106874) { + defunc_0_f_res_106875 = 0.0; + } else { + defunc_0_f_res_106875 = x_106873; } - skip_waves_46364 *= 2; + ((__global double *) mem_121915)[gtid_104491 * (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104492 * k2p2zq_75151 + + gtid_104493] = defunc_0_f_res_106875; } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_41265 = ((__local float *) red_arr_mem_46361)[(int64_t) 0]; - int32_t r32_arg_41279 = sub32(defunc_0_f_res_41257, k2p2_29177); - float i32_res_41280 = sitofp_i32_f32(r32_arg_41279); - float sqrt_arg_41281 = defunc_0_f_res_41265 / i32_res_41280; - float sqrt_res_41282; + error_0: + return; + #undef segmap_group_sizze_106870 +} +__kernel void mainzisegmap_104542(__global 
int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_r_mem_121847, + __global unsigned char *mem_121906, __global + unsigned char *mem_121909) +{ + #define segmap_group_sizze_106851 (mainzisegmap_group_sizze_104545) - sqrt_res_41282 = futrts_sqrt32(sqrt_arg_41281); + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - float i32_res_41283 = sitofp_i32_f32(defunc_0_f_res_41257); - float t32_arg_41284 = hfrac_29171 * i32_res_41283; - int32_t f32_res_41285 = fptosi_f32_i32(t32_arg_41284); + if (*global_failure >= 0) + return; - if (local_tid_46352 == 0) { - ((__global int32_t *) mem_45225)[gtid_41165] = f32_res_41285; - } - if (local_tid_46352 == 0) { - ((__global int32_t *) mem_45227)[gtid_41165] = defunc_0_f_res_41257; + int32_t global_tid_127166; + int32_t local_tid_127167; + int64_t group_sizze_127170; + int32_t wave_sizze_127169; + int32_t group_tid_127168; + + global_tid_127166 = get_global_id(0); + local_tid_127167 = get_local_id(0); + group_sizze_127170 = get_local_size(0); + wave_sizze_127169 = LOCKSTEP_WIDTH; + group_tid_127168 = get_group_id(0); + + int32_t phys_tid_104542; + + phys_tid_104542 = global_tid_127166; + + int64_t gtid_104540; + + gtid_104540 = squot64(sext_i32_i64(group_tid_127168) * + segmap_group_sizze_106851 + + sext_i32_i64(local_tid_127167), binop_x_120251); + + int64_t gtid_104541; + + gtid_104541 = sext_i32_i64(group_tid_127168) * segmap_group_sizze_106851 + + sext_i32_i64(local_tid_127167) - + squot64(sext_i32_i64(group_tid_127168) * segmap_group_sizze_106851 + + sext_i32_i64(local_tid_127167), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_104540, m_75136) && slt64(gtid_104541, binop_x_120251)) { + int64_t binop_x_115049 = gtid_104540 * binop_x_120251; + int64_t binop_x_115050 = gtid_104541 + binop_x_115049; + int64_t new_index_115052 = squot64(binop_x_115050, binop_x_120251); + int64_t binop_y_115060 = new_index_115052 * 
binop_x_120251; + int64_t binop_x_115061 = binop_x_115050 - binop_y_115060; + int64_t new_index_115062 = squot64(binop_x_115061, k2p2zq_75151); + int64_t write_index_106854 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_75151 + + new_index_115062]; + int64_t binop_y_115117 = k2p2zq_75151 * new_index_115062; + int64_t new_index_115118 = binop_x_115061 - binop_y_115117; + int64_t write_index_106855 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_75151 + + new_index_115118]; + double write_value_106856 = ((__global + double *) defunc_3_map_res_r_mem_121847)[new_index_115052 * + (k2p2zq_75151 * + k2p2zq_75151) + + new_index_115062 * + k2p2zq_75151 + + new_index_115118]; + + if (((sle64((int64_t) 0, gtid_104540) && slt64(gtid_104540, m_75136)) && + (sle64((int64_t) 0, write_index_106854) && + slt64(write_index_106854, k2p2zq_75151))) && (sle64((int64_t) 0, + write_index_106855) && + slt64(write_index_106855, + k2p2zq_75151))) { + ((__global double *) mem_121906)[gtid_104540 * (k2p2zq_75151 * + k2p2zq_75151) + + write_index_106854 * k2p2zq_75151 + + write_index_106855] = + write_value_106856; + } } - if (local_tid_46352 == 0) { - ((__global float *) mem_45229)[gtid_41165] = sqrt_res_41282; + + error_0: + return; + #undef segmap_group_sizze_106851 +} +__kernel void mainzisegmap_104699(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t binop_x_120251, + __global unsigned char *mem_121341, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned char *mem_121898, __global + unsigned char *mem_121901) +{ + #define segmap_group_sizze_106780 (mainzisegmap_group_sizze_104702) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127160; + int32_t local_tid_127161; + int64_t group_sizze_127164; + int32_t wave_sizze_127163; + int32_t group_tid_127162; + + global_tid_127160 = get_global_id(0); + 
local_tid_127161 = get_local_id(0); + group_sizze_127164 = get_local_size(0); + wave_sizze_127163 = LOCKSTEP_WIDTH; + group_tid_127162 = get_group_id(0); + + int32_t phys_tid_104699; + + phys_tid_104699 = global_tid_127160; + + int64_t gtid_104697; + + gtid_104697 = squot64(sext_i32_i64(group_tid_127162) * + segmap_group_sizze_106780 + + sext_i32_i64(local_tid_127161), k2p2zq_75151); + + int64_t gtid_104698; + + gtid_104698 = sext_i32_i64(group_tid_127162) * segmap_group_sizze_106780 + + sext_i32_i64(local_tid_127161) - + squot64(sext_i32_i64(group_tid_127162) * segmap_group_sizze_106780 + + sext_i32_i64(local_tid_127161), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_104697, m_75136) && slt64(gtid_104698, k2p2zq_75151)) { + int64_t write_index_106785 = ((__global + int64_t *) mem_121341)[gtid_104698 * + m_75136 + + gtid_104697]; + double defunc_2_reduce_res_106786; + double redout_119740 = 0.0; + + for (int64_t i_119741 = 0; i_119741 < k2p2zq_75151; i_119741++) { + double x_106790 = ((__global double *) mem_121901)[gtid_104697 * + k2p2zq_75151 + + i_119741]; + double x_106791 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_104697 * + binop_x_120251 + + i_119741 * + k2p2zq_75151 + + gtid_104698]; + double defunc_1_f_res_106792 = x_106790 * x_106791; + double defunc_1_op_res_106789 = defunc_1_f_res_106792 + + redout_119740; + double redout_tmp_127165 = defunc_1_op_res_106789; + + redout_119740 = redout_tmp_127165; + } + defunc_2_reduce_res_106786 = redout_119740; + if ((sle64((int64_t) 0, gtid_104697) && slt64(gtid_104697, m_75136)) && + (sle64((int64_t) 0, write_index_106785) && slt64(write_index_106785, + k2p2zq_75151))) { + ((__global double *) mem_121898)[gtid_104697 * k2p2zq_75151 + + write_index_106785] = + defunc_2_reduce_res_106786; + } } - error_4: + error_0: return; + #undef segmap_group_sizze_106780 } -__kernel void mainzisegmap_intragroup_41640(__global int *global_failure, - int failure_is_an_option, __global - int64_t 
*global_failure_args, - __local volatile - int64_t *red_arr_mem_46624_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46622_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46620_backing_aligned_2, - __local volatile - int64_t *mem_45288_backing_aligned_3, - int64_t N_29165, int32_t n_29169, - int64_t iota32_arg_29597, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global unsigned char *mem_45284, - __global unsigned char *mem_45291, - __global unsigned char *mem_45293) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46624_backing_3 = - (__local volatile - char *) red_arr_mem_46624_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46622_backing_2 = - (__local volatile - char *) red_arr_mem_46622_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46620_backing_1 = - (__local volatile - char *) red_arr_mem_46620_backing_aligned_2; - __local volatile char *restrict mem_45288_backing_0 = (__local volatile - char *) mem_45288_backing_aligned_3; +__kernel void mainzisegmap_104742(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_75223, + int64_t num_groups_106711, + int64_t num_threads_126166, __global + unsigned char *mem_121338, __global + unsigned char *mem_121343, __global + unsigned char *mem_121850, __global + unsigned char *mem_121858, __global + unsigned char *mem_121895, __global + unsigned char *mem_125243, __global + unsigned char *double_buffer_mem_125565) +{ + #define 
segmap_group_sizze_106710 (mainzisegmap_group_sizze_104744) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; volatile __local bool local_failure; if (failure_is_an_option) { @@ -7138,4768 +10593,5865 @@ def sync(self): local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46603; - int32_t local_tid_46604; - int64_t group_sizze_46607; - int32_t wave_sizze_46606; - int32_t group_tid_46605; - - global_tid_46603 = get_global_id(0); - local_tid_46604 = get_local_id(0); - group_sizze_46607 = get_local_size(0); - wave_sizze_46606 = LOCKSTEP_WIDTH; - group_tid_46605 = get_group_id(0); - - int32_t phys_tid_41640; - - phys_tid_41640 = group_tid_46605; - - int32_t ltid_pre_46608; - - ltid_pre_46608 = local_tid_46604; - - int64_t gtid_41633; - - gtid_41633 = sext_i32_i64(group_tid_46605); - - int32_t x_41883; - - x_41883 = ((__global int32_t *) defunc_4_map_res_mem_45177)[gtid_41633]; - - int32_t x_41884 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_41633]; - float x_41885 = ((__global float *) defunc_3_map_res_mem_45246)[gtid_41633]; - int32_t x_41886 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_41633]; - float x_41887 = ((__global float *) defunc_0_f_res_mem_45279)[gtid_41633]; - int32_t y_41890 = sub32(x_41883, x_41884); - float i32_res_41891 = sitofp_i32_f32(x_41884); - float sqrt_res_41892; - - sqrt_res_41892 = futrts_sqrt32(i32_res_41891); - - float y_41893 = x_41885 * sqrt_res_41892; - __local char *mem_45288; - - mem_45288 = (__local char *) mem_45288_backing_0; - - int64_t gtid_41636 = sext_i32_i64(ltid_pre_46608); - int32_t phys_tid_41637 = local_tid_46604; - int32_t index_primexp_42395 = sext_i64_i32(gtid_41636); - bool cond_41906 = sle32(y_41890, index_primexp_42395); - float defunc_0_f_res_41907; - - if (cond_41906) { - defunc_0_f_res_41907 = 0.0F; - } else { - bool cond_41908 = index_primexp_42395 == 0; - float defunc_0_f_res_f_res_41909; - - if (cond_41908) { - 
defunc_0_f_res_f_res_41909 = x_41887; - } else { - int32_t i_41910 = add32(x_41884, index_primexp_42395); - int64_t i_41911 = sext_i32_i64(i_41910); - bool x_41912 = sle64((int64_t) 0, i_41911); - bool y_41913 = slt64(i_41911, N_29165); - bool bounds_check_41914 = x_41912 && y_41913; - bool index_certs_41915; + int32_t global_tid_127144; + int32_t local_tid_127145; + int64_t group_sizze_127148; + int32_t wave_sizze_127147; + int32_t group_tid_127146; + + global_tid_127144 = get_global_id(0); + local_tid_127145 = get_local_id(0); + group_sizze_127148 = get_local_size(0); + wave_sizze_127147 = LOCKSTEP_WIDTH; + group_tid_127146 = get_group_id(0); + + int32_t phys_tid_104742; + + phys_tid_104742 = global_tid_127144; + + int32_t phys_group_id_127149; + + phys_group_id_127149 = get_group_id(0); + for (int32_t i_127150 = 0; i_127150 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_106710)) - + phys_group_id_127149, sext_i64_i32(num_groups_106711)); + i_127150++) { + int32_t virt_group_id_127151 = phys_group_id_127149 + i_127150 * + sext_i64_i32(num_groups_106711); + int64_t gtid_104741 = sext_i32_i64(virt_group_id_127151) * + segmap_group_sizze_106710 + sext_i32_i64(local_tid_127145); + + if (slt64(gtid_104741, m_75136)) { + int64_t min_res_106717 = ((__global + int64_t *) mem_121343)[gtid_104741]; + int64_t min_res_106718 = smin64(m_75223, min_res_106717); - if (!bounds_check_41914) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 25) == - -1) { - global_failure_args[0] = i_41911; - global_failure_args[1] = N_29165; - ; + for (int64_t i_127152 = 0; i_127152 < k2p2zq_75151; i_127152++) { + ((__global double *) double_buffer_mem_125565)[phys_tid_104742 + + i_127152 * + num_threads_126166] = + ((__global double *) mem_121850)[gtid_104741 + i_127152 * + m_75136]; + } + for (int64_t j_106720 = 0; j_106720 < min_res_106718; j_106720++) { + bool y_106722 = slt64(j_106720, k2p2zq_75151); + bool index_certs_106723; + + if (!y_106722) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, -1, 83) == + -1) { + global_failure_args[0] = j_106720; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - local_failure = true; - goto error_0; } - } - - float x_41916 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41633 * - N_29165 + - i_41911]; - int32_t x_41917 = sub32(x_41884, x_41886); - int32_t i_41918 = add32(x_41917, index_primexp_42395); - int64_t i_41919 = sext_i32_i64(i_41918); - bool x_41920 = sle64((int64_t) 0, i_41919); - bool y_41921 = slt64(i_41919, N_29165); - bool bounds_check_41922 = x_41920 && y_41921; - bool index_certs_41923; - - if (!bounds_check_41922) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 26) == - -1) { - global_failure_args[0] = i_41919; - global_failure_args[1] = N_29165; - ; + + double zeze_arg_106724 = ((__global + double *) mem_121338)[j_106720 * + m_75136 + + gtid_104741]; + bool zeze_res_106725 = zeze_arg_106724 == 0.0; + + if (zeze_res_106725) { + for (int64_t i_127154 = 0; i_127154 < k2p2zq_75151; + i_127154++) { + ((__global double *) mem_125243)[phys_tid_104742 + + i_127154 * + num_threads_126166] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_127154 * + num_threads_126166]; } - local_failure = true; - goto error_0; + } else { + double y_106727 = ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + j_106720 * + num_threads_126166]; + double negate_arg_106728 = zeze_arg_106724 * y_106727; + double t_106729 = 0.0 - negate_arg_106728; + int64_t x_106730 = sub64(k2p2zq_75151, j_106720); + int64_t upper_bound_106731 = sub64(x_106730, (int64_t) 1); + double t_106732; + double t_106734 = t_106729; + + for (int64_t i0_106733 = 0; i0_106733 < upper_bound_106731; + i0_106733++) { + int64_t x_106735 = add64(j_106720, i0_106733); + int64_t i_106736 = add64((int64_t) 1, x_106735); + bool x_106737 = sle64((int64_t) 0, i_106736); + bool y_106738 = slt64(i_106736, k2p2zq_75151); + bool 
bounds_check_106739 = x_106737 && y_106738; + bool index_ok_106740 = y_106722 && bounds_check_106739; + bool index_certs_106741; + + if (!index_ok_106740) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 84) == -1) { + global_failure_args[0] = j_106720; + global_failure_args[1] = i_106736; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_106742 = ((__global + double *) mem_121858)[i_106736 * + (m_75136 * + k2p2zq_75151) + + j_106720 * + m_75136 + + gtid_104741]; + bool index_certs_106743; + + if (!bounds_check_106739) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 85) == -1) { + global_failure_args[0] = i_106736; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_106744 = ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_106736 * + num_threads_126166]; + double y_106745 = x_106742 * y_106744; + double loopres_106746 = t_106734 - y_106745; + double t_tmp_127155 = loopres_106746; + + t_106734 = t_tmp_127155; + } + t_106732 = t_106734; + + double t_106747 = t_106732 / zeze_arg_106724; + double y_106748 = zeze_arg_106724 * t_106747; + double lw_val_106749 = y_106727 + y_106748; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + j_106720 * + num_threads_126166] = + lw_val_106749; + for (int64_t i0_106752 = 0; i0_106752 < upper_bound_106731; + i0_106752++) { + int64_t x_106754 = add64(j_106720, i0_106752); + int64_t i_106755 = add64((int64_t) 1, x_106754); + bool x_106756 = sle64((int64_t) 0, i_106755); + bool y_106757 = slt64(i_106755, k2p2zq_75151); + bool bounds_check_106758 = x_106756 && y_106757; + bool index_certs_106759; + + if (!bounds_check_106758) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 86) == -1) { + global_failure_args[0] = i_106755; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto 
error_0; + } + } + + double x_106760 = ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_106755 * + num_threads_126166]; + bool index_ok_106761 = y_106722 && bounds_check_106758; + bool index_certs_106762; + + if (!index_ok_106761) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 87) == -1) { + global_failure_args[0] = j_106720; + global_failure_args[1] = i_106755; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_106763 = ((__global + double *) mem_121858)[i_106755 * + (m_75136 * + k2p2zq_75151) + + j_106720 * + m_75136 + + gtid_104741]; + double y_106764 = t_106747 * y_106763; + double lw_val_106765 = x_106760 + y_106764; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_106755 * + num_threads_126166] = + lw_val_106765; + } + for (int64_t i_127157 = 0; i_127157 < k2p2zq_75151; + i_127157++) { + ((__global double *) mem_125243)[phys_tid_104742 + + i_127157 * + num_threads_126166] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_127157 * + num_threads_126166]; + } + } + for (int64_t i_127158 = 0; i_127158 < k2p2zq_75151; + i_127158++) { + ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_127158 * + num_threads_126166] = + ((__global double *) mem_125243)[phys_tid_104742 + + i_127158 * + num_threads_126166]; } } - - float y_41924 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41633 * - N_29165 + - i_41919]; - float defunc_0_f_res_f_res_f_res_41925 = x_41916 - y_41924; - - defunc_0_f_res_f_res_41909 = defunc_0_f_res_f_res_f_res_41925; + for (int64_t i_127159 = 0; i_127159 < k2p2zq_75151; i_127159++) { + ((__global double *) mem_121895)[i_127159 * m_75136 + + gtid_104741] = ((__global + double *) double_buffer_mem_125565)[phys_tid_104742 + + i_127159 * + num_threads_126166]; + } } - defunc_0_f_res_41907 = defunc_0_f_res_f_res_41909; + 
barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - ((__local float *) mem_45288)[gtid_41636] = defunc_0_f_res_41907; error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t dims_flat_46609; - - dims_flat_46609 = iota32_arg_29597; - - float x_41902; - float x_41903; - float x_46611; - float x_46612; - bool ltid_in_bounds_46614; + return; + #undef segmap_group_sizze_106710 +} +__kernel void mainzisegmap_104804(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_106652, + int64_t binop_x_120251, + int64_t num_threads_126162, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned char *mem_121613, __global + unsigned char *mem_121616, __global + unsigned char *mem_121632) +{ + #define segmap_group_sizze_106651 (mainzisegmap_group_sizze_104807) - ltid_in_bounds_46614 = slt64(sext_i32_i64(local_tid_46604), - iota32_arg_29597); + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - int32_t skip_threads_46615; + if (*global_failure >= 0) + return; - // read input for in-block scan - { - if (ltid_in_bounds_46614) { - x_41903 = ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604)]; - if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) == 0) { - x_41902 = x_41903; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46615 = 1; - while (slt32(skip_threads_46615, 32)) { - if (sle32(skip_threads_46615, local_tid_46604 - - squot32(local_tid_46604, 32) * 32) && - ltid_in_bounds_46614) { - // read operands - { - x_41902 = ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604) - - sext_i32_i64(skip_threads_46615)]; - } - // perform operation - { - bool inactive_46616 = - slt64(srem64(sext_i32_i64(local_tid_46604), - iota32_arg_29597), - sext_i32_i64(local_tid_46604) - - sext_i32_i64(local_tid_46604 - - skip_threads_46615)); + int32_t global_tid_127001; + 
int32_t local_tid_127002; + int64_t group_sizze_127005; + int32_t wave_sizze_127004; + int32_t group_tid_127003; + + global_tid_127001 = get_global_id(0); + local_tid_127002 = get_local_id(0); + group_sizze_127005 = get_local_size(0); + wave_sizze_127004 = LOCKSTEP_WIDTH; + group_tid_127003 = get_group_id(0); + + int32_t phys_tid_104804; + + phys_tid_104804 = global_tid_127001; + + int32_t phys_group_id_127006; + + phys_group_id_127006 = get_group_id(0); + for (int32_t i_127007 = 0; i_127007 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + segmap_group_sizze_106651)) - + phys_group_id_127006, sext_i64_i32(num_groups_106652)); + i_127007++) { + int32_t virt_group_id_127008 = phys_group_id_127006 + i_127007 * + sext_i64_i32(num_groups_106652); + int64_t gtid_104802 = squot64(sext_i32_i64(virt_group_id_127008) * + segmap_group_sizze_106651 + + sext_i32_i64(local_tid_127002), + k2p2zq_75151); + int64_t gtid_104803 = sext_i32_i64(virt_group_id_127008) * + segmap_group_sizze_106651 + sext_i32_i64(local_tid_127002) - + squot64(sext_i32_i64(virt_group_id_127008) * + segmap_group_sizze_106651 + + sext_i32_i64(local_tid_127002), k2p2zq_75151) * + k2p2zq_75151; + + if (slt64(gtid_104802, m_75136) && slt64(gtid_104803, k2p2zq_75151)) { + for (int64_t i_119736 = 0; i_119736 < k2p2zq_75151; i_119736++) { + double defunc_2_reduce_res_106663; + double redout_119738 = 0.0; + + for (int64_t i_119739 = 0; i_119739 < k2p2zq_75151; + i_119739++) { + double x_106667 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_104802 * + binop_x_120251 + + i_119739 * + k2p2zq_75151 + + gtid_104803]; + double x_106668 = ((__global + double *) mem_121613)[gtid_104802 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_119736 * + k2p2zq_75151 + + i_119739]; + double defunc_1_f_res_106669 = x_106667 * x_106668; + double defunc_1_op_res_106666 = defunc_1_f_res_106669 + + redout_119738; + double redout_tmp_127010 = defunc_1_op_res_106666; - if (inactive_46616) { - x_41902 = x_41903; - } - 
if (!inactive_46616) { - float defunc_1_op_res_41904 = x_41902 + x_41903; - - x_41902 = defunc_1_op_res_41904; - } + redout_119738 = redout_tmp_127010; } + defunc_2_reduce_res_106663 = redout_119738; + ((__global double *) mem_121616)[phys_tid_104804 + i_119736 * + num_threads_126162] = + defunc_2_reduce_res_106663; } - if (sle32(wave_sizze_46606, skip_threads_46615)) { - barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127011 = 0; i_127011 < k2p2zq_75151; i_127011++) { + ((__global double *) mem_121632)[i_127011 * (k2p2zq_75151 * + m_75136) + + gtid_104802 * k2p2zq_75151 + + gtid_104803] = ((__global + double *) mem_121616)[phys_tid_104804 + + i_127011 * + num_threads_126162]; } - if (sle32(skip_threads_46615, local_tid_46604 - - squot32(local_tid_46604, 32) * 32) && - ltid_in_bounds_46614) { - // write result - { - ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604)] = - x_41902; - x_41903 = x_41902; - } - } - if (sle32(wave_sizze_46606, skip_threads_46615)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46615 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) == 31 && - ltid_in_bounds_46614) { - ((volatile __local - float *) mem_45288)[sext_i32_i64(squot32(local_tid_46604, 32))] = - x_41902; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46617; - - // read input for in-block scan - { - if (squot32(local_tid_46604, 32) == 0 && ltid_in_bounds_46614) { - x_46612 = ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604)]; - if ((local_tid_46604 - squot32(local_tid_46604, 32) * 32) == - 0) { - x_46611 = x_46612; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46617 = 1; - while (slt32(skip_threads_46617, 32)) { - if (sle32(skip_threads_46617, local_tid_46604 - - 
squot32(local_tid_46604, 32) * 32) && - (squot32(local_tid_46604, 32) == 0 && - ltid_in_bounds_46614)) { - // read operands - { - x_46611 = ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604) - - sext_i32_i64(skip_threads_46617)]; - } - // perform operation - { - bool inactive_46618 = - slt64(srem64(sext_i32_i64(local_tid_46604 * 32 + - 32 - 1), iota32_arg_29597), - sext_i32_i64(local_tid_46604 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46604 - - skip_threads_46617) * 32 + 32 - - 1)); - - if (inactive_46618) { - x_46611 = x_46612; - } - if (!inactive_46618) { - float defunc_1_op_res_46613 = x_46611 + x_46612; - - x_46611 = defunc_1_op_res_46613; - } - } - } - if (sle32(wave_sizze_46606, skip_threads_46617)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46617, local_tid_46604 - - squot32(local_tid_46604, 32) * 32) && - (squot32(local_tid_46604, 32) == 0 && - ltid_in_bounds_46614)) { - // write result - { - ((volatile __local - float *) mem_45288)[sext_i32_i64(local_tid_46604)] = - x_46611; - x_46612 = x_46611; - } - } - if (sle32(wave_sizze_46606, skip_threads_46617)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46617 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46604, 32) == 0 || !ltid_in_bounds_46614)) { - // read operands - { - x_41903 = x_41902; - x_41902 = ((__local - float *) mem_45288)[sext_i32_i64(squot32(local_tid_46604, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46619 = - slt64(srem64(sext_i32_i64(local_tid_46604), - iota32_arg_29597), - sext_i32_i64(local_tid_46604) - - sext_i32_i64(squot32(local_tid_46604, 32) * 32 - 1)); - - if (inactive_46619) { - x_41902 = x_41903; - } - if (!inactive_46619) { - float defunc_1_op_res_41904 = x_41902 + x_41903; - - x_41902 = defunc_1_op_res_41904; - } - } - // write final result - { - ((__local float *) mem_45288)[sext_i32_i64(local_tid_46604)] = - x_41902; - } - } - 
} - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46604, 32) == 0) { - ((__local float *) mem_45288)[sext_i32_i64(local_tid_46604)] = - x_41903; } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - - bool acc0_41931; - int32_t acc0_41932; - float acc0_41933; - int64_t gtid_41638 = sext_i32_i64(ltid_pre_46608); - int32_t phys_tid_41639 = local_tid_46604; - __local char *red_arr_mem_46620; - - red_arr_mem_46620 = (__local char *) red_arr_mem_46620_backing_1; - - __local char *red_arr_mem_46622; - - red_arr_mem_46622 = (__local char *) red_arr_mem_46622_backing_2; - - __local char *red_arr_mem_46624; - - red_arr_mem_46624 = (__local char *) red_arr_mem_46624_backing_3; - - float x_41948; - - x_41948 = ((__local float *) mem_45288)[gtid_41638]; - - float x_41949 = ((__global float *) mem_45284)[gtid_41638]; - int32_t index_primexp_42398 = sext_i64_i32(gtid_41638); - float defunc_0_f_res_41952 = x_41948 / y_41893; - bool cond_41953 = slt32(index_primexp_42398, y_41890); - bool isnan_res_41954; - - isnan_res_41954 = futrts_isnan32(defunc_0_f_res_41952); - - bool cond_t_res_41955 = !isnan_res_41954; - bool x_41956 = cond_41953 && cond_t_res_41955; - float abs_res_41957 = (float) fabs(defunc_0_f_res_41952); - bool defunc_2_f_res_t_res_41958 = x_41949 < abs_res_41957; - bool x_41959 = x_41956 && defunc_2_f_res_t_res_41958; - float defunc_1_f_res_41960; - - if (cond_41953) { - defunc_1_f_res_41960 = defunc_0_f_res_41952; - } else { - defunc_1_f_res_41960 = 0.0F; - } - ((__local bool *) red_arr_mem_46620)[gtid_41638] = x_41959; - ((__local int32_t *) red_arr_mem_46622)[gtid_41638] = index_primexp_42398; - ((__local float *) red_arr_mem_46624)[gtid_41638] = defunc_1_f_res_41960; - barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46626; - int32_t skip_waves_46627; + error_0: + return; + #undef segmap_group_sizze_106651 +} +__kernel void mainzisegmap_105020(__global int *global_failure, 
int64_t m_75136, + int64_t k2p2zq_75151, int64_t x_106526, + int64_t i_106527, int64_t j_m_i_106531, + int64_t num_groups_106559, + int64_t num_threads_126154, __global + unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121458, __global + unsigned char *mem_121476, __global + unsigned char *mem_121480, __global + unsigned char *mem_121492, __global + unsigned char *mem_121504) +{ + #define segmap_group_sizze_106558 (mainzisegmap_group_sizze_105022) - skip_waves_46627 = 1; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - bool x_41934; - int32_t x_41935; - float x_41936; - bool x_41937; - int32_t x_41938; - float x_41939; + if (*global_failure >= 0) + return; - offset_46626 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46604, sext_i64_i32(iota32_arg_29597))) { - x_41934 = ((__local - bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41935 = ((__local - int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41936 = ((__local - float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - } - } - offset_46626 = 1; - while (slt32(offset_46626, wave_sizze_46606)) { - if (slt32(local_tid_46604 + offset_46626, - sext_i64_i32(iota32_arg_29597)) && ((local_tid_46604 - - squot32(local_tid_46604, - wave_sizze_46606) * - wave_sizze_46606) & (2 * - offset_46626 - - 1)) == - 0) { - // read array element - { - x_41937 = ((volatile __local - bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41938 = ((volatile __local - int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41939 = ((volatile __local - float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - } - // apply reduction operation - { - bool defunc_1_op_res_41940; - int32_t defunc_1_op_res_41941; - - if (x_41934) { - defunc_1_op_res_41940 = x_41934; - 
defunc_1_op_res_41941 = x_41935; - } else { - bool x_41942 = x_41937 && x_41937; - bool x_41943 = !x_41937; - bool y_41944 = x_41934 && x_41943; - bool defunc_1_op_res_f_res_41945 = x_41942 || y_41944; - int32_t defunc_1_op_res_f_res_41946; - - if (x_41937) { - defunc_1_op_res_f_res_41946 = x_41938; - } else { - defunc_1_op_res_f_res_41946 = x_41935; - } - defunc_1_op_res_41940 = defunc_1_op_res_f_res_41945; - defunc_1_op_res_41941 = defunc_1_op_res_f_res_41946; - } - - float defunc_1_op_res_41947 = x_41936 + x_41939; - - x_41934 = defunc_1_op_res_41940; - x_41935 = defunc_1_op_res_41941; - x_41936 = defunc_1_op_res_41947; - } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604)] = - x_41934; - ((volatile __local - int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604)] = - x_41935; - ((volatile __local - float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604)] = - x_41936; - } - } - offset_46626 *= 2; - } - while (slt32(skip_waves_46627, squot32(sext_i64_i32(iota32_arg_29597) + - wave_sizze_46606 - 1, - wave_sizze_46606))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46626 = skip_waves_46627 * wave_sizze_46606; - if (slt32(local_tid_46604 + offset_46626, - sext_i64_i32(iota32_arg_29597)) && ((local_tid_46604 - - squot32(local_tid_46604, - wave_sizze_46606) * - wave_sizze_46606) == 0 && - (squot32(local_tid_46604, - wave_sizze_46606) & - (2 * skip_waves_46627 - - 1)) == 0)) { - // read array element - { - x_41937 = ((__local - bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41938 = ((__local - int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - x_41939 = ((__local - float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604 + - offset_46626)]; - } - // apply reduction operation - { - bool defunc_1_op_res_41940; - int32_t defunc_1_op_res_41941; - - if (x_41934) { - defunc_1_op_res_41940 = x_41934; - defunc_1_op_res_41941 = x_41935; - } else { 
- bool x_41942 = x_41937 && x_41937; - bool x_41943 = !x_41937; - bool y_41944 = x_41934 && x_41943; - bool defunc_1_op_res_f_res_41945 = x_41942 || y_41944; - int32_t defunc_1_op_res_f_res_41946; - - if (x_41937) { - defunc_1_op_res_f_res_41946 = x_41938; + int32_t global_tid_126898; + int32_t local_tid_126899; + int64_t group_sizze_126902; + int32_t wave_sizze_126901; + int32_t group_tid_126900; + + global_tid_126898 = get_global_id(0); + local_tid_126899 = get_local_id(0); + group_sizze_126902 = get_local_size(0); + wave_sizze_126901 = LOCKSTEP_WIDTH; + group_tid_126900 = get_group_id(0); + + int32_t phys_tid_105020; + + phys_tid_105020 = global_tid_126898; + + int32_t phys_group_id_126903; + + phys_group_id_126903 = get_group_id(0); + for (int32_t i_126904 = 0; i_126904 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_106558)) - + phys_group_id_126903, sext_i64_i32(num_groups_106559)); + i_126904++) { + int32_t virt_group_id_126905 = phys_group_id_126903 + i_126904 * + sext_i64_i32(num_groups_106559); + int64_t gtid_105019 = sext_i32_i64(virt_group_id_126905) * + segmap_group_sizze_106558 + sext_i32_i64(local_tid_126899); + + if (slt64(gtid_105019, m_75136)) { + double defunc_3_map_res_r_transformed_row_106564 = ((__global + double *) mem_121351)[gtid_105019 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_106527 * + k2p2zq_75151 + + i_106527]; + + for (int64_t i_119725 = 0; i_119725 < k2p2zq_75151; i_119725++) { + for (int64_t i_126907 = 0; i_126907 < k2p2zq_75151; + i_126907++) { + ((__global double *) mem_121492)[phys_tid_105020 + + i_126907 * + num_threads_126154] = + ((__global double *) mem_121476)[i_119725 * (m_75136 * + k2p2zq_75151) + + gtid_105019 + + i_126907 * m_75136]; + } + + double defunc_2_map_res_transformed_row_106569 = ((__global + double *) mem_120252)[i_119725 * + k2p2zq_75151 + + i_106527]; + double defunc_2_reduce_res_106570; + double redout_119728 = 0.0; + + for (int64_t i_119729 = 0; i_119729 < j_m_i_106531; + i_119729++) { + 
int64_t slice_120011 = x_106526 + i_119729; + double x_106575 = ((__global + double *) mem_121458)[slice_120011 * + (k2p2zq_75151 * + m_75136) + + gtid_105019 * + k2p2zq_75151 + + i_106527]; + bool isnan_res_106576; + + isnan_res_106576 = futrts_isnan64(x_106575); + + double defunc_1_f_res_106577; + + if (isnan_res_106576) { + defunc_1_f_res_106577 = 0.0; } else { - defunc_1_op_res_f_res_41946 = x_41935; + double x_106574 = ((__global + double *) mem_121476)[i_119725 * + (m_75136 * + k2p2zq_75151) + + slice_120011 * + m_75136 + + gtid_105019]; + double defunc_1_f_res_f_res_106578 = x_106574 * + x_106575; + + defunc_1_f_res_106577 = defunc_1_f_res_f_res_106578; } - defunc_1_op_res_41940 = defunc_1_op_res_f_res_41945; - defunc_1_op_res_41941 = defunc_1_op_res_f_res_41946; + + double defunc_1_op_res_106573 = defunc_1_f_res_106577 + + redout_119728; + double redout_tmp_126908 = defunc_1_op_res_106573; + + redout_119728 = redout_tmp_126908; + } + defunc_2_reduce_res_106570 = redout_119728; + + double zm_res_106579 = defunc_2_map_res_transformed_row_106569 - + defunc_2_reduce_res_106570; + double zs_res_106580 = zm_res_106579 / + defunc_3_map_res_r_transformed_row_106564; + + ((__global double *) mem_121492)[phys_tid_105020 + i_106527 * + num_threads_126154] = + zs_res_106580; + for (int64_t i_126909 = 0; i_126909 < k2p2zq_75151; + i_126909++) { + ((__global double *) mem_121480)[phys_tid_105020 + + (i_119725 * + (num_threads_126154 * + k2p2zq_75151) + + i_126909 * + num_threads_126154)] = + ((__global double *) mem_121492)[phys_tid_105020 + + i_126909 * + num_threads_126154]; + } + } + for (int64_t i_126910 = 0; i_126910 < k2p2zq_75151; i_126910++) { + for (int64_t i_126911 = 0; i_126911 < k2p2zq_75151; + i_126911++) { + ((__global double *) mem_121504)[i_126910 * (m_75136 * + k2p2zq_75151) + + i_126911 * m_75136 + + gtid_105019] = ((__global + double *) mem_121480)[phys_tid_105020 + + (i_126910 * + (num_threads_126154 * + k2p2zq_75151) + + i_126911 * + 
num_threads_126154)]; } - - float defunc_1_op_res_41947 = x_41936 + x_41939; - - x_41934 = defunc_1_op_res_41940; - x_41935 = defunc_1_op_res_41941; - x_41936 = defunc_1_op_res_41947; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46620)[sext_i32_i64(local_tid_46604)] = - x_41934; - ((__local - int32_t *) red_arr_mem_46622)[sext_i32_i64(local_tid_46604)] = - x_41935; - ((__local - float *) red_arr_mem_46624)[sext_i32_i64(local_tid_46604)] = - x_41936; } } - skip_waves_46627 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - acc0_41931 = ((__local bool *) red_arr_mem_46620)[(int64_t) 0]; - acc0_41932 = ((__local int32_t *) red_arr_mem_46622)[(int64_t) 0]; - acc0_41933 = ((__local float *) red_arr_mem_46624)[(int64_t) 0]; - - bool x_41963 = acc0_41931 && acc0_41931; - int32_t defunc_1_op_res_f_res_41967; - - if (acc0_41931) { - defunc_1_op_res_f_res_41967 = acc0_41932; - } else { - defunc_1_op_res_f_res_41967 = -1; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - bool cond_41973 = y_41890 == 0; - float defunc_0_f_res_41974; + error_0: + return; + #undef segmap_group_sizze_106558 +} +__kernel void mainzisegmap_105108(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t i_106527, + int64_t binop_x_120251, __global + unsigned char *mem_param_121469, __global + unsigned char *mem_121559) +{ + #define segmap_group_sizze_106640 (mainzisegmap_group_sizze_105112) - if (cond_41973) { - defunc_0_f_res_41974 = 0.0F; - } else { - float i32_res_41975 = sitofp_i32_f32(y_41890); - float defunc_0_f_res_f_res_41976 = acc0_41933 / i32_res_41975; - - defunc_0_f_res_41974 = defunc_0_f_res_f_res_41976; - } + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - bool cond_41977 = !x_41963; - int32_t fst_breakzq_41978; + if (*global_failure >= 0) + return; - if (cond_41977) { - fst_breakzq_41978 = -1; - } else { - bool cond_41979 = slt32(defunc_1_op_res_f_res_41967, y_41890); - int32_t 
adjustValInds_res_41980; - - if (cond_41979) { - int32_t i_41981 = add32(x_41884, defunc_1_op_res_f_res_41967); - int64_t i_41982 = sext_i32_i64(i_41981); - bool x_41983 = sle64((int64_t) 0, i_41982); - bool y_41984 = slt64(i_41982, N_29165); - bool bounds_check_41985 = x_41983 && y_41984; - bool index_certs_41986; - - if (!bounds_check_41985) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 27) == - -1) { - global_failure_args[0] = i_41982; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_3; - } - } - - int32_t x_41987 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_41633 * - N_29165 + - i_41982]; - int32_t adjustValInds_res_t_res_41988 = sub32(x_41987, n_29169); - - adjustValInds_res_41980 = adjustValInds_res_t_res_41988; - } else { - adjustValInds_res_41980 = -1; + int32_t global_tid_126993; + int32_t local_tid_126994; + int64_t group_sizze_126997; + int32_t wave_sizze_126996; + int32_t group_tid_126995; + + global_tid_126993 = get_global_id(0); + local_tid_126994 = get_local_id(0); + group_sizze_126997 = get_local_size(0); + wave_sizze_126996 = LOCKSTEP_WIDTH; + group_tid_126995 = get_group_id(0); + + int32_t phys_tid_105108; + + phys_tid_105108 = global_tid_126993; + + int64_t gtid_105105; + + gtid_105105 = squot64(sext_i32_i64(group_tid_126995) * + segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994), k2p2zq_75151); + + int64_t gtid_105106; + + gtid_105106 = sext_i32_i64(group_tid_126995) * segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994) - + squot64(sext_i32_i64(group_tid_126995) * segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994), k2p2zq_75151) * k2p2zq_75151; + + int64_t gtid_105107; + + gtid_105107 = sext_i32_i64(group_tid_126995) * segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994) - + squot64(sext_i32_i64(group_tid_126995) * segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994), k2p2zq_75151) * k2p2zq_75151 - + (sext_i32_i64(group_tid_126995) * 
segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994) - + squot64(sext_i32_i64(group_tid_126995) * segmap_group_sizze_106640 + + sext_i32_i64(local_tid_126994), k2p2zq_75151) * k2p2zq_75151); + if ((slt64(gtid_105105, m_75136) && slt64(gtid_105106, k2p2zq_75151)) && + slt64(gtid_105107, (int64_t) 1)) { + double zs_res_106643 = ((__global double *) mem_121559)[gtid_105105 * + k2p2zq_75151 + + gtid_105106]; + + if (((sle64((int64_t) 0, gtid_105105) && slt64(gtid_105105, m_75136)) && + (sle64((int64_t) 0, gtid_105106) && slt64(gtid_105106, + k2p2zq_75151))) && + (sle64((int64_t) 0, i_106527) && slt64(i_106527, k2p2zq_75151))) { + ((__global double *) mem_param_121469)[gtid_105105 * + binop_x_120251 + + gtid_105106 * k2p2zq_75151 + + i_106527] = zs_res_106643; } - fst_breakzq_41978 = adjustValInds_res_41980; - } - - bool cond_41989 = sle32(x_41884, 5); - bool cond_f_res_41990 = sle32(y_41890, 5); - bool x_41991 = !cond_41989; - bool y_41992 = cond_f_res_41990 && x_41991; - bool cond_41993 = cond_41989 || y_41992; - int32_t fst_breakzq_41994; - - if (cond_41993) { - fst_breakzq_41994 = -2; - } else { - fst_breakzq_41994 = fst_breakzq_41978; - } - if (local_tid_46604 == 0) { - ((__global int32_t *) mem_45291)[gtid_41633] = fst_breakzq_41994; - } - if (local_tid_46604 == 0) { - ((__global float *) mem_45293)[gtid_41633] = defunc_0_f_res_41974; } - error_3: + error_0: return; + #undef segmap_group_sizze_106640 } -__kernel void mainzisegmap_intragroup_42541(__global int *global_failure, - __local volatile - int64_t *mem_44480_backing_aligned_0, - int64_t m_29166, int32_t n_29169, - int64_t i32_res_29181, - int64_t Ty_42529, int64_t Tx_42530, - int64_t gridDim_x_42531, - int64_t gridDim_y_42532, - int64_t group_sizze_tile3d_42536, - int64_t count_shmem_42537, __global - unsigned char *mem_44393, __global - unsigned char *mem_44397, __global - unsigned char *mem_44468, __global - unsigned char *mem_44528) +__kernel void mainzisegmap_105120(__global int *global_failure, 
int64_t m_75136, + int64_t k2p2zq_75151, int64_t i_106527, + __global unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121555, __global + unsigned char *mem_121559) { + #define segmap_group_sizze_106629 (mainzisegmap_group_sizze_105123) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44480_backing_0 = (__local volatile - char *) mem_44480_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45721; - int32_t local_tid_45722; - int64_t group_sizze_45725; - int32_t wave_sizze_45724; - int32_t group_tid_45723; - - global_tid_45721 = get_global_id(0); - local_tid_45722 = get_local_id(0); - group_sizze_45725 = get_local_size(0); - wave_sizze_45724 = LOCKSTEP_WIDTH; - group_tid_45723 = get_group_id(0); - - int32_t gid_flat_42541; - - gid_flat_42541 = group_tid_45723; - - int32_t ltid_pre_45726; - - ltid_pre_45726 = squot32(local_tid_45722, sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45727; - - ltid_pre_45727 = squot32(local_tid_45722 - squot32(local_tid_45722, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45728; - - ltid_pre_45728 = local_tid_45722 - squot32(local_tid_45722, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)) - - squot32(local_tid_45722 - squot32(local_tid_45722, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)) * sext_i64_i32(Tx_42530); - - int32_t ltid_pre_45729; - - ltid_pre_45729 = squot32(local_tid_45722, sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45730; - - ltid_pre_45730 = local_tid_45722 - squot32(local_tid_45722, - sext_i64_i32(Tx_42530)) * - sext_i64_i32(Tx_42530); - - int32_t ltid_pre_45731; - - ltid_pre_45731 = local_tid_45722; - - 
int64_t gid_zz_42540; - - gid_zz_42540 = squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 * - gridDim_x_42531); - - int64_t gid_y_42539; - - gid_y_42539 = squot64(sext_i32_i64(group_tid_45723) - - squot64(sext_i32_i64(group_tid_45723), - gridDim_y_42532 * gridDim_x_42531) * - (gridDim_y_42532 * gridDim_x_42531), gridDim_x_42531); - - int64_t gid_x_42538; - - gid_x_42538 = sext_i32_i64(group_tid_45723) - - squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531) - - squot64(sext_i32_i64(group_tid_45723) - - squot64(sext_i32_i64(group_tid_45723), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531), - gridDim_x_42531) * gridDim_x_42531; - - int64_t ii_42542; - - ii_42542 = (int64_t) 30 * gid_zz_42540; - - int64_t jj1_42543 = Ty_42529 * gid_y_42539; - int64_t jj2_42544 = Tx_42530 * gid_x_42538; - float mem_44478[30]; - int64_t ltid_y_42547 = sext_i32_i64(ltid_pre_45729); - int64_t ltid_x_42545 = sext_i32_i64(ltid_pre_45730); - int32_t ltid_flat_42546 = local_tid_45722; - float mem_44472[30]; - - for (int32_t i_44270 = 0; i_44270 < 30; i_44270++) { - int64_t i_42555 = sext_i32_i64(i_44270); - - mem_44472[i_42555] = 0.0F; + int32_t global_tid_126988; + int32_t local_tid_126989; + int64_t group_sizze_126992; + int32_t wave_sizze_126991; + int32_t group_tid_126990; + + global_tid_126988 = get_global_id(0); + local_tid_126989 = get_local_id(0); + group_sizze_126992 = get_local_size(0); + wave_sizze_126991 = LOCKSTEP_WIDTH; + group_tid_126990 = get_group_id(0); + + int32_t phys_tid_105120; + + phys_tid_105120 = global_tid_126988; + + int64_t gtid_105118; + + gtid_105118 = squot64(sext_i32_i64(group_tid_126990) * + segmap_group_sizze_106629 + + sext_i32_i64(local_tid_126989), k2p2zq_75151); + + int64_t gtid_105119; + + gtid_105119 = sext_i32_i64(group_tid_126990) * segmap_group_sizze_106629 + + sext_i32_i64(local_tid_126989) - + squot64(sext_i32_i64(group_tid_126990) * 
segmap_group_sizze_106629 + + sext_i32_i64(local_tid_126989), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_105118, m_75136) && slt64(gtid_105119, k2p2zq_75151)) { + double defunc_3_map_res_r_transformed_row_106632 = ((__global + double *) mem_121351)[gtid_105118 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_106527 * + k2p2zq_75151 + + i_106527]; + double defunc_2_map_res_transformed_row_106633 = ((__global + double *) mem_120252)[gtid_105119 * + k2p2zq_75151 + + i_106527]; + double defunc_2_reduce_res_106634 = ((__global + double *) mem_121555)[gtid_105118 * + k2p2zq_75151 + + gtid_105119]; + double zm_res_106635 = defunc_2_map_res_transformed_row_106633 - + defunc_2_reduce_res_106634; + double zs_res_106636 = zm_res_106635 / + defunc_3_map_res_r_transformed_row_106632; + + ((__global double *) mem_121559)[gtid_105118 * k2p2zq_75151 + + gtid_105119] = zs_res_106636; } - for (int64_t i_45733 = 0; i_45733 < (int64_t) 30; i_45733++) { - mem_44478[i_45733] = mem_44472[i_45733]; - } - barrier(CLK_LOCAL_MEM_FENCE); - __local char *mem_44480; + error_0: + return; + #undef segmap_group_sizze_106629 +} +__kernel void mainzisegmap_105446(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_121335, __global + unsigned char *mem_121343, __global + unsigned char *mem_121346, __global + unsigned char *mem_121351) +{ + #define segmap_group_sizze_106436 (mainzisegmap_group_sizze_105450) - mem_44480 = (__local char *) mem_44480_backing_0; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - float loop_mem_44510[30]; - float mem_param_44481[30]; + if (*global_failure >= 0) + return; - for (int32_t i_1 = 0; i_1 < 30; i_1++) - mem_param_44481[i_1] = mem_44478[i_1]; - for (int32_t i_44272 = 0; i_44272 < n_29169; i_44272++) { - int64_t i_42560 = sext_i32_i64(i_44272); - - for (int64_t i_42563 = 0; i_42563 < count_shmem_42537; i_42563++) { - int64_t offs_42576 = group_sizze_tile3d_42536 * i_42563; - int64_t 
ltid_42566 = sext_i32_i64(ltid_pre_45731); - int32_t ltid_flat_42565 = local_tid_45722; - int64_t loc_ind_42577 = ltid_42566 + offs_42576; - int64_t gtid_42578 = ii_42542 + loc_ind_42577; - bool cond_42579 = slt64(gtid_42578, m_29166); - float y_elem_42580; - - if (cond_42579) { - float Y_elem_42582 = ((__global float *) mem_44468)[i_42560 * - m_29166 + - gtid_42578]; - - y_elem_42580 = Y_elem_42582; - } else { - y_elem_42580 = 0.0F; - } - - bool cond_42584 = slt64(loc_ind_42577, (int64_t) 30); - int64_t y_loc_ind_42585; - - if (cond_42584) { - y_loc_ind_42585 = loc_ind_42577; - } else { - y_loc_ind_42585 = (int64_t) -1; - } - if (sle64((int64_t) 0, y_loc_ind_42585) && slt64(y_loc_ind_42585, - (int64_t) 30)) { - ((__local float *) mem_44480)[y_loc_ind_42585] = y_elem_42580; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - float mem_44509[30]; - int64_t ltid_y_42591 = sext_i32_i64(ltid_pre_45729); - int64_t ltid_x_42589 = sext_i32_i64(ltid_pre_45730); - int32_t ltid_flat_42590 = local_tid_45722; - int64_t gtid_42618 = jj1_42543 + ltid_y_42591; - int64_t gtid_42619 = jj2_42544 + ltid_x_42589; - bool binop_x_42621 = slt64(gtid_42618, i32_res_29181); - bool binop_y_42622 = slt64(gtid_42619, i32_res_29181); - bool cond_42623 = binop_x_42621 && binop_y_42622; - float mem_45450[30]; - - if (cond_42623) { - float x_42626 = ((__global float *) mem_44393)[i_42560 * - i32_res_29181 + - gtid_42618]; - float x_42628 = ((__global float *) mem_44397)[i_42560 * - i32_res_29181 + - gtid_42619]; - - for (int32_t i_44271 = 0; i_44271 < 30; i_44271++) { - int64_t i_42630 = sext_i32_i64(i_44271); - int64_t gtid_42632 = ii_42542 + i_42630; - bool cond_42633 = slt64(gtid_42632, m_29166); - - if (cond_42633) { - float inp_reg_var2zz_42635 = ((__local - float *) mem_44480)[i_42630]; - float res_reg_var2zz_42636 = mem_param_44481[i_42630]; - float x_42640 = x_42626 * x_42628; - bool isnan_res_42641; - - isnan_res_42641 = futrts_isnan32(inp_reg_var2zz_42635); - - float y_42642; - - if 
(isnan_res_42641) { - y_42642 = 0.0F; - } else { - y_42642 = 1.0F; - } - - float defunc_2_f_res_42643 = x_42640 * y_42642; - float defunc_1_op_res_42647 = res_reg_var2zz_42636 + - defunc_2_f_res_42643; - - mem_param_44481[i_42630] = defunc_1_op_res_42647; - } - } - for (int64_t i_45739 = 0; i_45739 < (int64_t) 30; i_45739++) { - mem_45450[i_45739] = mem_param_44481[i_45739]; - } + int32_t global_tid_126865; + int32_t local_tid_126866; + int64_t group_sizze_126869; + int32_t wave_sizze_126868; + int32_t group_tid_126867; + + global_tid_126865 = get_global_id(0); + local_tid_126866 = get_local_id(0); + group_sizze_126869 = get_local_size(0); + wave_sizze_126868 = LOCKSTEP_WIDTH; + group_tid_126867 = get_group_id(0); + + int32_t phys_tid_105446; + + phys_tid_105446 = global_tid_126865; + + int64_t gtid_105443; + + gtid_105443 = squot64(sext_i32_i64(group_tid_126867) * + segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866), k2p2zq_75151 * + k2p2zq_75151); + + int64_t gtid_105444; + + gtid_105444 = squot64(sext_i32_i64(group_tid_126867) * + segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866) - + squot64(sext_i32_i64(group_tid_126867) * + segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + + int64_t gtid_105445; + + gtid_105445 = sext_i32_i64(group_tid_126867) * segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866) - + squot64(sext_i32_i64(group_tid_126867) * segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866), k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - squot64(sext_i32_i64(group_tid_126867) * + segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866) - + squot64(sext_i32_i64(group_tid_126867) * + segmap_group_sizze_106436 + + sext_i32_i64(local_tid_126866), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + if ((slt64(gtid_105443, m_75136) && slt64(gtid_105444, 
k2p2zq_75151)) && + slt64(gtid_105445, k2p2zq_75151)) { + int64_t min_res_106439 = ((__global int64_t *) mem_121343)[gtid_105443]; + bool cond_f_res_106440 = ((__global bool *) mem_121346)[gtid_105443 * + k2p2zq_75151 + + gtid_105444]; + int64_t x_106443 = add64((int64_t) 1, gtid_105445); + bool cond_106444 = slt64(min_res_106439, x_106443); + bool x_106445 = !cond_106444; + bool y_106446 = cond_f_res_106440 && x_106445; + bool cond_106447 = cond_106444 || y_106446; + double defunc_1_f_res_106448; + + if (cond_106447) { + defunc_1_f_res_106448 = NAN; } else { - for (int64_t i_45740 = 0; i_45740 < (int64_t) 30; i_45740++) { - mem_45450[i_45740] = mem_param_44481[i_45740]; - } - } - for (int64_t i_45741 = 0; i_45741 < (int64_t) 30; i_45741++) { - mem_44509[i_45741] = mem_45450[i_45741]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_45734[30]; - - for (int32_t i_2 = 0; i_2 < 30; i_2++) - mem_param_tmp_45734[i_2] = mem_44509[i_2]; - for (int32_t i_3 = 0; i_3 < 30; i_3++) - mem_param_44481[i_3] = mem_param_tmp_45734[i_3]; - } - for (int32_t i_4 = 0; i_4 < 30; i_4++) - loop_mem_44510[i_4] = mem_param_44481[i_4]; - - float mem_44524[30 * 1 * 1]; - int64_t ltid_zz_42656 = sext_i32_i64(ltid_pre_45726); - int64_t ltid_y_42655 = sext_i32_i64(ltid_pre_45727); - int64_t ltid_x_42653 = sext_i32_i64(ltid_pre_45728); - int32_t ltid_flat_42654 = local_tid_45722; - float mem_44518[30 * 1 * 1]; - - for (int32_t i_44274 = 0; i_44274 < 30; i_44274++) { - int64_t i_42665 = sext_i32_i64(i_44274); - - for (int64_t i_45743 = 0; i_45743 < (int64_t) 1; i_45743++) { - mem_44518[i_42665 + i_45743] = loop_mem_44510[i_42665 + i_45743]; - } - } - for (int64_t i_45744 = 0; i_45744 < (int64_t) 30; i_45744++) { - for (int64_t i_45745 = 0; i_45745 < (int64_t) 1; i_45745++) { - for (int64_t i_45746 = 0; i_45746 < (int64_t) 1; i_45746++) { - mem_44524[i_45744 + i_45745 + i_45746] = mem_44518[i_45744 + - i_45745 + - i_45746]; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t 
reg_tile_i_45747 = squot64(sext_i32_i64(local_tid_45722), Ty_42529 * - Tx_42530); - int64_t reg_tile_i_45748 = squot64(sext_i32_i64(local_tid_45722) - - squot64(sext_i32_i64(local_tid_45722), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530); - int64_t reg_tile_i_45749 = sext_i32_i64(local_tid_45722) - - squot64(sext_i32_i64(local_tid_45722), Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530) - squot64(sext_i32_i64(local_tid_45722) - - squot64(sext_i32_i64(local_tid_45722), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530) * - Tx_42530; - int64_t tile_dim_start_45750 = (int64_t) 30 * (gid_zz_42540 + - reg_tile_i_45747); - int64_t tile_dim_start_45751 = Ty_42529 * gid_y_42539 + reg_tile_i_45748; - int64_t tile_dim_start_45752 = Tx_42530 * gid_x_42538 + reg_tile_i_45749; - - for (int64_t nest_i_45753 = 0; nest_i_45753 < (int64_t) 30; - nest_i_45753++) { - for (int64_t nest_i_45754 = 0; nest_i_45754 < (int64_t) 1; - nest_i_45754++) { - for (int64_t nest_i_45755 = 0; nest_i_45755 < (int64_t) 1; - nest_i_45755++) { - if ((slt64(tile_dim_start_45750 + nest_i_45753, m_29166) && - slt64(tile_dim_start_45751 + nest_i_45754, - i32_res_29181)) && slt64(tile_dim_start_45752 + - nest_i_45755, - i32_res_29181)) { - ((__global float *) mem_44528)[(tile_dim_start_45750 + - nest_i_45753) * - (i32_res_29181 * - i32_res_29181) + - (tile_dim_start_45751 + - nest_i_45754) * - i32_res_29181 + - (tile_dim_start_45752 + - nest_i_45755)] = - mem_44524[nest_i_45753 + nest_i_45754 + nest_i_45755]; - } - } + double x_106442 = ((__global double *) mem_121335)[gtid_105444 * + (m_75136 * + k2p2zq_75151) + + gtid_105445 * + m_75136 + + gtid_105443]; + + defunc_1_f_res_106448 = x_106442; } + ((__global double *) mem_121351)[gtid_105443 * (k2p2zq_75151 * + k2p2zq_75151) + + gtid_105444 * k2p2zq_75151 + + gtid_105445] = defunc_1_f_res_106448; } - error_4: + error_0: return; + #undef segmap_group_sizze_106436 } -__kernel void mainzisegmap_intragroup_42694(__global int *global_failure, 
- __local volatile - int64_t *mem_44668_backing_aligned_0, - __local volatile - int64_t *mem_44666_backing_aligned_1, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29175, - int64_t i32_res_29181, - int64_t gridDim_x_42688, - int64_t full_tiles_42719, - int64_t kk_42926, __global - unsigned char *images_mem_44381, - __global unsigned char *mem_44393, - __global unsigned char *mem_44840) +__kernel void mainzisegmap_105481(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_121343, __global + unsigned char *mem_121346) { - #define Ty_42675 (mainziTy_42672) - #define Ry_42676 (mainziRy_42674) - #define Tx_42677 (mainziTx_42671) - #define Rx_42678 (mainziRx_42673) - #define Tk_42679 (mainziTk_42670) - #define tk_div_tx_42680 (sdiv_up64(mainziTk_42670, mainziTx_42671)) - #define tk_div_ty_42681 (sdiv_up64(mainziTk_42670, mainziTy_42672)) - #define TxRx_42682 (mainziTx_42671 * mainziRx_42673) - #define TyRy_42683 (mainziTy_42672 * mainziRy_42674) - #define a_loc_szz_42685 (mainziTk_42670 * (mainziTy_42672 * mainziRy_42674)) - #define b_loc_szz_42687 (mainziRx_42673 * (mainziTx_42671 * mainziTk_42670)) + #define segmap_group_sizze_106421 (mainzisegmap_group_sizze_105484) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44668_backing_1 = (__local volatile - char *) mem_44668_backing_aligned_0; - __local volatile char *restrict mem_44666_backing_0 = (__local volatile - char *) mem_44666_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_45896; - int32_t local_tid_45897; - int64_t group_sizze_45900; - int32_t wave_sizze_45899; - int32_t group_tid_45898; - - global_tid_45896 = get_global_id(0); - local_tid_45897 = get_local_id(0); - group_sizze_45900 = get_local_size(0); - wave_sizze_45899 = LOCKSTEP_WIDTH; - group_tid_45898 = get_group_id(0); - - int32_t gid_flat_42694; + int32_t global_tid_126860; + int32_t local_tid_126861; + 
int64_t group_sizze_126864; + int32_t wave_sizze_126863; + int32_t group_tid_126862; - gid_flat_42694 = group_tid_45898; + global_tid_126860 = get_global_id(0); + local_tid_126861 = get_local_id(0); + group_sizze_126864 = get_local_size(0); + wave_sizze_126863 = LOCKSTEP_WIDTH; + group_tid_126862 = get_group_id(0); - int32_t ltid_pre_45901; + int32_t phys_tid_105481; - ltid_pre_45901 = squot32(local_tid_45897, sext_i64_i32(Tx_42677)); + phys_tid_105481 = global_tid_126860; - int32_t ltid_pre_45902; + int64_t gtid_105479; - ltid_pre_45902 = local_tid_45897 - squot32(local_tid_45897, - sext_i64_i32(Tx_42677)) * - sext_i64_i32(Tx_42677); + gtid_105479 = squot64(sext_i32_i64(group_tid_126862) * + segmap_group_sizze_106421 + + sext_i32_i64(local_tid_126861), k2p2zq_75151); - int64_t gid_y_42693; + int64_t gtid_105480; - gid_y_42693 = squot64(sext_i32_i64(group_tid_45898), gridDim_x_42688); - - int64_t gid_x_42692; - - gid_x_42692 = sext_i32_i64(group_tid_45898) - - squot64(sext_i32_i64(group_tid_45898), gridDim_x_42688) * - gridDim_x_42688; - - int64_t iii_42695; + gtid_105480 = sext_i32_i64(group_tid_126862) * segmap_group_sizze_106421 + + sext_i32_i64(local_tid_126861) - + squot64(sext_i32_i64(group_tid_126862) * segmap_group_sizze_106421 + + sext_i32_i64(local_tid_126861), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_105479, m_75136) && slt64(gtid_105480, k2p2zq_75151)) { + int64_t min_res_106424 = ((__global int64_t *) mem_121343)[gtid_105479]; + int64_t x_106426 = add64((int64_t) 1, gtid_105480); + bool cond_f_res_106427 = slt64(min_res_106424, x_106426); + + ((__global bool *) mem_121346)[gtid_105479 * k2p2zq_75151 + + gtid_105480] = cond_f_res_106427; + } - iii_42695 = TyRy_42683 * gid_y_42693; + error_0: + return; + #undef segmap_group_sizze_106421 +} +__kernel void mainzisegmap_105526(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_75223, + unsigned char 
y_75227, int64_t min_res_75341, + int64_t k_75342, int64_t num_groups_106136, + int64_t num_threads_126139, __global + unsigned char *mem_120248, __global + unsigned char *mem_121001, __global + unsigned char *mem_121004, __global + unsigned char *mem_121008, __global + unsigned char *mem_121011, __global + unsigned char *mem_121335, __global + unsigned char *mem_121338, __global + unsigned char *mem_121341, __global + unsigned char *mem_121343, __global + unsigned char *mem_125167, __global + unsigned char *mem_125169, __global + unsigned char *mem_125438, __global + unsigned char *mem_125446, __global + unsigned char *mem_125448, __global + unsigned char *mem_125498, __global + unsigned char *double_buffer_mem_125552, + __global + unsigned char *double_buffer_mem_125553, + __global + unsigned char *double_buffer_mem_125554) +{ + #define segmap_group_sizze_106135 (mainzisegmap_group_sizze_105528) - int64_t jjj_42696 = TxRx_42682 * gid_x_42692; - float mem_44664[Ry_42676 * Rx_42678]; - int64_t ltid_y_42699 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_x_42697 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42698 = local_tid_45897; - float mem_44655[Ry_42676 * Rx_42678]; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; - for (int64_t i_42710 = 0; i_42710 < Ry_42676; i_42710++) { - for (int64_t i_42713 = 0; i_42713 < Rx_42678; i_42713++) { - mem_44655[i_42710 * Rx_42678 + i_42713] = 0.0F; - } - } - for (int64_t i_45905 = 0; i_45905 < Ry_42676; i_45905++) { - for (int64_t i_45906 = 0; i_45906 < Rx_42678; i_45906++) { - mem_44664[i_45905 * Rx_42678 + i_45906] = mem_44655[i_45905 * - Rx_42678 + - i_45906]; - } + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; } + local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - __local char *mem_44666; - - mem_44666 = (__local char *) mem_44666_backing_0; - - __local char *mem_44668; - - mem_44668 = (__local 
char *) mem_44668_backing_1; - - float mem_44739[Ry_42676]; - float mem_44743[Rx_42678]; - float loop_mem_44755[Ry_42676 * Rx_42678]; - float mem_param_44669[Ry_42676 * Rx_42678]; - - for (int32_t i_2 = 0; i_2 < Ry_42676 * Rx_42678; i_2++) - mem_param_44669[i_2] = mem_44664[i_2]; - for (int64_t i_42720 = 0; i_42720 < full_tiles_42719; i_42720++) { - int64_t kk_42724 = Tk_42679 * i_42720; - - for (int64_t i_42725 = 0; i_42725 < Ry_42676; i_42725++) { - int64_t binop_y_42748 = Ty_42675 * i_42725; + int32_t global_tid_126791; + int32_t local_tid_126792; + int64_t group_sizze_126795; + int32_t wave_sizze_126794; + int32_t group_tid_126793; + + global_tid_126791 = get_global_id(0); + local_tid_126792 = get_local_id(0); + group_sizze_126795 = get_local_size(0); + wave_sizze_126794 = LOCKSTEP_WIDTH; + group_tid_126793 = get_group_id(0); + + int32_t phys_tid_105526; + + phys_tid_105526 = global_tid_126791; + + int32_t phys_group_id_126796; + + phys_group_id_126796 = get_group_id(0); + for (int32_t i_126797 = 0; i_126797 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_106135)) - + phys_group_id_126796, sext_i64_i32(num_groups_106136)); + i_126797++) { + int32_t virt_group_id_126798 = phys_group_id_126796 + i_126797 * + sext_i64_i32(num_groups_106136); + int64_t gtid_105525 = sext_i32_i64(virt_group_id_126798) * + segmap_group_sizze_106135 + sext_i32_i64(local_tid_126792); + + if (slt64(gtid_105525, m_75136)) { + for (int64_t i_126799 = 0; i_126799 < k2p2zq_75151; i_126799++) { + ((__global int64_t *) mem_121011)[phys_tid_105526 + i_126799 * + num_threads_126139] = + ((__global int64_t *) mem_120248)[i_126799]; + } + for (int64_t i_126800 = 0; i_126800 < k2p2zq_75151; i_126800++) { + for (int64_t i_126801 = 0; i_126801 < k2p2zq_75151; + i_126801++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_126800 * + (num_threads_126139 * + k2p2zq_75151) + + i_126801 * + num_threads_126139)] = + ((__global double *) mem_121001)[gtid_105525 + 
+ (i_126800 * (m_75136 * + k2p2zq_75151) + + i_126801 * m_75136)]; + } + } + for (int64_t i_126802 = 0; i_126802 < k2p2zq_75151; i_126802++) { + ((__global double *) double_buffer_mem_125553)[phys_tid_105526 + + i_126802 * + num_threads_126139] = + ((__global double *) mem_121004)[gtid_105525 + i_126802 * + m_75136]; + } + for (int64_t i_126803 = 0; i_126803 < (int64_t) 2; i_126803++) { + for (int64_t i_126804 = 0; i_126804 < k2p2zq_75151; + i_126804++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (i_126803 * + (num_threads_126139 * + k2p2zq_75151) + + i_126804 * + num_threads_126139)] = + ((__global double *) mem_121008)[gtid_105525 + + (i_126803 * (m_75136 * + k2p2zq_75151) + + i_126804 * m_75136)]; + } + } - for (int64_t i_42727 = 0; i_42727 < tk_div_tx_42680; i_42727++) { - int64_t binop_y_42746 = Tx_42677 * i_42727; - int64_t ltid_x_42729 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_y_42730 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42731 = local_tid_45897; - int64_t k_42747 = ltid_y_42730 + binop_y_42746; - int64_t i_42749 = ltid_x_42729 + binop_y_42748; - int64_t gtid_42750 = iii_42695 + i_42749; - int64_t A_col_idx_42751 = kk_42724 + k_42747; - bool cond_42752 = slt64(gtid_42750, m_29166); - float A_elem_42753; - - if (cond_42752) { - float A_elem_42755 = ((__global - float *) images_mem_44381)[gtid_42750 * - N_29165 + - A_col_idx_42751]; + int64_t dqrdc2_res_106150; + int64_t k_106156 = k_75342; + + for (int64_t l_106151 = 0; l_106151 < min_res_75341; l_106151++) { + int64_t x_106157 = add64((int64_t) 1, l_106151); + bool cond_106158 = slt64(x_106157, k_106156); + bool loop_cond_106159; + + if (cond_106158) { + bool y_106160 = slt64(l_106151, k2p2zq_75151); + bool index_certs_106161; + + if (!y_106160) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 45) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_106151; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = 
k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_106162 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + l_106151 * + num_threads_126139)]; + double zt_res_106163 = 1.0e-7 * zt_arg_106162; + bool index_certs_106164; - A_elem_42753 = A_elem_42755; + if (!y_106160) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 46) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_106165 = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + l_106151 * + num_threads_126139]; + bool zl_res_106166 = zl_arg_106165 < zt_res_106163; + + loop_cond_106159 = zl_res_106166; } else { - A_elem_42753 = 0.0F; + loop_cond_106159 = 0; } - bool cond_42757 = slt64(k_42747, Tk_42679); - int64_t a_loc_ind_42758; + bool y_106167 = slt64(l_106151, k2p2zq_75151); + int64_t upper_bound_106168 = sub64(k2p2zq_75151, x_106157); + bool loop_not_taken_106169 = !loop_cond_106159; + bool protect_assert_disj_106170 = y_106167 || + loop_not_taken_106169; + bool index_certs_106171; - if (cond_42757) { - int64_t binop_y_42759 = Tk_42679 * i_42749; - int64_t loc_fi_42760 = k_42747 + binop_y_42759; - - a_loc_ind_42758 = loc_fi_42760; - } else { - a_loc_ind_42758 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_42758) && - slt64(a_loc_ind_42758, a_loc_szz_42685)) { - ((__local float *) mem_44666)[a_loc_ind_42758] = - A_elem_42753; + if (!protect_assert_disj_106170) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 47) == + -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_42765 = 0; i_42765 < tk_div_ty_42681; i_42765++) { - int64_t binop_y_42786 = Ty_42675 * i_42765; - - for (int64_t i_42767 = 0; i_42767 < Rx_42678; i_42767++) 
{ - int64_t binop_y_42788 = Tx_42677 * i_42767; - int64_t ltid_x_42769 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_y_42770 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42771 = local_tid_45897; - int64_t k_42787 = ltid_x_42769 + binop_y_42786; - int64_t j_42789 = ltid_y_42770 + binop_y_42788; - int64_t gtid_42790 = jjj_42696 + j_42789; - int64_t B_row_idx_42791 = kk_42724 + k_42787; - bool cond_42792 = slt64(gtid_42790, i32_res_29181); - float B_elem_42793; - if (cond_42792) { - float B_elem_42795 = ((__global - float *) mem_44393)[B_row_idx_42791 * - i32_res_29181 + - gtid_42790]; - - B_elem_42793 = B_elem_42795; - } else { - B_elem_42793 = 0.0F; + bool index_certs_106172; + + if (!protect_assert_disj_106170) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 48) == + -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_106151; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - bool cond_42797 = slt64(k_42787, Tk_42679); - int64_t b_loc_ind_42798; + bool index_certs_106173; - if (cond_42797) { - int64_t binop_y_42799 = TxRx_42682 * k_42787; - int64_t loc_fi_42800 = j_42789 + binop_y_42799; - - b_loc_ind_42798 = loc_fi_42800; - } else { - b_loc_ind_42798 = (int64_t) -1; + if (!protect_assert_disj_106170) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 49) == + -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_106151; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - if (sle64((int64_t) 0, b_loc_ind_42798) && - slt64(b_loc_ind_42798, b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_42798] = - B_elem_42793; + + bool protect_assert_disj_106174 = y_75227 || + loop_not_taken_106169; + bool index_certs_106175; + + if (!protect_assert_disj_106174) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 50) == + -1) 
{ + global_failure_args[0] = m_75223; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float loop_mem_44754[Ry_42676 * Rx_42678]; - float mem_param_44726[Ry_42676 * Rx_42678]; - - for (int32_t i_3 = 0; i_3 < Ry_42676 * Rx_42678; i_3++) - mem_param_44726[i_3] = mem_param_44669[i_3]; - for (int64_t i_42805 = 0; i_42805 < Tk_42679; i_42805++) { - int64_t binop_y_42844 = TxRx_42682 * i_42805; - int64_t ltid_y_42809 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_x_42807 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42808 = local_tid_45897; - float mem_44729[Ry_42676]; - float mem_44731[Rx_42678]; - int64_t binop_x_42835 = Ry_42676 * ltid_y_42809; - - for (int64_t i_42833 = 0; i_42833 < Ry_42676; i_42833++) { - int64_t binop_x_42836 = i_42833 + binop_x_42835; - int64_t binop_y_42837 = Tk_42679 * binop_x_42836; - int64_t a_loc_ind_42838 = i_42805 + binop_y_42837; - for (int64_t i_45918 = 0; i_45918 < (int64_t) 1; i_45918++) { - mem_44729[i_42833 + i_45918] = ((__local - float *) mem_44666)[a_loc_ind_42838 + - i_45918]; + bool index_certs_106176; + + if (!protect_assert_disj_106174) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 51) == + -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - - int64_t binop_y_42846 = Rx_42678 * ltid_x_42807; - - for (int64_t i_42842 = 0; i_42842 < Rx_42678; i_42842++) { - int64_t binop_x_42845 = i_42842 + binop_y_42844; - int64_t b_loc_ind_42847 = binop_x_42845 + binop_y_42846; - for (int64_t i_45920 = 0; i_45920 < (int64_t) 1; i_45920++) { - mem_44731[i_42842 + i_45920] = ((__local - float *) mem_44668)[b_loc_ind_42847 + - i_45920]; + bool index_certs_106177; + + if (!protect_assert_disj_106174) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 52) == + -1) { + 
global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - for (int64_t i_45921 = 0; i_45921 < Ry_42676; i_45921++) { - mem_44739[i_45921] = mem_44729[i_45921]; - } - for (int64_t i_45922 = 0; i_45922 < Rx_42678; i_45922++) { - mem_44743[i_45922] = mem_44731[i_45922]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_44753[Ry_42676 * Rx_42678]; - int64_t ltid_y_42854 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_x_42852 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42853 = local_tid_45897; - int64_t binop_y_42897 = Ry_42676 * ltid_y_42854; - int64_t binop_y_42901 = Rx_42678 * ltid_x_42852; - - for (int64_t i_42891 = 0; i_42891 < Ry_42676; i_42891++) { - int64_t binop_x_42896 = iii_42695 + i_42891; - int64_t cmpop_x_42898 = binop_x_42896 + binop_y_42897; - bool binop_x_42899 = slt64(cmpop_x_42898, m_29166); - for (int64_t i_42894 = 0; i_42894 < Rx_42678; i_42894++) { - int64_t binop_x_42900 = jjj_42696 + i_42894; - int64_t cmpop_x_42902 = binop_x_42900 + binop_y_42901; - bool binop_y_42903 = slt64(cmpop_x_42902, i32_res_29181); - bool cond_42904 = binop_x_42899 && binop_y_42903; - - if (cond_42904) { - float a_42906 = mem_44739[i_42891]; - float c_42908 = mem_param_44726[i_42891 * Rx_42678 + - i_42894]; - bool isnan_res_42911; + bool loopres_106178; + int64_t loopres_106183; + bool loop_while_106184; + int64_t k_106189; + + loop_while_106184 = loop_cond_106159; + k_106189 = k_106156; + while (loop_while_106184) { + for (int64_t i_106191 = 0; i_106191 < k2p2zq_75151; + i_106191++) { + bool index_certs_106193; - isnan_res_42911 = futrts_isnan32(a_42906); + if (!y_106167) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 53) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = i_106191; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + 
local_failure = true; + goto error_0; + } + } - float defunc_1_f_res_42912; + double t_106194 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + i_106191 * + num_threads_126139)]; - if (isnan_res_42911) { - defunc_1_f_res_42912 = 0.0F; - } else { - float b_42907 = mem_44743[i_42894]; - float defunc_1_f_res_f_res_42913 = a_42906 * - b_42907; + for (int64_t j0_106196 = 0; j0_106196 < + upper_bound_106168; j0_106196++) { + int64_t j_106198 = add64(x_106157, j0_106196); + bool x_106199 = sle64((int64_t) 0, j_106198); + bool y_106200 = slt64(j_106198, k2p2zq_75151); + bool bounds_check_106201 = x_106199 && y_106200; + bool index_certs_106202; + + if (!bounds_check_106201) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 54) == + -1) { + global_failure_args[0] = j_106198; + global_failure_args[1] = i_106191; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } - defunc_1_f_res_42912 = defunc_1_f_res_f_res_42913; + double lw_val_106203 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (j_106198 * + (num_threads_126139 * + k2p2zq_75151) + + i_106191 * + num_threads_126139)]; + int64_t i_106204 = sub64(j_106198, (int64_t) 1); + bool x_106205 = sle64((int64_t) 0, i_106204); + bool y_106206 = slt64(i_106204, k2p2zq_75151); + bool bounds_check_106207 = x_106205 && y_106206; + bool index_certs_106208; + + if (!bounds_check_106207) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 55) == + -1) { + global_failure_args[0] = i_106204; + global_failure_args[1] = i_106191; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_106204 * + (num_threads_126139 * + k2p2zq_75151) + + i_106191 * + num_threads_126139)] = + lw_val_106203; 
} - float defunc_1_op_res_42917 = c_42908 + - defunc_1_f_res_42912; + bool index_certs_106210; - mem_param_44726[i_42891 * Rx_42678 + i_42894] = - defunc_1_op_res_42917; + if (!y_75227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 56) == -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = i_106191; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (m_75223 * + (num_threads_126139 * + k2p2zq_75151) + + i_106191 * + num_threads_126139)] = + t_106194; } - } - } - for (int64_t i_45925 = 0; i_45925 < Ry_42676; i_45925++) { - for (int64_t i_45926 = 0; i_45926 < Rx_42678; i_45926++) { - mem_44753[i_45925 * Rx_42678 + i_45926] = - mem_param_44726[i_45925 * Rx_42678 + i_45926]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_45915[Ry_42676 * Rx_42678]; - - for (int32_t i_4 = 0; i_4 < Ry_42676 * Rx_42678; i_4++) - mem_param_tmp_45915[i_4] = mem_44753[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_42676 * Rx_42678; i_5++) - mem_param_44726[i_5] = mem_param_tmp_45915[i_5]; - } - for (int32_t i_6 = 0; i_6 < Ry_42676 * Rx_42678; i_6++) - loop_mem_44754[i_6] = mem_param_44726[i_6]; - - float mem_param_tmp_45907[Ry_42676 * Rx_42678]; - - for (int32_t i_7 = 0; i_7 < Ry_42676 * Rx_42678; i_7++) - mem_param_tmp_45907[i_7] = loop_mem_44754[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_42676 * Rx_42678; i_8++) - mem_param_44669[i_8] = mem_param_tmp_45907[i_8]; - } - for (int32_t i_9 = 0; i_9 < Ry_42676 * Rx_42678; i_9++) - loop_mem_44755[i_9] = mem_param_44669[i_9]; - for (int64_t i_42927 = 0; i_42927 < Ry_42676; i_42927++) { - int64_t binop_y_42952 = Ty_42675 * i_42927; - - for (int64_t i_42929 = 0; i_42929 < tk_div_tx_42680; i_42929++) { - int64_t binop_y_42950 = Tx_42677 * i_42929; - int64_t ltid_x_42931 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_y_42932 = sext_i32_i64(ltid_pre_45902); - 
int32_t ltid_flat_42933 = local_tid_45897; - int64_t k_42951 = ltid_y_42932 + binop_y_42950; - int64_t i_42953 = ltid_x_42931 + binop_y_42952; - int64_t gtid_42954 = iii_42695 + i_42953; - int64_t A_col_idx_42955 = kk_42926 + k_42951; - bool binop_x_42956 = slt64(gtid_42954, m_29166); - bool binop_y_42957 = slt64(A_col_idx_42955, i32_res_29175); - bool cond_42958 = binop_x_42956 && binop_y_42957; - float A_elem_42959; - - if (cond_42958) { - float A_elem_42961 = ((__global - float *) images_mem_44381)[gtid_42954 * - N_29165 + - A_col_idx_42955]; - - A_elem_42959 = A_elem_42961; - } else { - A_elem_42959 = 0.0F; - } - - bool cond_42963 = slt64(k_42951, Tk_42679); - int64_t a_loc_ind_42964; - - if (cond_42963) { - int64_t binop_y_42965 = Tk_42679 * i_42953; - int64_t loc_fi_42966 = k_42951 + binop_y_42965; - - a_loc_ind_42964 = loc_fi_42966; - } else { - a_loc_ind_42964 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_42964) && slt64(a_loc_ind_42964, - a_loc_szz_42685)) { - ((__local float *) mem_44666)[a_loc_ind_42964] = A_elem_42959; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_42971 = 0; i_42971 < tk_div_ty_42681; i_42971++) { - int64_t binop_y_42994 = Ty_42675 * i_42971; - - for (int64_t i_42973 = 0; i_42973 < Rx_42678; i_42973++) { - int64_t binop_y_42996 = Tx_42677 * i_42973; - int64_t ltid_x_42975 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_y_42976 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_42977 = local_tid_45897; - int64_t k_42995 = ltid_x_42975 + binop_y_42994; - int64_t j_42997 = ltid_y_42976 + binop_y_42996; - int64_t gtid_42998 = jjj_42696 + j_42997; - int64_t B_row_idx_42999 = kk_42926 + k_42995; - bool binop_x_43000 = slt64(gtid_42998, i32_res_29181); - bool binop_y_43001 = slt64(B_row_idx_42999, i32_res_29175); - bool cond_43002 = binop_x_43000 && binop_y_43001; - float B_elem_43003; - - if (cond_43002) { - float B_elem_43005 = ((__global - float *) mem_44393)[B_row_idx_42999 * - i32_res_29181 + - gtid_42998]; - - 
B_elem_43003 = B_elem_43005; - } else { - B_elem_43003 = 0.0F; - } - - bool cond_43007 = slt64(k_42995, Tk_42679); - int64_t b_loc_ind_43008; - - if (cond_43007) { - int64_t binop_y_43009 = TxRx_42682 * k_42995; - int64_t loc_fi_43010 = j_42997 + binop_y_43009; - - b_loc_ind_43008 = loc_fi_43010; - } else { - b_loc_ind_43008 = (int64_t) -1; - } - if (sle64((int64_t) 0, b_loc_ind_43008) && slt64(b_loc_ind_43008, - b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_43008] = B_elem_43003; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float mem_44821[Ry_42676]; - float mem_44825[Rx_42678]; - float mem_44835[Ry_42676 * Rx_42678]; - float loop_mem_44837[Ry_42676 * Rx_42678]; - float mem_param_44808[Ry_42676 * Rx_42678]; - - for (int32_t i_10 = 0; i_10 < Ry_42676 * Rx_42678; i_10++) - mem_param_44808[i_10] = loop_mem_44755[i_10]; - for (int64_t i_43015 = 0; i_43015 < Tk_42679; i_43015++) { - int64_t cmpop_x_43017 = kk_42926 + i_43015; - bool cond_43018 = slt64(cmpop_x_43017, i32_res_29175); - float mem_45468[Ry_42676 * Rx_42678]; - - if (cond_43018) { - int64_t binop_y_43056 = TxRx_42682 * i_43015; - int64_t bytes_44810 = (int64_t) 4 * Ry_42676; - int64_t bytes_44812 = (int64_t) 4 * Rx_42678; - int64_t ltid_y_43021 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_x_43019 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_43020 = local_tid_45897; - float mem_44811[Ry_42676]; - float mem_44813[Rx_42678]; - int64_t binop_x_43047 = Ry_42676 * ltid_y_43021; - - for (int64_t i_43045 = 0; i_43045 < Ry_42676; i_43045++) { - int64_t binop_x_43048 = i_43045 + binop_x_43047; - int64_t binop_y_43049 = Tk_42679 * binop_x_43048; - int64_t a_loc_ind_43050 = i_43015 + binop_y_43049; - - for (int64_t i_45934 = 0; i_45934 < (int64_t) 1; i_45934++) { - mem_44811[i_43045 + i_45934] = ((__local - float *) mem_44666)[a_loc_ind_43050 + - i_45934]; - } - } - - int64_t binop_y_43058 = Rx_42678 * ltid_x_43019; - - for (int64_t i_43054 = 0; i_43054 < Rx_42678; i_43054++) { - int64_t 
binop_x_43057 = i_43054 + binop_y_43056; - int64_t b_loc_ind_43059 = binop_x_43057 + binop_y_43058; - - for (int64_t i_45936 = 0; i_45936 < (int64_t) 1; i_45936++) { - mem_44813[i_43054 + i_45936] = ((__local - float *) mem_44668)[b_loc_ind_43059 + - i_45936]; - } - } - for (int64_t i_45937 = 0; i_45937 < Ry_42676; i_45937++) { - mem_44821[i_45937] = mem_44811[i_45937]; - } - for (int64_t i_45938 = 0; i_45938 < Rx_42678; i_45938++) { - mem_44825[i_45938] = mem_44813[i_45938]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43066 = sext_i32_i64(ltid_pre_45901); - int64_t ltid_x_43064 = sext_i32_i64(ltid_pre_45902); - int32_t ltid_flat_43065 = local_tid_45897; - int64_t binop_y_43109 = Ry_42676 * ltid_y_43066; - int64_t binop_y_43113 = Rx_42678 * ltid_x_43064; - - for (int64_t i_43103 = 0; i_43103 < Ry_42676; i_43103++) { - int64_t binop_x_43108 = iii_42695 + i_43103; - int64_t cmpop_x_43110 = binop_x_43108 + binop_y_43109; - bool binop_x_43111 = slt64(cmpop_x_43110, m_29166); - - for (int64_t i_43106 = 0; i_43106 < Rx_42678; i_43106++) { - int64_t binop_x_43112 = jjj_42696 + i_43106; - int64_t cmpop_x_43114 = binop_x_43112 + binop_y_43113; - bool binop_y_43115 = slt64(cmpop_x_43114, i32_res_29181); - bool cond_43116 = binop_x_43111 && binop_y_43115; - if (cond_43116) { - float a_43118 = mem_44821[i_43103]; - float c_43120 = mem_param_44808[i_43103 * Rx_42678 + - i_43106]; - bool isnan_res_43123; + int64_t i_106212 = ((__global + int64_t *) mem_121011)[phys_tid_105526 + + l_106151 * + num_threads_126139]; + double t_106213 = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + l_106151 * + num_threads_126139]; + double tt_106214 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + l_106151 * + num_threads_126139]; + double ttt_106215 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + l_106151 * + num_threads_126139)]; + + for (int64_t j0_106219 = 0; j0_106219 < 
upper_bound_106168; + j0_106219++) { + int64_t j_106223 = add64(x_106157, j0_106219); + bool x_106224 = sle64((int64_t) 0, j_106223); + bool y_106225 = slt64(j_106223, k2p2zq_75151); + bool bounds_check_106226 = x_106224 && y_106225; + bool index_certs_106227; - isnan_res_43123 = futrts_isnan32(a_43118); + if (!bounds_check_106226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 57) == -1) { + global_failure_args[0] = j_106223; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } - float defunc_1_f_res_43124; + int64_t lw_val_106228 = ((__global + int64_t *) mem_121011)[phys_tid_105526 + + j_106223 * + num_threads_126139]; + int64_t i_106229 = sub64(j_106223, (int64_t) 1); + bool x_106230 = sle64((int64_t) 0, i_106229); + bool y_106231 = slt64(i_106229, k2p2zq_75151); + bool bounds_check_106232 = x_106230 && y_106231; + bool index_certs_106233; - if (isnan_res_43123) { - defunc_1_f_res_43124 = 0.0F; - } else { - float b_43119 = mem_44825[i_43106]; - float defunc_1_f_res_f_res_43125 = a_43118 * - b_43119; - - defunc_1_f_res_43124 = defunc_1_f_res_f_res_43125; + if (!bounds_check_106232) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 58) == -1) { + global_failure_args[0] = i_106229; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } + ((__global int64_t *) mem_121011)[phys_tid_105526 + + i_106229 * + num_threads_126139] = + lw_val_106228; - float defunc_1_op_res_43129 = c_43120 + - defunc_1_f_res_43124; + double lw_val_106235 = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + j_106223 * + num_threads_126139]; - mem_param_44808[i_43103 * Rx_42678 + i_43106] = - defunc_1_op_res_43129; - } - } - } - for (int64_t i_45941 = 0; i_45941 < Ry_42676; i_45941++) { - for (int64_t i_45942 = 0; i_45942 < Rx_42678; i_45942++) { - mem_44835[i_45941 * Rx_42678 + i_45942] = - mem_param_44808[i_45941 * Rx_42678 + i_45942]; - } - } - 
barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_45943 = 0; i_45943 < Ry_42676; i_45943++) { - for (int64_t i_45944 = 0; i_45944 < Rx_42678; i_45944++) { - mem_45468[i_45943 * Rx_42678 + i_45944] = - mem_44835[i_45943 * Rx_42678 + i_45944]; - } - } - } else { - for (int64_t i_45945 = 0; i_45945 < Ry_42676; i_45945++) { - for (int64_t i_45946 = 0; i_45946 < Rx_42678; i_45946++) { - mem_45468[i_45945 * Rx_42678 + i_45946] = - mem_param_44808[i_45945 * Rx_42678 + i_45946]; - } - } - } - - float mem_param_tmp_45931[Ry_42676 * Rx_42678]; - - for (int32_t i_11 = 0; i_11 < Ry_42676 * Rx_42678; i_11++) - mem_param_tmp_45931[i_11] = mem_45468[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_42676 * Rx_42678; i_12++) - mem_param_44808[i_12] = mem_param_tmp_45931[i_12]; - } - for (int32_t i_13 = 0; i_13 < Ry_42676 * Rx_42678; i_13++) - loop_mem_44837[i_13] = mem_param_44808[i_13]; - - int64_t reg_tile_i_45947 = squot64(sext_i32_i64(local_tid_45897), Tx_42677); - int64_t reg_tile_i_45948 = sext_i32_i64(local_tid_45897) - - squot64(sext_i32_i64(local_tid_45897), Tx_42677) * Tx_42677; - int64_t tile_dim_start_45949 = Ry_42676 * (Ty_42675 * gid_y_42693 + - reg_tile_i_45947); - int64_t tile_dim_start_45950 = Rx_42678 * (Tx_42677 * gid_x_42692 + - reg_tile_i_45948); - - for (int64_t nest_i_45951 = 0; nest_i_45951 < Ry_42676; nest_i_45951++) { - for (int64_t nest_i_45952 = 0; nest_i_45952 < Rx_42678; - nest_i_45952++) { - if (slt64(tile_dim_start_45949 + nest_i_45951, m_29166) && - slt64(tile_dim_start_45950 + nest_i_45952, i32_res_29181)) { - ((__global float *) mem_44840)[(tile_dim_start_45949 + - nest_i_45951) * i32_res_29181 + - (tile_dim_start_45950 + - nest_i_45952)] = - loop_mem_44837[nest_i_45951 * Rx_42678 + nest_i_45952]; - } - } - } + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + i_106229 * + num_threads_126139] = + lw_val_106235; + + bool index_certs_106237; + + if (!bounds_check_106226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 59) == -1) 
{ + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_106223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_106238 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + j_106223 * + num_threads_126139]; + bool index_certs_106239; + + if (!bounds_check_106232) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 60) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_106229; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + i_106229 * + num_threads_126139] = + lw_val_106238; + + bool index_certs_106241; + + if (!bounds_check_106226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 61) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_106223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_106242 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + j_106223 * + num_threads_126139)]; + bool index_certs_106243; + + if (!bounds_check_106232) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 62) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_106229; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + i_106229 * + num_threads_126139)] = + lw_val_106242; + } + ((__global int64_t *) mem_121011)[phys_tid_105526 + + m_75223 * + num_threads_126139] = + i_106212; + ((__global + double *) 
double_buffer_mem_125553)[phys_tid_105526 + + m_75223 * + num_threads_126139] = + t_106213; + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + m_75223 * + num_threads_126139] = + tt_106214; + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + m_75223 * + num_threads_126139)] = + ttt_106215; + + int64_t k_106249 = sub64(k_106189, (int64_t) 1); + bool cond_106250 = slt64(x_106157, k_106249); + bool loop_cond_106251; + + if (cond_106250) { + bool index_certs_106252; + + if (!y_106167) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 63) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_106151; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_106253 = ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (num_threads_126139 * + k2p2zq_75151 + + l_106151 * + num_threads_126139)]; + double zt_res_106254 = 1.0e-7 * zt_arg_106253; + bool index_certs_106255; + + if (!y_106167) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 64) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_106256 = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + l_106151 * + num_threads_126139]; + bool zl_res_106257 = zl_arg_106256 < zt_res_106254; + + loop_cond_106251 = zl_res_106257; + } else { + loop_cond_106251 = 0; + } + + bool loop_while_tmp_126810 = loop_cond_106251; + int64_t k_tmp_126815 = k_106249; + + loop_while_106184 = loop_while_tmp_126810; + k_106189 = k_tmp_126815; + } + loopres_106178 = loop_while_106184; + loopres_106183 = k_106189; + + bool cond_106258 = x_106157 == k2p2zq_75151; + int64_t j_m_i_106259 = sub64(k2p2zq_75151, l_106151); + bool empty_slice_106263 = j_m_i_106259 == (int64_t) 0; + int64_t m_106264 
= sub64(j_m_i_106259, (int64_t) 1); + int64_t i_p_m_t_s_106265 = add64(l_106151, m_106264); + bool zzero_leq_i_p_m_t_s_106266 = sle64((int64_t) 0, + i_p_m_t_s_106265); + bool i_p_m_t_s_leq_w_106267 = slt64(i_p_m_t_s_106265, + k2p2zq_75151); + bool i_lte_j_106268 = sle64(l_106151, k2p2zq_75151); + bool y_106269 = zzero_leq_i_p_m_t_s_106266 && + i_p_m_t_s_leq_w_106267; + bool y_106270 = i_lte_j_106268 && y_106269; + bool ok_or_empty_106271 = empty_slice_106263 || y_106270; + bool index_ok_106272 = y_106167 && ok_or_empty_106271; + + if (cond_106258) { + for (int64_t i_126821 = 0; i_126821 < k2p2zq_75151; + i_126821++) { + ((__global double *) mem_125448)[phys_tid_105526 + + i_126821 * + num_threads_126139] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + i_126821 * + num_threads_126139]; + } + for (int64_t i_126822 = 0; i_126822 < (int64_t) 2; + i_126822++) { + for (int64_t i_126823 = 0; i_126823 < k2p2zq_75151; + i_126823++) { + ((__global double *) mem_125446)[phys_tid_105526 + + (i_126822 * + (num_threads_126139 * + k2p2zq_75151) + + i_126823 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (i_126822 * + (num_threads_126139 * + k2p2zq_75151) + + i_126823 * + num_threads_126139)]; + } + } + for (int64_t i_126824 = 0; i_126824 < k2p2zq_75151; + i_126824++) { + for (int64_t i_126825 = 0; i_126825 < k2p2zq_75151; + i_126825++) { + ((__global double *) mem_125498)[phys_tid_105526 + + (i_126824 * + (num_threads_126139 * + k2p2zq_75151) + + i_126825 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_126824 * + (num_threads_126139 * + k2p2zq_75151) + + i_126825 * + num_threads_126139)]; + } + } + } else { + bool index_certs_106273; + + if (!index_ok_106272) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 65) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = l_106151; + global_failure_args[2] = k2p2zq_75151; + 
global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_106275; + double redout_119715 = 0.0; + + for (int64_t i_119716 = 0; i_119716 < j_m_i_106259; + i_119716++) { + int64_t slice_120006 = l_106151 + i_119716; + double x_106279 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + slice_120006 * + num_threads_126139)]; + double defunc_1_f_res_106280 = x_106279 * x_106279; + double defunc_1_op_res_106278 = defunc_1_f_res_106280 + + redout_119715; + double redout_tmp_126826 = defunc_1_op_res_106278; + + redout_119715 = redout_tmp_126826; + } + defunc_2_reduce_res_106275 = redout_119715; + + double sqrt_res_106281; + + sqrt_res_106281 = futrts_sqrt64(defunc_2_reduce_res_106275); + + bool zeze_res_106282 = sqrt_res_106281 == 0.0; + + if (zeze_res_106282) { + for (int64_t i_126827 = 0; i_126827 < k2p2zq_75151; + i_126827++) { + ((__global double *) mem_125169)[phys_tid_105526 + + i_126827 * + num_threads_126139] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + i_126827 * + num_threads_126139]; + } + for (int64_t i_126828 = 0; i_126828 < (int64_t) 2; + i_126828++) { + for (int64_t i_126829 = 0; i_126829 < k2p2zq_75151; + i_126829++) { + ((__global + double *) mem_125167)[phys_tid_105526 + + (i_126828 * + (num_threads_126139 * + k2p2zq_75151) + + i_126829 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (i_126828 * + (num_threads_126139 * + k2p2zq_75151) + + i_126829 * + num_threads_126139)]; + } + } + for (int64_t i_126830 = 0; i_126830 < k2p2zq_75151; + i_126830++) { + for (int64_t i_126831 = 0; i_126831 < k2p2zq_75151; + i_126831++) { + ((__global + double *) mem_125438)[phys_tid_105526 + + (i_126830 * + (num_threads_126139 * + k2p2zq_75151) + + i_126831 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + 
(i_126830 * + (num_threads_126139 * + k2p2zq_75151) + + i_126831 * + num_threads_126139)]; + } + } + } else { + bool index_ok_106286 = y_106167 && y_106167; + bool index_certs_106287; + + if (!index_ok_106286) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 66) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = l_106151; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_106288 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)]; + bool zeze_res_106289 = znze_arg_106288 == 0.0; + bool znze_res_106290 = !zeze_res_106289; + double nrmxl_106291; + + if (znze_res_106290) { + double abs_res_106292 = fabs(sqrt_res_106281); + double sgn_res_106293 = fsignum32(znze_arg_106288); + double zt_res_106294 = abs_res_106292 * + sgn_res_106293; + + nrmxl_106291 = zt_res_106294; + } else { + nrmxl_106291 = sqrt_res_106281; + } + for (int64_t i0_106296 = 0; i0_106296 < j_m_i_106259; + i0_106296++) { + int64_t i_106298 = add64(l_106151, i0_106296); + bool x_106299 = sle64((int64_t) 0, i_106298); + bool y_106300 = slt64(i_106298, k2p2zq_75151); + bool bounds_check_106301 = x_106299 && y_106300; + bool index_ok_106302 = y_106167 && + bounds_check_106301; + bool index_certs_106303; + + if (!index_ok_106302) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 67) == + -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = i_106298; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_106304 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + i_106298 * + num_threads_126139)]; + double lw_val_106305 = x_106304 / nrmxl_106291; + + ((__global + double 
*) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + i_106298 * + num_threads_126139)] = + lw_val_106305; + } + + double zp_arg_106307 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)]; + double zp_res_106308 = 1.0 + zp_arg_106307; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)] = + zp_res_106308; + + bool bounds_invalid_upwards_106310 = slt64(k2p2zq_75151, + x_106157); + bool valid_106311 = !bounds_invalid_upwards_106310; + bool range_valid_c_106312; + + if (!valid_106311) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 68) == -1) { + global_failure_args[0] = x_106157; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_106313 = slt64((int64_t) 0, + upper_bound_106168); + bool loop_not_taken_106314 = !loop_nonempty_106313; + bool protect_assert_disj_106315 = index_ok_106286 || + loop_not_taken_106314; + bool index_certs_106316; + + if (!protect_assert_disj_106315) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 69) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = l_106151; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_106320 = 0; i_106320 < + upper_bound_106168; i_106320++) { + int64_t index_primexp_106324 = add64(x_106157, + i_106320); + bool x_106325 = sle64((int64_t) 0, + index_primexp_106324); + bool y_106326 = slt64(index_primexp_106324, + k2p2zq_75151); + bool bounds_check_106327 = x_106325 && y_106326; + double t_106328; + double t_106330 = 0.0; + + for (int64_t i0_106329 = 0; i0_106329 < + j_m_i_106259; i0_106329++) { + int64_t i_106331 = add64(l_106151, 
i0_106329); + bool x_106332 = sle64((int64_t) 0, i_106331); + bool y_106333 = slt64(i_106331, k2p2zq_75151); + bool bounds_check_106334 = x_106332 && y_106333; + bool index_ok_106335 = y_106167 && + bounds_check_106334; + bool index_certs_106336; + + if (!index_ok_106335) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 70) == + -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = i_106331; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_106337 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + i_106331 * + num_threads_126139)]; + bool index_ok_106338 = bounds_check_106327 && + bounds_check_106334; + bool index_certs_106339; + + if (!index_ok_106338) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 71) == + -1) { + global_failure_args[0] = + index_primexp_106324; + global_failure_args[1] = i_106331; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_106340 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (index_primexp_106324 * + (num_threads_126139 * + k2p2zq_75151) + + i_106331 * + num_threads_126139)]; + double y_106341 = x_106337 * y_106340; + double loopres_106342 = t_106330 - y_106341; + double t_tmp_126836 = loopres_106342; + + t_106330 = t_tmp_126836; + } + t_106328 = t_106330; + + double y_106343 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)]; + double t_106344 = t_106328 / y_106343; + + for (int64_t i0_106346 = 0; i0_106346 < + j_m_i_106259; i0_106346++) { + int64_t i_106348 = add64(l_106151, i0_106346); + bool x_106349 = sle64((int64_t) 0, i_106348); + bool y_106350 = slt64(i_106348, k2p2zq_75151); + 
bool bounds_check_106351 = x_106349 && y_106350; + bool index_ok_106352 = bounds_check_106327 && + bounds_check_106351; + bool index_certs_106353; + + if (!index_ok_106352) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 72) == + -1) { + global_failure_args[0] = + index_primexp_106324; + global_failure_args[1] = i_106348; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_106354 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (index_primexp_106324 * + (num_threads_126139 * + k2p2zq_75151) + + i_106348 * + num_threads_126139)]; + bool index_ok_106355 = y_106167 && + bounds_check_106351; + bool index_certs_106356; + + if (!index_ok_106355) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 73) == + -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = i_106348; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_106357 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + i_106348 * + num_threads_126139)]; + double y_106358 = t_106344 * y_106357; + double lw_val_106359 = x_106354 + y_106358; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (index_primexp_106324 * + (num_threads_126139 * + k2p2zq_75151) + + i_106348 * + num_threads_126139)] = + lw_val_106359; + } + + bool index_certs_106361; + + if (!bounds_check_106327) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 74) == + -1) { + global_failure_args[0] = + index_primexp_106324; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_106362 = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + index_primexp_106324 * + num_threads_126139]; + bool zeze_res_106363 = 
zeze_arg_106362 == 0.0; + + if (!zeze_res_106363) { + bool index_ok_106366 = y_106167 && + bounds_check_106327; + bool index_certs_106367; + + if (!index_ok_106366) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 75) == + -1) { + global_failure_args[0] = + index_primexp_106324; + global_failure_args[1] = l_106151; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_106368 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (index_primexp_106324 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)]; + double abs_res_106369 = fabs(abs_arg_106368); + double zs_res_106370 = abs_res_106369 / + zeze_arg_106362; + double ztzt_res_106371 = fpow64(zs_res_106370, + 2.0); + double zm_res_106372 = 1.0 - ztzt_res_106371; + double max_res_106373 = fmax64(0.0, + zm_res_106372); + double abs_res_106374 = fabs(max_res_106373); + bool zgze_res_106375 = 1.0e-6 <= abs_res_106374; + + if (zgze_res_106375) { + double sqrt_res_106378; + + sqrt_res_106378 = + futrts_sqrt64(max_res_106373); + + double zt_res_106379 = zeze_arg_106362 * + sqrt_res_106378; + + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + index_primexp_106324 * + num_threads_126139] = + zt_res_106379; + } else { + bool empty_slice_106381 = + upper_bound_106168 == (int64_t) 0; + int64_t m_106382 = sub64(upper_bound_106168, + (int64_t) 1); + int64_t i_p_m_t_s_106383 = add64(x_106157, + m_106382); + bool zzero_leq_i_p_m_t_s_106384 = + sle64((int64_t) 0, i_p_m_t_s_106383); + bool i_p_m_t_s_leq_w_106385 = + slt64(i_p_m_t_s_106383, k2p2zq_75151); + bool zzero_lte_i_106386 = sle64((int64_t) 0, + x_106157); + bool i_lte_j_106387 = sle64(x_106157, + k2p2zq_75151); + bool y_106388 = i_p_m_t_s_leq_w_106385 && + zzero_lte_i_106386; + bool y_106389 = + zzero_leq_i_p_m_t_s_106384 && y_106388; + bool y_106390 = i_lte_j_106387 && y_106389; + bool 
forwards_ok_106391 = + zzero_lte_i_106386 && y_106390; + bool ok_or_empty_106392 = + empty_slice_106381 || + forwards_ok_106391; + bool index_ok_106393 = + bounds_check_106327 && + ok_or_empty_106392; + bool index_certs_106394; + + if (!index_ok_106393) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 76) == + -1) { + global_failure_args[0] = + index_primexp_106324; + global_failure_args[1] = + x_106157; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_106396; + double redout_119717 = 0.0; + + for (int64_t i_119718 = 0; i_119718 < + upper_bound_106168; i_119718++) { + int64_t slice_120007 = x_106157 + + i_119718; + double x_106400 = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (index_primexp_106324 * + (num_threads_126139 * + k2p2zq_75151) + + slice_120007 * + num_threads_126139)]; + double defunc_1_f_res_106401 = + x_106400 * x_106400; + double defunc_1_op_res_106399 = + defunc_1_f_res_106401 + + redout_119717; + double redout_tmp_126838 = + defunc_1_op_res_106399; + + redout_119717 = redout_tmp_126838; + } + defunc_2_reduce_res_106396 = redout_119717; + + double sqrt_res_106402; + + sqrt_res_106402 = + futrts_sqrt64(defunc_2_reduce_res_106396); + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + index_primexp_106324 * + num_threads_126139] = + sqrt_res_106402; + + bool index_certs_106404; + + if (!bounds_check_106327) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 77) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_106324; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126839 = 0; i_126839 < + (int64_t) 1; i_126839++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (index_primexp_106324 + + 
i_126839) * + num_threads_126139] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + num_threads_126139 * + index_primexp_106324 + + i_126839 * + num_threads_126139]; + } + } + } + } + + bool index_certs_106407; + + if (!y_106167) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 78) == -1) { + global_failure_args[0] = l_106151; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126840 = 0; i_126840 < (int64_t) 1; + i_126840++) { + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + (l_106151 + + i_126840) * + num_threads_126139] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + num_threads_126139 * + l_106151 + + i_126840 * + num_threads_126139]; + } + + double zt_res_106410 = -1.0 * nrmxl_106291; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (l_106151 * + (num_threads_126139 * + k2p2zq_75151) + + l_106151 * + num_threads_126139)] = + zt_res_106410; + for (int64_t i_126841 = 0; i_126841 < k2p2zq_75151; + i_126841++) { + ((__global double *) mem_125169)[phys_tid_105526 + + i_126841 * + num_threads_126139] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + i_126841 * + num_threads_126139]; + } + for (int64_t i_126842 = 0; i_126842 < (int64_t) 2; + i_126842++) { + for (int64_t i_126843 = 0; i_126843 < k2p2zq_75151; + i_126843++) { + ((__global + double *) mem_125167)[phys_tid_105526 + + (i_126842 * + (num_threads_126139 * + k2p2zq_75151) + + i_126843 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (i_126842 * + (num_threads_126139 * + k2p2zq_75151) + + i_126843 * + num_threads_126139)]; + } + } + for (int64_t i_126844 = 0; i_126844 < k2p2zq_75151; + i_126844++) { + for (int64_t i_126845 = 0; i_126845 < k2p2zq_75151; + i_126845++) { + ((__global + double *) mem_125438)[phys_tid_105526 
+ + (i_126844 * + (num_threads_126139 * + k2p2zq_75151) + + i_126845 * + num_threads_126139)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_126844 * + (num_threads_126139 * + k2p2zq_75151) + + i_126845 * + num_threads_126139)]; + } + } + } + for (int64_t i_126846 = 0; i_126846 < k2p2zq_75151; + i_126846++) { + ((__global double *) mem_125448)[phys_tid_105526 + + i_126846 * + num_threads_126139] = + ((__global double *) mem_125169)[phys_tid_105526 + + i_126846 * + num_threads_126139]; + } + for (int64_t i_126847 = 0; i_126847 < (int64_t) 2; + i_126847++) { + for (int64_t i_126848 = 0; i_126848 < k2p2zq_75151; + i_126848++) { + ((__global double *) mem_125446)[phys_tid_105526 + + (i_126847 * + (num_threads_126139 * + k2p2zq_75151) + + i_126848 * + num_threads_126139)] = + ((__global + double *) mem_125167)[phys_tid_105526 + + (i_126847 * + (num_threads_126139 * + k2p2zq_75151) + + i_126848 * + num_threads_126139)]; + } + } + for (int64_t i_126849 = 0; i_126849 < k2p2zq_75151; + i_126849++) { + for (int64_t i_126850 = 0; i_126850 < k2p2zq_75151; + i_126850++) { + ((__global double *) mem_125498)[phys_tid_105526 + + (i_126849 * + (num_threads_126139 * + k2p2zq_75151) + + i_126850 * + num_threads_126139)] = + ((__global + double *) mem_125438)[phys_tid_105526 + + (i_126849 * + (num_threads_126139 * + k2p2zq_75151) + + i_126850 * + num_threads_126139)]; + } + } + } + for (int64_t i_126851 = 0; i_126851 < k2p2zq_75151; + i_126851++) { + for (int64_t i_126852 = 0; i_126852 < k2p2zq_75151; + i_126852++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_126851 * + (num_threads_126139 * + k2p2zq_75151) + + i_126852 * + num_threads_126139)] = + ((__global double *) mem_125498)[phys_tid_105526 + + (i_126851 * + (num_threads_126139 * + k2p2zq_75151) + + i_126852 * + num_threads_126139)]; + } + } + for (int64_t i_126853 = 0; i_126853 < k2p2zq_75151; + i_126853++) { + ((__global + double *) 
double_buffer_mem_125553)[phys_tid_105526 + + i_126853 * + num_threads_126139] = + ((__global double *) mem_125448)[phys_tid_105526 + + i_126853 * + num_threads_126139]; + } + for (int64_t i_126854 = 0; i_126854 < (int64_t) 2; i_126854++) { + for (int64_t i_126855 = 0; i_126855 < k2p2zq_75151; + i_126855++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_105526 + + (i_126854 * + (num_threads_126139 * + k2p2zq_75151) + + i_126855 * + num_threads_126139)] = + ((__global double *) mem_125446)[phys_tid_105526 + + (i_126854 * + (num_threads_126139 * + k2p2zq_75151) + + i_126855 * + num_threads_126139)]; + } + } + + int64_t k_tmp_126809 = loopres_106183; + + k_106156 = k_tmp_126809; + } + dqrdc2_res_106150 = k_106156; + + int64_t min_arg_106412 = sub64(dqrdc2_res_106150, (int64_t) 1); + int64_t min_res_106413 = smin64(k2p2zq_75151, min_arg_106412); + + for (int64_t i_126856 = 0; i_126856 < k2p2zq_75151; i_126856++) { + for (int64_t i_126857 = 0; i_126857 < k2p2zq_75151; + i_126857++) { + ((__global double *) mem_121335)[i_126856 * (m_75136 * + k2p2zq_75151) + + i_126857 * m_75136 + + gtid_105525] = ((__global + double *) double_buffer_mem_125552)[phys_tid_105526 + + (i_126856 * + (num_threads_126139 * + k2p2zq_75151) + + i_126857 * + num_threads_126139)]; + } + } + for (int64_t i_126858 = 0; i_126858 < k2p2zq_75151; i_126858++) { + ((__global double *) mem_121338)[i_126858 * m_75136 + + gtid_105525] = ((__global + double *) double_buffer_mem_125553)[phys_tid_105526 + + i_126858 * + num_threads_126139]; + } + for (int64_t i_126859 = 0; i_126859 < k2p2zq_75151; i_126859++) { + ((__global int64_t *) mem_121341)[i_126859 * m_75136 + + gtid_105525] = ((__global + int64_t *) mem_121011)[phys_tid_105526 + + i_126859 * + num_threads_126139]; + } + ((__global int64_t *) mem_121343)[gtid_105525] = min_res_106413; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } - error_9: + error_0: return; - #undef Ty_42675 - #undef Ry_42676 - #undef Tx_42677 - #undef 
Rx_42678 - #undef Tk_42679 - #undef tk_div_tx_42680 - #undef tk_div_ty_42681 - #undef TxRx_42682 - #undef TyRy_42683 - #undef a_loc_szz_42685 - #undef b_loc_szz_42687 + #undef segmap_group_sizze_106135 } -__kernel void mainzisegmap_intragroup_43143(__global int *global_failure, - __local volatile - int64_t *mem_44898_backing_aligned_0, - __local volatile - int64_t *mem_44889_backing_aligned_1, - int64_t m_29166, - int64_t i32_res_29181, - int64_t num_groups_y_43141, - int64_t num_whole_tiles_43159, - int64_t residual_input_43286, - unsigned char cond_43287, __global - unsigned char *defunc_3_map_res_mem_44850, - __global unsigned char *mem_44879, - __global unsigned char *mem_44906) +__kernel void mainzisegmap_105814(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t j_106067, + int64_t num_groups_106078, __global + unsigned char *mem_120894, __global + unsigned char *mem_120923, __global + unsigned char *mem_120927, __global + unsigned char *mem_120931, __global + unsigned char *mem_120935) { - #define tile_sizze_43138 (mainzitile_sizze_43137) + #define segmap_group_sizze_106077 (mainzisegmap_group_sizze_105816) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44898_backing_5 = (__local volatile - char *) mem_44898_backing_aligned_0; - __local volatile char *restrict mem_44889_backing_0 = (__local volatile - char *) mem_44889_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46028; - int32_t local_tid_46029; - int64_t group_sizze_46032; - int32_t wave_sizze_46031; - int32_t group_tid_46030; - - global_tid_46028 = get_global_id(0); - local_tid_46029 = get_local_id(0); - group_sizze_46032 = get_local_size(0); - wave_sizze_46031 = LOCKSTEP_WIDTH; - group_tid_46030 = get_group_id(0); - - int32_t gid_flat_43143; + int32_t global_tid_126691; + int32_t local_tid_126692; + int64_t group_sizze_126695; + int32_t wave_sizze_126694; + int32_t 
group_tid_126693; + + global_tid_126691 = get_global_id(0); + local_tid_126692 = get_local_id(0); + group_sizze_126695 = get_local_size(0); + wave_sizze_126694 = LOCKSTEP_WIDTH; + group_tid_126693 = get_group_id(0); + + int32_t phys_tid_105814; + + phys_tid_105814 = global_tid_126691; + + int32_t phys_group_id_126696; + + phys_group_id_126696 = get_group_id(0); + for (int32_t i_126697 = 0; i_126697 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_106077)) - + phys_group_id_126696, sext_i64_i32(num_groups_106078)); + i_126697++) { + int32_t virt_group_id_126698 = phys_group_id_126696 + i_126697 * + sext_i64_i32(num_groups_106078); + int64_t gtid_105813 = sext_i32_i64(virt_group_id_126698) * + segmap_group_sizze_106077 + sext_i32_i64(local_tid_126692); + + if (slt64(gtid_105813, m_75136)) { + double defunc_2_reduce_res_106085; + double redout_119713 = 0.0; + + for (int64_t i_119714 = 0; i_119714 < k2p2zq_75151; i_119714++) { + double x_106089 = ((__global double *) mem_120894)[i_119714 * + (k2p2zq_75151 * + m_75136) + + gtid_105813 * + k2p2zq_75151 + + j_106067]; + double defunc_1_f_res_106090 = x_106089 * x_106089; + double defunc_1_op_res_106088 = defunc_1_f_res_106090 + + redout_119713; + double redout_tmp_126699 = defunc_1_op_res_106088; + + redout_119713 = redout_tmp_126699; + } + defunc_2_reduce_res_106085 = redout_119713; + + double sqrt_res_106091; + + sqrt_res_106091 = futrts_sqrt64(defunc_2_reduce_res_106085); + ((__global double *) mem_120923)[gtid_105813 + j_106067 * m_75136] = + sqrt_res_106091; + ((__global double *) mem_120927)[gtid_105813 + j_106067 * m_75136] = + sqrt_res_106091; + + bool zeze_res_106094 = sqrt_res_106091 == 0.0; + double lw_val_106095; + + if (zeze_res_106094) { + lw_val_106095 = 1.0; + } else { + lw_val_106095 = sqrt_res_106091; + } + ((__global double *) mem_120927)[gtid_105813 + (m_75136 * + k2p2zq_75151 + + j_106067 * + m_75136)] = + lw_val_106095; + for (int64_t i_126700 = 0; i_126700 < k2p2zq_75151; i_126700++) 
{ + ((__global double *) mem_120931)[i_126700 * m_75136 + + gtid_105813] = ((__global + double *) mem_120923)[gtid_105813 + + i_126700 * + m_75136]; + } + for (int64_t i_126701 = 0; i_126701 < (int64_t) 2; i_126701++) { + for (int64_t i_126702 = 0; i_126702 < k2p2zq_75151; + i_126702++) { + ((__global double *) mem_120935)[i_126701 * (m_75136 * + k2p2zq_75151) + + i_126702 * m_75136 + + gtid_105813] = ((__global + double *) mem_120927)[gtid_105813 + + (i_126701 * + (m_75136 * + k2p2zq_75151) + + i_126702 * + m_75136)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } - gid_flat_43143 = group_tid_46030; + error_0: + return; + #undef segmap_group_sizze_106077 +} +__kernel void mainzisegmap_105859(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t j_106067, + int64_t num_groups_106123, + int64_t num_threads_115425, + int64_t per_chunk_115432, __global + unsigned char *mem_120941, __global + unsigned char *mem_120946, __global + unsigned char *mem_120951, __global + unsigned char *mem_120956) +{ + #define segmap_group_sizze_106122 (mainzisegmap_group_sizze_105861) - int32_t ltid_pre_46033; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - ltid_pre_46033 = squot32(local_tid_46029, sext_i64_i32(tile_sizze_43138)); + if (*global_failure >= 0) + return; - int32_t ltid_pre_46034; + int32_t global_tid_126774; + int32_t local_tid_126775; + int64_t group_sizze_126778; + int32_t wave_sizze_126777; + int32_t group_tid_126776; + + global_tid_126774 = get_global_id(0); + local_tid_126775 = get_local_id(0); + group_sizze_126778 = get_local_size(0); + wave_sizze_126777 = LOCKSTEP_WIDTH; + group_tid_126776 = get_group_id(0); + + int32_t phys_tid_105859; + + phys_tid_105859 = global_tid_126774; + + int32_t phys_group_id_126779; + + phys_group_id_126779 = get_group_id(0); + for (int32_t i_126780 = 0; i_126780 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_106122)) - + 
phys_group_id_126779, sext_i64_i32(num_groups_106123)); + i_126780++) { + int32_t virt_group_id_126781 = phys_group_id_126779 + i_126780 * + sext_i64_i32(num_groups_106123); + int64_t gtid_105858 = sext_i32_i64(virt_group_id_126781) * + segmap_group_sizze_106122 + sext_i32_i64(local_tid_126775); + + if (slt64(gtid_105858, m_75136)) { + double sqrt_res_106127 = ((__global + double *) mem_120941)[gtid_105858]; + + for (int64_t i_126782 = 0; i_126782 < (int64_t) 1; i_126782++) { + ((__global double *) mem_120946)[gtid_105858 + (j_106067 + + i_126782) * + m_75136] = ((__global + double *) mem_120951)[(gtid_105858 + + i_126782 - + squot64(gtid_105858 + + i_126782, + per_chunk_115432) * + per_chunk_115432) * + num_threads_115425 + + squot64(gtid_105858 + + i_126782, + per_chunk_115432)]; + } + + bool zeze_res_106129 = sqrt_res_106127 == 0.0; + double lw_val_106130; + + if (zeze_res_106129) { + lw_val_106130 = 1.0; + } else { + lw_val_106130 = sqrt_res_106127; + } + ((__global double *) mem_120946)[gtid_105858 + (m_75136 * + k2p2zq_75151 + + j_106067 * + m_75136)] = + lw_val_106130; + for (int64_t i_126783 = 0; i_126783 < (int64_t) 2; i_126783++) { + for (int64_t i_126784 = 0; i_126784 < k2p2zq_75151; + i_126784++) { + ((__global double *) mem_120956)[i_126783 * (m_75136 * + k2p2zq_75151) + + i_126784 * m_75136 + + gtid_105858] = ((__global + double *) mem_120946)[gtid_105858 + + (i_126783 * + (m_75136 * + k2p2zq_75151) + + i_126784 * + m_75136)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } - ltid_pre_46034 = local_tid_46029 - squot32(local_tid_46029, - sext_i64_i32(tile_sizze_43138)) * - sext_i64_i32(tile_sizze_43138); + error_0: + return; + #undef segmap_group_sizze_106122 +} +__kernel void mainzisegmap_105874(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t j_106067, + __global unsigned char *mem_param_120902, + __global unsigned char *mem_120941) +{ + #define segmap_group_sizze_106117 
(mainzisegmap_group_sizze_105877) - int64_t gid_x_43135; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - gid_x_43135 = squot64(sext_i32_i64(group_tid_46030), num_groups_y_43141); + if (*global_failure >= 0) + return; - int64_t gid_y_43136; + int32_t global_tid_126768; + int32_t local_tid_126769; + int64_t group_sizze_126772; + int32_t wave_sizze_126771; + int32_t group_tid_126770; - gid_y_43136 = sext_i32_i64(group_tid_46030) - - squot64(sext_i32_i64(group_tid_46030), num_groups_y_43141) * - num_groups_y_43141; + global_tid_126768 = get_global_id(0); + local_tid_126769 = get_local_id(0); + group_sizze_126772 = get_local_size(0); + wave_sizze_126771 = LOCKSTEP_WIDTH; + group_tid_126770 = get_group_id(0); - float mem_44884[1]; - int64_t ltid_y_43162 = sext_i32_i64(ltid_pre_46033); - int64_t ltid_x_43160 = sext_i32_i64(ltid_pre_46034); - int32_t ltid_flat_43161 = local_tid_46029; + int32_t phys_tid_105874; - if (slt64(ltid_y_43162, tile_sizze_43138) && slt64(ltid_x_43160, - tile_sizze_43138)) { - mem_44884[(int64_t) 0] = 0.0F; - } - barrier(CLK_LOCAL_MEM_FENCE); + phys_tid_105874 = global_tid_126768; - int64_t binop_x_43245 = gid_x_43135 * tile_sizze_43138; - int64_t binop_x_43260 = gid_y_43136 * tile_sizze_43138; - __local char *mem_44889; + int64_t gtid_105872; - mem_44889 = (__local char *) mem_44889_backing_0; + gtid_105872 = sext_i32_i64(group_tid_126770) * segmap_group_sizze_106117 + + sext_i32_i64(local_tid_126769); - float accs_mem_44894[1]; - float mem_param_44885[1]; + int64_t gtid_105873; - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_44885[i_1] = mem_44884[i_1]; - for (int64_t tile_id_43171 = 0; tile_id_43171 < num_whole_tiles_43159; - tile_id_43171++) { - int64_t binop_x_43243 = tile_sizze_43138 * tile_id_43171; - int64_t ltid_y_43174 = sext_i32_i64(ltid_pre_46033); - int64_t ltid_x_43172 = sext_i32_i64(ltid_pre_46034); - int32_t ltid_flat_43173 = local_tid_46029; - int64_t j_43244 = ltid_x_43172 + binop_x_43243; 
- int64_t gtid_43246 = ltid_y_43174 + binop_x_43245; - bool binop_x_43251 = slt64(j_43244, i32_res_29181); - bool binop_y_43252 = slt64(gtid_43246, m_29166); - bool cond_43253 = binop_x_43251 && binop_y_43252; - float pre_43254; - - if (cond_43253) { - float x_43255 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_43246 * - i32_res_29181 + - j_43244]; - - pre_43254 = x_43255; - } else { - pre_43254 = 0.0F; - } - ((__local float *) mem_44889)[ltid_y_43174 * tile_sizze_43138 + - ltid_x_43172] = pre_43254; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_44893[1]; - int64_t ltid_y_43206 = sext_i32_i64(ltid_pre_46033); - int64_t ltid_x_43204 = sext_i32_i64(ltid_pre_46034); - int32_t ltid_flat_43205 = local_tid_46029; - int64_t gtid_43259 = ltid_y_43206 + binop_x_43245; - int64_t gtid_43261 = ltid_x_43204 + binop_x_43260; - float acc_43264 = mem_param_44885[(int64_t) 0]; - bool binop_x_43268 = slt64(gtid_43259, m_29166); - bool binop_y_43269 = slt64(gtid_43261, i32_res_29181); - bool cond_43270 = binop_x_43268 && binop_y_43269; - float acc_43271; - - if (cond_43270) { - float x_43272; - float redout_44315 = acc_43264; - - for (int64_t i_44316 = 0; i_44316 < tile_sizze_43138; i_44316++) { - float x_43276 = ((__local float *) mem_44889)[ltid_y_43206 * - tile_sizze_43138 + - i_44316]; - int64_t slice_44367 = binop_x_43243 + i_44316; - float x_43277 = ((__global float *) mem_44879)[slice_44367 * - (i32_res_29181 * - m_29166) + - gtid_43259 * - i32_res_29181 + - gtid_43261]; - float defunc_1_f_res_43278 = x_43276 * x_43277; - float defunc_1_op_res_43275 = defunc_1_f_res_43278 + - redout_44315; - float redout_tmp_46037 = defunc_1_op_res_43275; - - redout_44315 = redout_tmp_46037; - } - x_43272 = redout_44315; - acc_43271 = x_43272; - } else { - acc_43271 = acc_43264; - } - mem_44893[(int64_t) 0] = acc_43271; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46035[1]; + gtid_105873 = sext_i32_i64(group_tid_126770) * segmap_group_sizze_106117 + + 
sext_i32_i64(local_tid_126769) - (sext_i32_i64(group_tid_126770) * + segmap_group_sizze_106117 + + sext_i32_i64(local_tid_126769)); + if (slt64(gtid_105872, m_75136) && slt64(gtid_105873, (int64_t) 1)) { + double sqrt_res_106120 = ((__global double *) mem_120941)[gtid_105872]; - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46035[i_2] = mem_44893[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_44885[i_3] = mem_param_tmp_46035[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_44894[i_4] = mem_param_44885[i_4]; - - __local char *mem_44898; - - mem_44898 = (__local char *) mem_44898_backing_5; - - float mem_44902[1]; - float mem_45482[1]; - - if (cond_43287) { - mem_45482[(int64_t) 0] = accs_mem_44894[(int64_t) 0]; - } else { - int64_t binop_x_43360 = tile_sizze_43138 * num_whole_tiles_43159; - int64_t ltid_y_43290 = sext_i32_i64(ltid_pre_46033); - int64_t ltid_x_43288 = sext_i32_i64(ltid_pre_46034); - int32_t ltid_flat_43289 = local_tid_46029; - int64_t j_43361 = ltid_x_43288 + binop_x_43360; - int64_t gtid_43363 = binop_x_43245 + ltid_y_43290; - bool binop_x_43368 = slt64(j_43361, i32_res_29181); - bool binop_y_43369 = slt64(gtid_43363, m_29166); - bool cond_43370 = binop_x_43368 && binop_y_43369; - float pre_43371; - - if (cond_43370) { - float x_43372 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_43363 * - i32_res_29181 + - j_43361]; - - pre_43371 = x_43372; - } else { - pre_43371 = 0.0F; - } - ((__local float *) mem_44898)[ltid_y_43290 * tile_sizze_43138 + - ltid_x_43288] = pre_43371; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43323 = sext_i32_i64(ltid_pre_46033); - int64_t ltid_x_43321 = sext_i32_i64(ltid_pre_46034); - int32_t ltid_flat_43322 = local_tid_46029; - int64_t gtid_43377 = binop_x_43245 + ltid_y_43323; - int64_t gtid_43379 = binop_x_43260 + ltid_x_43321; - float acc_43382 = accs_mem_44894[(int64_t) 0]; - bool binop_x_43386 = slt64(gtid_43377, m_29166); - bool binop_y_43387 = slt64(gtid_43379, 
i32_res_29181); - bool cond_43388 = binop_x_43386 && binop_y_43387; - float acc_43389; - - if (cond_43388) { - float x_43390; - float redout_44317 = acc_43382; - - for (int64_t i_44318 = 0; i_44318 < residual_input_43286; - i_44318++) { - float x_43394 = ((__local float *) mem_44898)[ltid_y_43323 * - tile_sizze_43138 + - i_44318]; - int64_t slice_44368 = binop_x_43360 + i_44318; - float x_43395 = ((__global float *) mem_44879)[slice_44368 * - (i32_res_29181 * - m_29166) + - gtid_43377 * - i32_res_29181 + - gtid_43379]; - float defunc_1_f_res_43396 = x_43394 * x_43395; - float defunc_1_op_res_43393 = defunc_1_f_res_43396 + - redout_44317; - float redout_tmp_46038 = defunc_1_op_res_43393; - - redout_44317 = redout_tmp_46038; - } - x_43390 = redout_44317; - acc_43389 = x_43390; - } else { - acc_43389 = acc_43382; - } - mem_44902[(int64_t) 0] = acc_43389; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45482[(int64_t) 0] = mem_44902[(int64_t) 0]; - } - - int64_t thread_out_index_46039 = gid_x_43135 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46033); - int64_t thread_out_index_46040 = gid_y_43136 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46034); - - if (slt64(thread_out_index_46039, m_29166) && slt64(thread_out_index_46040, - i32_res_29181)) { - ((__global float *) mem_44906)[thread_out_index_46039 * i32_res_29181 + - thread_out_index_46040] = - mem_45482[(int64_t) 0]; + if ((sle64((int64_t) 0, gtid_105872) && slt64(gtid_105872, m_75136)) && + (sle64((int64_t) 0, j_106067) && slt64(j_106067, k2p2zq_75151))) { + ((__global double *) mem_param_120902)[gtid_105872 * k2p2zq_75151 + + j_106067] = sqrt_res_106120; + } } - error_5: + error_0: return; - #undef tile_sizze_43138 + #undef segmap_group_sizze_106117 } -__kernel void mainzisegmap_intragroup_43435(__global int *global_failure, - __local volatile - int64_t *mem_44958_backing_aligned_0, - __local volatile - int64_t *mem_44956_backing_aligned_1, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29181, - int64_t 
gridDim_x_43429, - int64_t full_tiles_43460, - int64_t kk_43663, __global - unsigned char *defunc_4_map_res_mem_44916, - __global unsigned char *mem_44940, - __global unsigned char *mem_45130) +__kernel void mainzisegmap_105883(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_120938, __global + unsigned char *mem_120941) { - #define Ty_43416 (mainziTy_43413) - #define Ry_43417 (mainziRy_43415) - #define Tx_43418 (mainziTx_43412) - #define Rx_43419 (mainziRx_43414) - #define Tk_43420 (mainziTk_43411) - #define tk_div_tx_43421 (sdiv_up64(mainziTk_43411, mainziTx_43412)) - #define tk_div_ty_43422 (sdiv_up64(mainziTk_43411, mainziTy_43413)) - #define TxRx_43423 (mainziTx_43412 * mainziRx_43414) - #define TyRy_43424 (mainziTy_43413 * mainziRy_43415) - #define a_loc_szz_43426 (mainziTk_43411 * (mainziTy_43413 * mainziRy_43415)) - #define b_loc_szz_43428 (mainziRx_43414 * (mainziTx_43412 * mainziTk_43411)) + #define segmap_group_sizze_106110 (mainzisegmap_group_sizze_105885) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44958_backing_1 = (__local volatile - char *) mem_44958_backing_aligned_0; - __local volatile char *restrict mem_44956_backing_0 = (__local volatile - char *) mem_44956_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46116; - int32_t local_tid_46117; - int64_t group_sizze_46120; - int32_t wave_sizze_46119; - int32_t group_tid_46118; + int32_t global_tid_126763; + int32_t local_tid_126764; + int64_t group_sizze_126767; + int32_t wave_sizze_126766; + int32_t group_tid_126765; + + global_tid_126763 = get_global_id(0); + local_tid_126764 = get_local_id(0); + group_sizze_126767 = get_local_size(0); + wave_sizze_126766 = LOCKSTEP_WIDTH; + group_tid_126765 = get_group_id(0); - global_tid_46116 = get_global_id(0); - local_tid_46117 = get_local_id(0); - group_sizze_46120 = get_local_size(0); - wave_sizze_46119 = LOCKSTEP_WIDTH; - 
group_tid_46118 = get_group_id(0); + int32_t phys_tid_105883; - int32_t gid_flat_43435; + phys_tid_105883 = global_tid_126763; - gid_flat_43435 = group_tid_46118; + int64_t gtid_105882; - int32_t ltid_pre_46121; + gtid_105882 = sext_i32_i64(group_tid_126765) * segmap_group_sizze_106110 + + sext_i32_i64(local_tid_126764); + if (slt64(gtid_105882, m_75136)) { + double defunc_2_reduce_res_106113 = ((__global + double *) mem_120938)[gtid_105882]; + double sqrt_res_106114; + + sqrt_res_106114 = futrts_sqrt64(defunc_2_reduce_res_106113); + ((__global double *) mem_120941)[gtid_105882] = sqrt_res_106114; + } - ltid_pre_46121 = squot32(local_tid_46117, sext_i64_i32(Tx_43418)); + error_0: + return; + #undef segmap_group_sizze_106110 +} +__kernel void mainzisegmap_106900(__global int *global_failure, + int64_t k2p2zq_75151, __global + unsigned char *mem_121938) +{ + #define segmap_group_sizze_106925 (mainzisegmap_group_sizze_106903) - int32_t ltid_pre_46122; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - ltid_pre_46122 = local_tid_46117 - squot32(local_tid_46117, - sext_i64_i32(Tx_43418)) * - sext_i64_i32(Tx_43418); + if (*global_failure >= 0) + return; - int64_t gid_y_43434; + int32_t global_tid_127182; + int32_t local_tid_127183; + int64_t group_sizze_127186; + int32_t wave_sizze_127185; + int32_t group_tid_127184; - gid_y_43434 = squot64(sext_i32_i64(group_tid_46118), gridDim_x_43429); + global_tid_127182 = get_global_id(0); + local_tid_127183 = get_local_id(0); + group_sizze_127186 = get_local_size(0); + wave_sizze_127185 = LOCKSTEP_WIDTH; + group_tid_127184 = get_group_id(0); - int64_t gid_x_43433; + int32_t phys_tid_106900; - gid_x_43433 = sext_i32_i64(group_tid_46118) - - squot64(sext_i32_i64(group_tid_46118), gridDim_x_43429) * - gridDim_x_43429; + phys_tid_106900 = global_tid_127182; - int64_t iii_43436; + int64_t gtid_106898; - iii_43436 = TyRy_43424 * gid_y_43434; + gtid_106898 = squot64(sext_i32_i64(group_tid_127184) * + 
segmap_group_sizze_106925 + + sext_i32_i64(local_tid_127183), k2p2zq_75151); - int64_t jjj_43437 = TxRx_43423 * gid_x_43433; - float mem_44954[Ry_43417 * Rx_43419]; - int64_t ltid_y_43440 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_x_43438 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43439 = local_tid_46117; - float mem_44945[Ry_43417 * Rx_43419]; + int64_t gtid_106899; - for (int64_t i_43451 = 0; i_43451 < Ry_43417; i_43451++) { - for (int64_t i_43454 = 0; i_43454 < Rx_43419; i_43454++) { - mem_44945[i_43451 * Rx_43419 + i_43454] = 0.0F; - } - } - for (int64_t i_46125 = 0; i_46125 < Ry_43417; i_46125++) { - for (int64_t i_46126 = 0; i_46126 < Rx_43419; i_46126++) { - mem_44954[i_46125 * Rx_43419 + i_46126] = mem_44945[i_46125 * - Rx_43419 + - i_46126]; + gtid_106899 = sext_i32_i64(group_tid_127184) * segmap_group_sizze_106925 + + sext_i32_i64(local_tid_127183) - + squot64(sext_i32_i64(group_tid_127184) * segmap_group_sizze_106925 + + sext_i32_i64(local_tid_127183), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_106898, k2p2zq_75151) && slt64(gtid_106899, k2p2zq_75151)) { + bool cond_106930 = gtid_106899 == gtid_106898; + double defunc_0_f_res_106931; + + if (cond_106930) { + defunc_0_f_res_106931 = 1.0; + } else { + defunc_0_f_res_106931 = 0.0; } + ((__global double *) mem_121938)[gtid_106898 * k2p2zq_75151 + + gtid_106899] = defunc_0_f_res_106931; } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44956; - - mem_44956 = (__local char *) mem_44956_backing_0; - __local char *mem_44958; - - mem_44958 = (__local char *) mem_44958_backing_1; + error_0: + return; + #undef segmap_group_sizze_106925 +} +__kernel void mainzisegmap_107039(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t n_75139, int64_t k2p2zq_75151, + int64_t m_75223, unsigned char y_75227, + int64_t defunc_2_reduce_res_75260, + double tol_75329, int64_t k_75342, + int64_t r_75826, int64_t rp1_75837, + unsigned 
char ok_or_empty_75848, + int64_t min_res_75849, + int64_t num_groups_107575, + int64_t binop_x_120251, + int64_t num_threads_126174, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, __global + unsigned char *mem_121938, __global + unsigned char *mem_121941, __global + unsigned char *mem_param_121972, __global + unsigned char *mem_122011, __global + unsigned char *mem_122014, __global + unsigned char *mem_122017, __global + unsigned char *mem_122021, __global + unsigned char *mem_122025, __global + unsigned char *mem_122028, __global + unsigned char *mem_122042, __global + unsigned char *mem_122045, __global + unsigned char *mem_122047, __global + unsigned char *mem_122382, __global + unsigned char *mem_122423, __global + unsigned char *mem_122435, __global + unsigned char *mem_122464, __global + unsigned char *mem_122537, __global + unsigned char *mem_122552, __global + unsigned char *mem_122564, __global + unsigned char *mem_122575, __global + unsigned char *mem_122595, __global + unsigned char *mem_122598, __global + unsigned char *mem_122650, __global + unsigned char *mem_122654, __global + unsigned char *mem_122657, __global + unsigned char *mem_122659, __global + unsigned char *mem_122661, __global + unsigned char *mem_125248, __global + unsigned char *mem_125250, __global + unsigned char *mem_125258, __global + unsigned char *mem_125455, __global + unsigned char *mem_125463, __global + unsigned char *mem_125465, __global + unsigned char *mem_125505, __global + unsigned char *double_buffer_mem_125569, + __global + unsigned char *double_buffer_mem_125570, + __global + unsigned char *double_buffer_mem_125571, + __global + unsigned char *double_buffer_mem_125582) +{ + #define segmap_group_sizze_107574 (mainzisegmap_group_sizze_107041) - float mem_45029[Ry_43417]; - float mem_45033[Rx_43419]; - float loop_mem_45045[Ry_43417 * Rx_43419]; - float mem_param_44959[Ry_43417 * Rx_43419]; + const int block_dim0 = 0; + const 
int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; - for (int32_t i_2 = 0; i_2 < Ry_43417 * Rx_43419; i_2++) - mem_param_44959[i_2] = mem_44954[i_2]; - for (int64_t i_43461 = 0; i_43461 < full_tiles_43460; i_43461++) { - int64_t kk_43465 = Tk_43420 * i_43461; + if (failure_is_an_option) { + int failed = *global_failure >= 0; - for (int64_t i_43466 = 0; i_43466 < Ry_43417; i_43466++) { - int64_t binop_y_43489 = Ty_43416 * i_43466; - - for (int64_t i_43468 = 0; i_43468 < tk_div_tx_43421; i_43468++) { - int64_t binop_y_43487 = Tx_43418 * i_43468; - int64_t ltid_x_43470 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_y_43471 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43472 = local_tid_46117; - int64_t k_43488 = ltid_y_43471 + binop_y_43487; - int64_t i_43490 = ltid_x_43470 + binop_y_43489; - int64_t gtid_43491 = iii_43436 + i_43490; - int64_t A_col_idx_43492 = kk_43465 + k_43488; - bool cond_43493 = slt64(gtid_43491, m_29166); - float A_elem_43494; - - if (cond_43493) { - float A_elem_43496 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43491 * - i32_res_29181 + - A_col_idx_43492]; - - A_elem_43494 = A_elem_43496; - } else { - A_elem_43494 = 0.0F; - } - - bool cond_43498 = slt64(k_43488, Tk_43420); - int64_t a_loc_ind_43499; - - if (cond_43498) { - int64_t binop_y_43500 = Tk_43420 * i_43490; - int64_t loc_fi_43501 = k_43488 + binop_y_43500; - - a_loc_ind_43499 = loc_fi_43501; - } else { - a_loc_ind_43499 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_43499) && - slt64(a_loc_ind_43499, a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43499] = - A_elem_43494; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_43506 = 0; i_43506 < tk_div_ty_43422; i_43506++) { - int64_t binop_y_43527 = Ty_43416 * i_43506; + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127222; + int32_t local_tid_127223; + int64_t group_sizze_127226; + int32_t 
wave_sizze_127225; + int32_t group_tid_127224; + + global_tid_127222 = get_global_id(0); + local_tid_127223 = get_local_id(0); + group_sizze_127226 = get_local_size(0); + wave_sizze_127225 = LOCKSTEP_WIDTH; + group_tid_127224 = get_group_id(0); + + int32_t phys_tid_107039; + + phys_tid_107039 = global_tid_127222; + + int32_t phys_group_id_127227; + + phys_group_id_127227 = get_group_id(0); + for (int32_t i_127228 = 0; i_127228 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_107574)) - + phys_group_id_127227, sext_i64_i32(num_groups_107575)); + i_127228++) { + int32_t virt_group_id_127229 = phys_group_id_127227 + i_127228 * + sext_i64_i32(num_groups_107575); + int64_t gtid_107038 = sext_i32_i64(virt_group_id_127229) * + segmap_group_sizze_107574 + sext_i32_i64(local_tid_127223); + + if (slt64(gtid_107038, m_75136)) { + int64_t x_107585 = ((__global + int64_t *) mem_param_121972)[gtid_107038]; + double defunc_0_f_res_107589; + double redout_119743 = 0.0; - for (int64_t i_43508 = 0; i_43508 < Rx_43419; i_43508++) { - int64_t binop_y_43529 = Tx_43418 * i_43508; - int64_t ltid_x_43510 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_y_43511 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43512 = local_tid_46117; - int64_t k_43528 = ltid_x_43510 + binop_y_43527; - int64_t j_43530 = ltid_y_43511 + binop_y_43529; - int64_t gtid_43531 = jjj_43437 + j_43530; - int64_t B_row_idx_43532 = kk_43465 + k_43528; - bool cond_43533 = slt64(gtid_43531, N_29165); - float B_elem_43534; - - if (cond_43533) { - float B_elem_43536 = ((__global - float *) mem_44940)[B_row_idx_43532 * - N_29165 + - gtid_43531]; + for (int64_t i_119745 = 0; i_119745 < k2p2zq_75151; i_119745++) { + double x_107595 = ((__global double *) mem_120246)[i_119745 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_107038 * + defunc_2_reduce_res_75260 + + r_75826]; + double defunc_0_f_res_107596; + double redout_119747 = 0.0; + + for (int64_t i_119748 = 0; i_119748 < k2p2zq_75151; + i_119748++) { 
+ double x_107600 = ((__global + double *) mem_120246)[i_119748 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_107038 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_107601 = ((__global + double *) mem_122011)[i_119745 * + (m_75136 * + k2p2zq_75151) + + i_119748 * + m_75136 + + gtid_107038]; + double defunc_1_f_res_107602 = x_107600 * x_107601; + double defunc_1_op_res_107599 = defunc_1_f_res_107602 + + redout_119747; + double redout_tmp_127232 = defunc_1_op_res_107599; - B_elem_43534 = B_elem_43536; - } else { - B_elem_43534 = 0.0F; + redout_119747 = redout_tmp_127232; } + defunc_0_f_res_107596 = redout_119747; - bool cond_43538 = slt64(k_43528, Tk_43420); - int64_t b_loc_ind_43539; + double defunc_1_f_res_107603 = x_107595 * defunc_0_f_res_107596; + double defunc_1_op_res_107593 = defunc_1_f_res_107603 + + redout_119743; - if (cond_43538) { - int64_t binop_y_43540 = TxRx_43423 * k_43528; - int64_t loc_fi_43541 = j_43530 + binop_y_43540; - - b_loc_ind_43539 = loc_fi_43541; - } else { - b_loc_ind_43539 = (int64_t) -1; - } - if (sle64((int64_t) 0, b_loc_ind_43539) && - slt64(b_loc_ind_43539, b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43539] = - B_elem_43534; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float loop_mem_45044[Ry_43417 * Rx_43419]; - float mem_param_45016[Ry_43417 * Rx_43419]; - - for (int32_t i_3 = 0; i_3 < Ry_43417 * Rx_43419; i_3++) - mem_param_45016[i_3] = mem_param_44959[i_3]; - for (int64_t i_43546 = 0; i_43546 < Tk_43420; i_43546++) { - int64_t binop_y_43585 = TxRx_43423 * i_43546; - int64_t ltid_y_43550 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_x_43548 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43549 = local_tid_46117; - float mem_45019[Ry_43417]; - float mem_45021[Rx_43419]; - int64_t binop_x_43576 = Ry_43417 * ltid_y_43550; - - for (int64_t i_43574 = 0; i_43574 < Ry_43417; i_43574++) { - int64_t binop_x_43577 = i_43574 + binop_x_43576; - int64_t binop_y_43578 = Tk_43420 * binop_x_43577; - 
int64_t a_loc_ind_43579 = i_43546 + binop_y_43578; + ((__global double *) mem_122028)[phys_tid_107039 + i_119745 * + num_threads_126174] = + defunc_0_f_res_107596; - for (int64_t i_46138 = 0; i_46138 < (int64_t) 1; i_46138++) { - mem_45019[i_43574 + i_46138] = ((__local - float *) mem_44956)[a_loc_ind_43579 + - i_46138]; - } - } - - int64_t binop_y_43587 = Rx_43419 * ltid_x_43548; - - for (int64_t i_43583 = 0; i_43583 < Rx_43419; i_43583++) { - int64_t binop_x_43586 = i_43583 + binop_y_43585; - int64_t b_loc_ind_43588 = binop_x_43586 + binop_y_43587; + double redout_tmp_127230 = defunc_1_op_res_107593; - for (int64_t i_46140 = 0; i_46140 < (int64_t) 1; i_46140++) { - mem_45021[i_43583 + i_46140] = ((__local - float *) mem_44958)[b_loc_ind_43588 + - i_46140]; - } + redout_119743 = redout_tmp_127230; } - for (int64_t i_46141 = 0; i_46141 < Ry_43417; i_46141++) { - mem_45029[i_46141] = mem_45019[i_46141]; - } - for (int64_t i_46142 = 0; i_46142 < Rx_43419; i_46142++) { - mem_45033[i_46142] = mem_45021[i_46142]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45043[Ry_43417 * Rx_43419]; - int64_t ltid_y_43595 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_x_43593 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43594 = local_tid_46117; - int64_t binop_y_43636 = Ry_43417 * ltid_y_43595; - int64_t binop_y_43640 = Rx_43419 * ltid_x_43593; + defunc_0_f_res_107589 = redout_119743; - for (int64_t i_43630 = 0; i_43630 < Ry_43417; i_43630++) { - int64_t binop_x_43635 = iii_43436 + i_43630; - int64_t cmpop_x_43637 = binop_x_43635 + binop_y_43636; - bool binop_x_43638 = slt64(cmpop_x_43637, m_29166); - - for (int64_t i_43633 = 0; i_43633 < Rx_43419; i_43633++) { - int64_t binop_x_43639 = jjj_43437 + i_43633; - int64_t cmpop_x_43641 = binop_x_43639 + binop_y_43640; - bool binop_y_43642 = slt64(cmpop_x_43641, N_29165); - bool cond_43643 = binop_x_43638 && binop_y_43642; - - if (cond_43643) { - float a_43645 = mem_45029[i_43630]; - float b_43646 = mem_45033[i_43633]; - float 
c_43647 = mem_param_45016[i_43630 * Rx_43419 + - i_43633]; - float defunc_1_f_res_43650 = a_43645 * b_43646; - float defunc_1_op_res_43654 = c_43647 + - defunc_1_f_res_43650; - - mem_param_45016[i_43630 * Rx_43419 + i_43633] = - defunc_1_op_res_43654; - } - } - } - for (int64_t i_46145 = 0; i_46145 < Ry_43417; i_46145++) { - for (int64_t i_46146 = 0; i_46146 < Rx_43419; i_46146++) { - mem_45043[i_46145 * Rx_43419 + i_46146] = - mem_param_45016[i_46145 * Rx_43419 + i_46146]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + double fr_107604 = 1.0 + defunc_0_f_res_107589; + double x_107605 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_107038 * + n_75139 + + r_75826]; + double defunc_0_f_res_107606; + double redout_119749 = 0.0; - float mem_param_tmp_46135[Ry_43417 * Rx_43419]; + for (int64_t i_119750 = 0; i_119750 < k2p2zq_75151; i_119750++) { + double x_107610 = ((__global double *) mem_120246)[i_119750 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_107038 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_107611 = ((__global double *) mem_122014)[i_119750 * + m_75136 + + gtid_107038]; + double defunc_1_f_res_107612 = x_107610 * x_107611; + double defunc_1_op_res_107609 = defunc_1_f_res_107612 + + redout_119749; + double redout_tmp_127233 = defunc_1_op_res_107609; + + redout_119749 = redout_tmp_127233; + } + defunc_0_f_res_107606 = redout_119749; - for (int32_t i_4 = 0; i_4 < Ry_43417 * Rx_43419; i_4++) - mem_param_tmp_46135[i_4] = mem_45043[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_43417 * Rx_43419; i_5++) - mem_param_45016[i_5] = mem_param_tmp_46135[i_5]; - } - for (int32_t i_6 = 0; i_6 < Ry_43417 * Rx_43419; i_6++) - loop_mem_45044[i_6] = mem_param_45016[i_6]; - - float mem_param_tmp_46127[Ry_43417 * Rx_43419]; - - for (int32_t i_7 = 0; i_7 < Ry_43417 * Rx_43419; i_7++) - mem_param_tmp_46127[i_7] = loop_mem_45044[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_43417 * Rx_43419; i_8++) - mem_param_44959[i_8] = mem_param_tmp_46127[i_8]; - } - for (int32_t i_9 = 
0; i_9 < Ry_43417 * Rx_43419; i_9++) - loop_mem_45045[i_9] = mem_param_44959[i_9]; - for (int64_t i_43664 = 0; i_43664 < Ry_43417; i_43664++) { - int64_t binop_y_43689 = Ty_43416 * i_43664; - - for (int64_t i_43666 = 0; i_43666 < tk_div_tx_43421; i_43666++) { - int64_t binop_y_43687 = Tx_43418 * i_43666; - int64_t ltid_x_43668 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_y_43669 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43670 = local_tid_46117; - int64_t k_43688 = ltid_y_43669 + binop_y_43687; - int64_t i_43690 = ltid_x_43668 + binop_y_43689; - int64_t gtid_43691 = iii_43436 + i_43690; - int64_t A_col_idx_43692 = kk_43663 + k_43688; - bool binop_x_43693 = slt64(gtid_43691, m_29166); - bool binop_y_43694 = slt64(A_col_idx_43692, i32_res_29181); - bool cond_43695 = binop_x_43693 && binop_y_43694; - float A_elem_43696; + double resid_107613 = x_107605 - defunc_0_f_res_107606; + double sqrt_res_107614; - if (cond_43695) { - float A_elem_43698 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43691 * - i32_res_29181 + - A_col_idx_43692]; - - A_elem_43696 = A_elem_43698; - } else { - A_elem_43696 = 0.0F; - } + sqrt_res_107614 = futrts_sqrt64(fr_107604); - bool cond_43700 = slt64(k_43688, Tk_43420); - int64_t a_loc_ind_43701; + double recresid_r_107615 = resid_107613 / sqrt_res_107614; - if (cond_43700) { - int64_t binop_y_43702 = Tk_43420 * i_43690; - int64_t loc_fi_43703 = k_43688 + binop_y_43702; - - a_loc_ind_43701 = loc_fi_43703; - } else { - a_loc_ind_43701 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_43701) && slt64(a_loc_ind_43701, - a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43701] = A_elem_43696; + for (int64_t i_127234 = 0; i_127234 < k2p2zq_75151; i_127234++) { + ((__global double *) mem_122042)[phys_tid_107039 + i_127234 * + num_threads_126174] = 0.0; } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_43708 = 0; i_43708 < tk_div_ty_43422; i_43708++) { - int64_t binop_y_43731 = Ty_43416 * i_43708; - - for 
(int64_t i_43710 = 0; i_43710 < Rx_43419; i_43710++) { - int64_t binop_y_43733 = Tx_43418 * i_43710; - int64_t ltid_x_43712 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_y_43713 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43714 = local_tid_46117; - int64_t k_43732 = ltid_x_43712 + binop_y_43731; - int64_t j_43734 = ltid_y_43713 + binop_y_43733; - int64_t gtid_43735 = jjj_43437 + j_43734; - int64_t B_row_idx_43736 = kk_43663 + k_43732; - bool binop_x_43737 = slt64(gtid_43735, N_29165); - bool binop_y_43738 = slt64(B_row_idx_43736, i32_res_29181); - bool cond_43739 = binop_x_43737 && binop_y_43738; - float B_elem_43740; - - if (cond_43739) { - float B_elem_43742 = ((__global - float *) mem_44940)[B_row_idx_43736 * - N_29165 + - gtid_43735]; - - B_elem_43740 = B_elem_43742; - } else { - B_elem_43740 = 0.0F; + for (int64_t i_127235 = 0; i_127235 < (int64_t) 2; i_127235++) { + for (int64_t i_127236 = 0; i_127236 < k2p2zq_75151; + i_127236++) { + ((__global double *) mem_122045)[phys_tid_107039 + + (i_127235 * + (num_threads_126174 * + k2p2zq_75151) + + i_127236 * + num_threads_126174)] = + 0.0; + } } - - bool cond_43744 = slt64(k_43732, Tk_43420); - int64_t b_loc_ind_43745; - - if (cond_43744) { - int64_t binop_y_43746 = TxRx_43423 * k_43732; - int64_t loc_fi_43747 = j_43734 + binop_y_43746; + for (int64_t i_127237 = 0; i_127237 < k2p2zq_75151; i_127237++) { + int64_t x_127238 = (int64_t) 0 + i_127237 * (int64_t) 1; - b_loc_ind_43745 = loc_fi_43747; - } else { - b_loc_ind_43745 = (int64_t) -1; - } - if (sle64((int64_t) 0, b_loc_ind_43745) && slt64(b_loc_ind_43745, - b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43745] = B_elem_43740; + ((__global int64_t *) mem_122047)[phys_tid_107039 + i_127237 * + num_threads_126174] = + x_127238; } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float mem_45111[Ry_43417]; - float mem_45115[Rx_43419]; - float mem_45125[Ry_43417 * Rx_43419]; - float loop_mem_45127[Ry_43417 * Rx_43419]; - float 
mem_param_45098[Ry_43417 * Rx_43419]; - - for (int32_t i_10 = 0; i_10 < Ry_43417 * Rx_43419; i_10++) - mem_param_45098[i_10] = loop_mem_45045[i_10]; - for (int64_t i_43752 = 0; i_43752 < Tk_43420; i_43752++) { - int64_t cmpop_x_43754 = kk_43663 + i_43752; - bool cond_43755 = slt64(cmpop_x_43754, i32_res_29181); - float mem_45498[Ry_43417 * Rx_43419]; - - if (cond_43755) { - int64_t binop_y_43793 = TxRx_43423 * i_43752; - int64_t bytes_45100 = (int64_t) 4 * Ry_43417; - int64_t bytes_45102 = (int64_t) 4 * Rx_43419; - int64_t ltid_y_43758 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_x_43756 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43757 = local_tid_46117; - float mem_45101[Ry_43417]; - float mem_45103[Rx_43419]; - int64_t binop_x_43784 = Ry_43417 * ltid_y_43758; - - for (int64_t i_43782 = 0; i_43782 < Ry_43417; i_43782++) { - int64_t binop_x_43785 = i_43782 + binop_x_43784; - int64_t binop_y_43786 = Tk_43420 * binop_x_43785; - int64_t a_loc_ind_43787 = i_43752 + binop_y_43786; + for (int64_t j_107621 = 0; j_107621 < k2p2zq_75151; j_107621++) { + bool index_certs_107624; - for (int64_t i_46154 = 0; i_46154 < (int64_t) 1; i_46154++) { - mem_45101[i_43782 + i_46154] = ((__local - float *) mem_44956)[a_loc_ind_43787 + - i_46154]; + if (!ok_or_empty_75848) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 88) == + -1) { + global_failure_args[0] = j_107621; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } } - } - - int64_t binop_y_43795 = Rx_43419 * ltid_x_43756; - - for (int64_t i_43791 = 0; i_43791 < Rx_43419; i_43791++) { - int64_t binop_x_43794 = i_43791 + binop_y_43793; - int64_t b_loc_ind_43796 = binop_x_43794 + binop_y_43795; - for (int64_t i_46156 = 0; i_46156 < (int64_t) 1; i_46156++) { - mem_45103[i_43791 + i_46156] = ((__local - float *) mem_44958)[b_loc_ind_43796 + - i_46156]; - } - } - for (int64_t i_46157 = 0; i_46157 < 
Ry_43417; i_46157++) { - mem_45111[i_46157] = mem_45101[i_46157]; - } - for (int64_t i_46158 = 0; i_46158 < Rx_43419; i_46158++) { - mem_45115[i_46158] = mem_45103[i_46158]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43803 = sext_i32_i64(ltid_pre_46121); - int64_t ltid_x_43801 = sext_i32_i64(ltid_pre_46122); - int32_t ltid_flat_43802 = local_tid_46117; - int64_t binop_y_43844 = Ry_43417 * ltid_y_43803; - int64_t binop_y_43848 = Rx_43419 * ltid_x_43801; - - for (int64_t i_43838 = 0; i_43838 < Ry_43417; i_43838++) { - int64_t binop_x_43843 = iii_43436 + i_43838; - int64_t cmpop_x_43845 = binop_x_43843 + binop_y_43844; - bool binop_x_43846 = slt64(cmpop_x_43845, m_29166); + double defunc_2_reduce_res_107626; + double redout_119751 = 0.0; - for (int64_t i_43841 = 0; i_43841 < Rx_43419; i_43841++) { - int64_t binop_x_43847 = jjj_43437 + i_43841; - int64_t cmpop_x_43849 = binop_x_43847 + binop_y_43848; - bool binop_y_43850 = slt64(cmpop_x_43849, N_29165); - bool cond_43851 = binop_x_43846 && binop_y_43850; + for (int64_t i_119752 = 0; i_119752 < rp1_75837; i_119752++) { + double x_107630 = ((__global + double *) mem_122025)[i_119752 * + (k2p2zq_75151 * + m_75136) + + gtid_107038 * + k2p2zq_75151 + + j_107621]; + double defunc_1_f_res_107631 = x_107630 * x_107630; + double defunc_1_op_res_107629 = defunc_1_f_res_107631 + + redout_119751; + double redout_tmp_127241 = defunc_1_op_res_107629; - if (cond_43851) { - float a_43853 = mem_45111[i_43838]; - float b_43854 = mem_45115[i_43841]; - float c_43855 = mem_param_45098[i_43838 * Rx_43419 + - i_43841]; - float defunc_1_f_res_43858 = a_43853 * b_43854; - float defunc_1_op_res_43862 = c_43855 + - defunc_1_f_res_43858; - - mem_param_45098[i_43838 * Rx_43419 + i_43841] = - defunc_1_op_res_43862; - } - } - } - for (int64_t i_46161 = 0; i_46161 < Ry_43417; i_46161++) { - for (int64_t i_46162 = 0; i_46162 < Rx_43419; i_46162++) { - mem_45125[i_46161 * Rx_43419 + i_46162] = - mem_param_45098[i_46161 * Rx_43419 + 
i_46162]; + redout_119751 = redout_tmp_127241; } - } - barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_46163 = 0; i_46163 < Ry_43417; i_46163++) { - for (int64_t i_46164 = 0; i_46164 < Rx_43419; i_46164++) { - mem_45498[i_46163 * Rx_43419 + i_46164] = - mem_45125[i_46163 * Rx_43419 + i_46164]; + defunc_2_reduce_res_107626 = redout_119751; + + double sqrt_res_107632; + + sqrt_res_107632 = futrts_sqrt64(defunc_2_reduce_res_107626); + ((__global double *) mem_122042)[phys_tid_107039 + j_107621 * + num_threads_126174] = + sqrt_res_107632; + ((__global double *) mem_122045)[phys_tid_107039 + j_107621 * + num_threads_126174] = + sqrt_res_107632; + + bool zeze_res_107635 = sqrt_res_107632 == 0.0; + double lw_val_107636; + + if (zeze_res_107635) { + lw_val_107636 = 1.0; + } else { + lw_val_107636 = sqrt_res_107632; } + ((__global double *) mem_122045)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + j_107621 * + num_threads_126174)] = + lw_val_107636; } - } else { - for (int64_t i_46165 = 0; i_46165 < Ry_43417; i_46165++) { - for (int64_t i_46166 = 0; i_46166 < Rx_43419; i_46166++) { - mem_45498[i_46165 * Rx_43419 + i_46166] = - mem_param_45098[i_46165 * Rx_43419 + i_46166]; + for (int64_t i_127242 = 0; i_127242 < k2p2zq_75151; i_127242++) { + for (int64_t i_127243 = 0; i_127243 < rp1_75837; i_127243++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_127242 * + (num_threads_126174 * + rp1_75837) + + i_127243 * + num_threads_126174)] = + ((__global double *) mem_122021)[gtid_107038 + + (i_127242 * (m_75136 * + rp1_75837) + + i_127243 * m_75136)]; + } + } + for (int64_t i_127244 = 0; i_127244 < k2p2zq_75151; i_127244++) { + ((__global double *) double_buffer_mem_125570)[phys_tid_107039 + + i_127244 * + num_threads_126174] = + ((__global double *) mem_122042)[phys_tid_107039 + + i_127244 * + num_threads_126174]; + } + for (int64_t i_127245 = 0; i_127245 < (int64_t) 2; i_127245++) { + for (int64_t i_127246 = 0; i_127246 < k2p2zq_75151; + 
i_127246++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (i_127245 * + (num_threads_126174 * + k2p2zq_75151) + + i_127246 * + num_threads_126174)] = + ((__global double *) mem_122045)[phys_tid_107039 + + (i_127245 * + (num_threads_126174 * + k2p2zq_75151) + + i_127246 * + num_threads_126174)]; } } - } - - float mem_param_tmp_46151[Ry_43417 * Rx_43419]; - - for (int32_t i_11 = 0; i_11 < Ry_43417 * Rx_43419; i_11++) - mem_param_tmp_46151[i_11] = mem_45498[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_43417 * Rx_43419; i_12++) - mem_param_45098[i_12] = mem_param_tmp_46151[i_12]; - } - for (int32_t i_13 = 0; i_13 < Ry_43417 * Rx_43419; i_13++) - loop_mem_45127[i_13] = mem_param_45098[i_13]; - - int64_t reg_tile_i_46167 = squot64(sext_i32_i64(local_tid_46117), Tx_43418); - int64_t reg_tile_i_46168 = sext_i32_i64(local_tid_46117) - - squot64(sext_i32_i64(local_tid_46117), Tx_43418) * Tx_43418; - int64_t tile_dim_start_46169 = Ry_43417 * (Ty_43416 * gid_y_43434 + - reg_tile_i_46167); - int64_t tile_dim_start_46170 = Rx_43419 * (Tx_43418 * gid_x_43433 + - reg_tile_i_46168); - - for (int64_t nest_i_46171 = 0; nest_i_46171 < Ry_43417; nest_i_46171++) { - for (int64_t nest_i_46172 = 0; nest_i_46172 < Rx_43419; - nest_i_46172++) { - if (slt64(tile_dim_start_46169 + nest_i_46171, m_29166) && - slt64(tile_dim_start_46170 + nest_i_46172, N_29165)) { - ((__global float *) mem_45130)[(tile_dim_start_46169 + - nest_i_46171) * N_29165 + - (tile_dim_start_46170 + - nest_i_46172)] = - loop_mem_45127[nest_i_46171 * Rx_43419 + nest_i_46172]; - } - } - } - - error_9: - return; - #undef Ty_43416 - #undef Ry_43417 - #undef Tx_43418 - #undef Rx_43419 - #undef Tk_43420 - #undef tk_div_tx_43421 - #undef tk_div_ty_43422 - #undef TxRx_43423 - #undef TyRy_43424 - #undef a_loc_szz_43426 - #undef b_loc_szz_43428 -} -__kernel void mainzisegmap_intragroup_43869(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile 
- int64_t *mem_45203_backing_aligned_0, - __local volatile - int64_t *mem_45196_backing_aligned_1, - int64_t N_29165, int64_t m_29166, - int32_t n_29169, float hfrac_29171, - int64_t i32_res_29175, - int32_t k2p2_29177, - int64_t num_whole_tiles_43891, - int64_t residual_input_43992, - unsigned char cond_43993, __global - unsigned char *mem_45182, __global - unsigned char *mem_45185, __global - unsigned char *mem_45216, __global - unsigned char *mem_45218, __global - unsigned char *mem_45220) -{ - #define segmap_group_sizze_41211 (mainzisegmap_group_sizze_41176) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_45203_backing_5 = (__local volatile - char *) mem_45203_backing_aligned_0; - __local volatile char *restrict mem_45196_backing_0 = (__local volatile - char *) mem_45196_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46340; - int32_t local_tid_46341; - int64_t group_sizze_46344; - int32_t wave_sizze_46343; - int32_t group_tid_46342; - - global_tid_46340 = get_global_id(0); - local_tid_46341 = get_local_id(0); - group_sizze_46344 = get_local_size(0); - wave_sizze_46343 = LOCKSTEP_WIDTH; - group_tid_46342 = get_group_id(0); - - int32_t gid_flat_43869; - - gid_flat_43869 = group_tid_46342; - - int32_t ltid_pre_46345; - - ltid_pre_46345 = local_tid_46341; - - int64_t gid_43868; - - gid_43868 = sext_i32_i64(group_tid_46342); - - int64_t binop_x_43876; - - binop_x_43876 = segmap_group_sizze_41211 * gid_43868; - - int32_t mem_45189[1]; - int64_t ltid_43870 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_43871 = local_tid_46341; - int64_t gtid_43877 = ltid_43870 + binop_x_43876; - bool cond_43878 = slt64(gtid_43877, m_29166); - int32_t pre_43879; - - if (cond_43878) { - int32_t defunc_0_f_res_43881; - 
int32_t redout_44325 = 0; - - for (int32_t i_44370 = 0; i_44370 < n_29169; i_44370++) { - int64_t i_44326 = sext_i32_i64(i_44370); - float x_43885 = ((__global float *) mem_45182)[i_44326 * m_29166 + - gtid_43877]; - bool isnan_res_43886; - - isnan_res_43886 = futrts_isnan32(x_43885); - bool cond_43887 = !isnan_res_43886; - int32_t defunc_0_f_res_43888 = btoi_bool_i32(cond_43887); - int32_t defunc_1_op_res_43884 = add32(defunc_0_f_res_43888, - redout_44325); - int32_t redout_tmp_46346 = defunc_1_op_res_43884; + int64_t dqrdc2_res_107642; + int64_t k_107648 = k_75342; - redout_44325 = redout_tmp_46346; - } - defunc_0_f_res_43881 = redout_44325; - pre_43879 = defunc_0_f_res_43881; - } else { - pre_43879 = 0; - } - mem_45189[(int64_t) 0] = pre_43879; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45192[1]; - int64_t ltid_43892 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_43893 = local_tid_46341; - - mem_45192[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45196; - - mem_45196 = (__local char *) mem_45196_backing_0; - - float accs_mem_45200[1]; - float mem_param_45193[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45193[i_1] = mem_45192[i_1]; - for (int64_t tile_id_43899 = 0; tile_id_43899 < num_whole_tiles_43891; - tile_id_43899++) { - int64_t binop_x_43948 = segmap_group_sizze_41211 * tile_id_43899; - int64_t ltid_43900 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_43901 = local_tid_46341; - int64_t j_43949 = ltid_43900 + binop_x_43948; - bool cond_43953 = slt64(j_43949, i32_res_29175); - int32_t pre_43954; - - if (cond_43953) { - int32_t index_primexp_44283 = sext_i64_i32(j_43949); - - pre_43954 = index_primexp_44283; - } else { - pre_43954 = 0; - } - ((__local int32_t *) mem_45196)[ltid_43900] = pre_43954; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45199[1]; - int64_t ltid_43919 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_43920 = local_tid_46341; - int64_t gtid_43959 = binop_x_43876 + ltid_43919; - float 
acc_43961 = mem_param_45193[(int64_t) 0]; - bool cond_43962 = slt64(gtid_43959, m_29166); - float acc_43963; - - if (cond_43962) { - int32_t defunc_0_f_res_43960 = mem_45189[(int64_t) 0]; - float x_43964; - float redout_44327 = acc_43961; - - for (int64_t i_44328 = 0; i_44328 < segmap_group_sizze_41211; - i_44328++) { - int32_t x_43968 = ((__local int32_t *) mem_45196)[i_44328]; - bool cond_43969 = slt32(x_43968, defunc_0_f_res_43960); - float defunc_0_f_res_43970; - - if (cond_43969) { - int64_t i_43971 = sext_i32_i64(x_43968); - bool x_43972 = sle64((int64_t) 0, i_43971); - bool y_43973 = slt64(i_43971, N_29165); - bool bounds_check_43974 = x_43972 && y_43973; - bool index_certs_43975; - - if (!bounds_check_43974) { + for (int64_t l_107643 = 0; l_107643 < min_res_75849; l_107643++) { + int64_t x_107649 = add64((int64_t) 1, l_107643); + bool cond_107650 = slt64(x_107649, k_107648); + bool loop_cond_107651; + + if (cond_107650) { + bool y_107652 = slt64(l_107643, k2p2zq_75151); + bool index_certs_107653; + + if (!y_107652) { { if (atomic_cmpxchg_i32_global(global_failure, -1, - 15) == -1) { - global_failure_args[0] = i_43971; - global_failure_args[1] = N_29165; + 89) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_107643; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; ; } local_failure = true; - goto error_3; + goto error_0; } } - float defunc_0_f_res_t_res_43976 = ((__global - float *) mem_45185)[i_43971 * - m_29166 + - gtid_43959]; - - defunc_0_f_res_43970 = defunc_0_f_res_t_res_43976; - } else { - defunc_0_f_res_43970 = 0.0F; - } - - float defunc_0_f_res_43977 = defunc_0_f_res_43970 * - defunc_0_f_res_43970; - float defunc_1_op_res_43967 = defunc_0_f_res_43977 + - redout_44327; - float redout_tmp_46349 = defunc_1_op_res_43967; - - redout_44327 = redout_tmp_46349; - } - x_43964 = redout_44327; - acc_43963 = x_43964; - } else { - acc_43963 = acc_43961; - } - mem_45199[(int64_t) 0] = acc_43963; - - 
error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46347[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46347[i_2] = mem_45199[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_45193[i_3] = mem_param_tmp_46347[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_45200[i_4] = mem_param_45193[i_4]; - - __local char *mem_45203; - - mem_45203 = (__local char *) mem_45203_backing_5; - - float mem_45206[1]; - float mem_45511[1]; - - if (cond_43993) { - mem_45511[(int64_t) 0] = accs_mem_45200[(int64_t) 0]; - } else { - int64_t binop_x_44003 = segmap_group_sizze_41211 * - num_whole_tiles_43891; - int64_t ltid_43994 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_43995 = local_tid_46341; - int64_t j_44004 = ltid_43994 + binop_x_44003; - bool cond_44008 = slt64(j_44004, i32_res_29175); - int32_t pre_44009; - - if (cond_44008) { - int32_t index_primexp_44284 = sext_i64_i32(j_44004); - - pre_44009 = index_primexp_44284; - } else { - pre_44009 = 0; - } - ((__local int32_t *) mem_45203)[ltid_43994] = pre_44009; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_44014 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_44015 = local_tid_46341; - int64_t gtid_44022 = binop_x_43876 + ltid_44014; - float acc_44024 = accs_mem_45200[(int64_t) 0]; - bool cond_44025 = slt64(gtid_44022, m_29166); - float acc_44026; - - if (cond_44025) { - int32_t defunc_0_f_res_44023 = mem_45189[(int64_t) 0]; - float x_44027; - float redout_44329 = acc_44024; - - for (int64_t i_44330 = 0; i_44330 < residual_input_43992; - i_44330++) { - int32_t x_44031 = ((__local int32_t *) mem_45203)[i_44330]; - bool cond_44032 = slt32(x_44031, defunc_0_f_res_44023); - float defunc_0_f_res_44033; - - if (cond_44032) { - int64_t i_44034 = sext_i32_i64(x_44031); - bool x_44035 = sle64((int64_t) 0, i_44034); - bool y_44036 = slt64(i_44034, N_29165); - bool bounds_check_44037 = x_44035 && y_44036; - bool 
index_certs_44038; + double zt_arg_107654 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + l_107643 * + num_threads_126174)]; + double zt_res_107655 = 1.0e-7 * zt_arg_107654; + bool index_certs_107656; - if (!bounds_check_44037) { + if (!y_107652) { { if (atomic_cmpxchg_i32_global(global_failure, -1, - 16) == -1) { - global_failure_args[0] = i_44034; - global_failure_args[1] = N_29165; + 90) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = k2p2zq_75151; ; } local_failure = true; - goto error_5; + goto error_0; } } - float defunc_0_f_res_t_res_44039 = ((__global - float *) mem_45185)[i_44034 * - m_29166 + - gtid_44022]; + double zl_arg_107657 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + l_107643 * + num_threads_126174]; + bool zl_res_107658 = zl_arg_107657 < zt_res_107655; - defunc_0_f_res_44033 = defunc_0_f_res_t_res_44039; + loop_cond_107651 = zl_res_107658; } else { - defunc_0_f_res_44033 = 0.0F; + loop_cond_107651 = 0; } - float defunc_0_f_res_44040 = defunc_0_f_res_44033 * - defunc_0_f_res_44033; - float defunc_1_op_res_44030 = defunc_0_f_res_44040 + - redout_44329; - float redout_tmp_46350 = defunc_1_op_res_44030; + bool y_107659 = slt64(l_107643, k2p2zq_75151); + int64_t upper_bound_107660 = sub64(k2p2zq_75151, x_107649); + bool loop_not_taken_107661 = !loop_cond_107651; + bool protect_assert_disj_107662 = y_107659 || + loop_not_taken_107661; + bool index_certs_107663; - redout_44329 = redout_tmp_46350; - } - x_44027 = redout_44329; - acc_44026 = x_44027; - } else { - acc_44026 = acc_44024; - } - mem_45206[(int64_t) 0] = acc_44026; - - error_5: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45511[(int64_t) 0] = mem_45206[(int64_t) 0]; - } - - int32_t mem_45210[1]; - int32_t mem_45212[1]; - float mem_45214[1]; - int64_t ltid_44043 = sext_i32_i64(ltid_pre_46345); - int32_t ltid_flat_44044 = 
local_tid_46341; - int64_t gtid_44054 = binop_x_43876 + ltid_44043; - bool cond_44056 = slt64(gtid_44054, m_29166); - int32_t postlude_44057; - int32_t postlude_44058; - float postlude_44059; - - if (cond_44056) { - float defunc_0_f_res_44055 = mem_45511[(int64_t) 0]; - int32_t defunc_0_f_res_44060 = mem_45189[(int64_t) 0]; - int32_t r32_arg_44061 = sub32(defunc_0_f_res_44060, k2p2_29177); - float i32_res_44062 = sitofp_i32_f32(r32_arg_44061); - float sqrt_arg_44063 = defunc_0_f_res_44055 / i32_res_44062; - float sqrt_res_44064; - - sqrt_res_44064 = futrts_sqrt32(sqrt_arg_44063); - - float i32_res_44065 = sitofp_i32_f32(defunc_0_f_res_44060); - float t32_arg_44066 = hfrac_29171 * i32_res_44065; - int32_t f32_res_44067 = fptosi_f32_i32(t32_arg_44066); - - postlude_44057 = f32_res_44067; - postlude_44058 = defunc_0_f_res_44060; - postlude_44059 = sqrt_res_44064; - } else { - postlude_44057 = 0; - postlude_44058 = 0; - postlude_44059 = 0.0F; - } - mem_45210[(int64_t) 0] = postlude_44057; - mem_45212[(int64_t) 0] = postlude_44058; - mem_45214[(int64_t) 0] = postlude_44059; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342), m_29166)) { - ((__global int32_t *) mem_45216)[sext_i32_i64(local_tid_46341) + - segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342)] = - mem_45210[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342), m_29166)) { - ((__global int32_t *) mem_45218)[sext_i32_i64(local_tid_46341) + - segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342)] = - mem_45212[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46341) + segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342), m_29166)) { - ((__global float *) mem_45220)[sext_i32_i64(local_tid_46341) + - segmap_group_sizze_41211 * - sext_i32_i64(group_tid_46342)] = - mem_45214[(int64_t) 0]; - } - - error_7: - return; - #undef 
segmap_group_sizze_41211 -} -__kernel void mainzisegmap_intragroup_44075(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45269_backing_aligned_0, - __local volatile - int64_t *mem_45262_backing_aligned_1, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29568, - int64_t num_whole_tiles_44095, - int64_t residual_input_44206, - unsigned char cond_44207, __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45275) -{ - #define segmap_group_sizze_41468 (mainzisegmap_group_sizze_41445) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_45269_backing_5 = (__local volatile - char *) mem_45269_backing_aligned_0; - __local volatile char *restrict mem_45262_backing_0 = (__local volatile - char *) mem_45262_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46524; - int32_t local_tid_46525; - int64_t group_sizze_46528; - int32_t wave_sizze_46527; - int32_t group_tid_46526; - - global_tid_46524 = get_global_id(0); - local_tid_46525 = get_local_id(0); - group_sizze_46528 = get_local_size(0); - wave_sizze_46527 = LOCKSTEP_WIDTH; - group_tid_46526 = get_group_id(0); - - int32_t gid_flat_44075; - - gid_flat_44075 = group_tid_46526; - - int32_t ltid_pre_46529; - - ltid_pre_46529 = local_tid_46525; - - int64_t gid_44074; - - gid_44074 = sext_i32_i64(group_tid_46526); - - int64_t binop_x_44084; - - binop_x_44084 = segmap_group_sizze_41468 * gid_44074; - - int32_t mem_45253[1]; - int32_t mem_45255[1]; - int64_t ltid_44076 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44077 = local_tid_46525; - 
int64_t gtid_44085 = ltid_44076 + binop_x_44084; - bool cond_44086 = slt64(gtid_44085, m_29166); - int32_t pre_44087; - int32_t pre_44088; - - if (cond_44086) { - int32_t x_44089 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_44085]; - int32_t x_44090 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_44085]; - - pre_44087 = x_44089; - pre_44088 = x_44090; - } else { - pre_44087 = 0; - pre_44088 = 0; - } - mem_45253[(int64_t) 0] = pre_44087; - mem_45255[(int64_t) 0] = pre_44088; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45258[1]; - int64_t ltid_44096 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44097 = local_tid_46525; - - mem_45258[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45262; - - mem_45262 = (__local char *) mem_45262_backing_0; - - float accs_mem_45266[1]; - float mem_param_45259[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45259[i_1] = mem_45258[i_1]; - for (int64_t tile_id_44103 = 0; tile_id_44103 < num_whole_tiles_44095; - tile_id_44103++) { - int64_t binop_x_44156 = segmap_group_sizze_41468 * tile_id_44103; - int64_t ltid_44104 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44105 = local_tid_46525; - int64_t j_44157 = ltid_44104 + binop_x_44156; - bool cond_44162 = slt64(j_44157, i32_res_29568); - int32_t pre_44163; - - if (cond_44162) { - int32_t index_primexp_44285 = sext_i64_i32(j_44157); - int32_t tile_elem_44164 = index_primexp_44285; - - pre_44163 = tile_elem_44164; - } else { - pre_44163 = 0; - } - ((__local int32_t *) mem_45262)[ltid_44104] = pre_44163; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45265[1]; - int64_t ltid_44124 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44125 = local_tid_46525; - int64_t gtid_44168 = binop_x_44084 + ltid_44124; - float acc_44171 = mem_param_45259[(int64_t) 0]; - bool cond_44172 = slt64(gtid_44168, m_29166); - float acc_44173; - - if (cond_44172) { - int32_t x_44169 = mem_45253[(int64_t) 0]; - int32_t x_44170 = 
mem_45255[(int64_t) 0]; - float x_44174; - float redout_44331 = acc_44171; - - for (int64_t i_44332 = 0; i_44332 < segmap_group_sizze_41468; - i_44332++) { - int32_t x_44178 = ((__local int32_t *) mem_45262)[i_44332]; - bool cond_44179 = slt32(x_44178, x_44170); - float defunc_0_f_res_44180; - - if (cond_44179) { - int32_t x_44181 = add32(x_44169, x_44178); - int32_t x_44182 = sub32(x_44181, x_44170); - int32_t i_44183 = add32(1, x_44182); - int64_t i_44184 = sext_i32_i64(i_44183); - bool x_44185 = sle64((int64_t) 0, i_44184); - bool y_44186 = slt64(i_44184, N_29165); - bool bounds_check_44187 = x_44185 && y_44186; - bool index_certs_44188; - - if (!bounds_check_44187) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 20) == -1) { - global_failure_args[0] = i_44184; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_3; + if (!protect_assert_disj_107662) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 91) == + -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = k2p2zq_75151; + ; } + local_failure = true; + goto error_0; } - - float defunc_0_f_res_t_res_44189 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44168 * - N_29165 + - i_44184]; - - defunc_0_f_res_44180 = defunc_0_f_res_t_res_44189; - } else { - defunc_0_f_res_44180 = 0.0F; } - float defunc_1_op_res_44177 = defunc_0_f_res_44180 + - redout_44331; - float redout_tmp_46532 = defunc_1_op_res_44177; + bool index_certs_107664; - redout_44331 = redout_tmp_46532; - } - x_44174 = redout_44331; - acc_44173 = x_44174; - } else { - acc_44173 = acc_44171; - } - mem_45265[(int64_t) 0] = acc_44173; - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46530[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46530[i_2] = mem_45265[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_45259[i_3] = mem_param_tmp_46530[i_3]; - } - for (int32_t i_4 = 0; i_4 < 
1; i_4++) - accs_mem_45266[i_4] = mem_param_45259[i_4]; - - __local char *mem_45269; - - mem_45269 = (__local char *) mem_45269_backing_5; - - float mem_45272[1]; - float mem_45520[1]; - - if (cond_44207) { - mem_45520[(int64_t) 0] = accs_mem_45266[(int64_t) 0]; - } else { - int64_t binop_x_44217 = segmap_group_sizze_41468 * - num_whole_tiles_44095; - int64_t ltid_44208 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44209 = local_tid_46525; - int64_t j_44218 = ltid_44208 + binop_x_44217; - bool cond_44223 = slt64(j_44218, i32_res_29568); - int32_t pre_44224; - - if (cond_44223) { - int32_t index_primexp_44286 = sext_i64_i32(j_44218); - int32_t tile_elem_44225 = index_primexp_44286; - - pre_44224 = tile_elem_44225; - } else { - pre_44224 = 0; - } - ((__local int32_t *) mem_45269)[ltid_44208] = pre_44224; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_44229 = sext_i32_i64(ltid_pre_46529); - int32_t ltid_flat_44230 = local_tid_46525; - int64_t gtid_44237 = binop_x_44084 + ltid_44229; - float acc_44240 = accs_mem_45266[(int64_t) 0]; - bool cond_44241 = slt64(gtid_44237, m_29166); - float acc_44242; - - if (cond_44241) { - int32_t x_44238 = mem_45253[(int64_t) 0]; - int32_t x_44239 = mem_45255[(int64_t) 0]; - float x_44243; - float redout_44333 = acc_44240; - - for (int64_t i_44334 = 0; i_44334 < residual_input_44206; - i_44334++) { - int32_t x_44247 = ((__local int32_t *) mem_45269)[i_44334]; - bool cond_44248 = slt32(x_44247, x_44239); - float defunc_0_f_res_44249; - - if (cond_44248) { - int32_t x_44250 = add32(x_44238, x_44247); - int32_t x_44251 = sub32(x_44250, x_44239); - int32_t i_44252 = add32(1, x_44251); - int64_t i_44253 = sext_i32_i64(i_44252); - bool x_44254 = sle64((int64_t) 0, i_44253); - bool y_44255 = slt64(i_44253, N_29165); - bool bounds_check_44256 = x_44254 && y_44255; - bool index_certs_44257; - - if (!bounds_check_44256) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 21) == -1) { - global_failure_args[0] = i_44253; - 
global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_5; + if (!protect_assert_disj_107662) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 92) == + -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_107643; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; } + local_failure = true; + goto error_0; } - - float defunc_0_f_res_t_res_44258 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44237 * - N_29165 + - i_44253]; - - defunc_0_f_res_44249 = defunc_0_f_res_t_res_44258; - } else { - defunc_0_f_res_44249 = 0.0F; } - float defunc_1_op_res_44246 = defunc_0_f_res_44249 + - redout_44333; - float redout_tmp_46533 = defunc_1_op_res_44246; + bool index_certs_107665; - redout_44333 = redout_tmp_46533; - } - x_44243 = redout_44333; - acc_44242 = x_44243; - } else { - acc_44242 = acc_44240; - } - mem_45272[(int64_t) 0] = acc_44242; - - error_5: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45520[(int64_t) 0] = mem_45272[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46525) + segmap_group_sizze_41468 * - sext_i32_i64(group_tid_46526), m_29166)) { - ((__global float *) mem_45275)[sext_i32_i64(local_tid_46525) + - segmap_group_sizze_41468 * - sext_i32_i64(group_tid_46526)] = - mem_45520[(int64_t) 0]; - } - - error_6: - return; - #undef segmap_group_sizze_41468 -} -__kernel void mainzisegred_large_39115(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_45793_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_45791_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29175, - int64_t i32_res_29181, - int64_t num_groups_39254, - int64_t groups_per_segment_45777, - int64_t elements_per_thread_45778, - int64_t virt_num_groups_45779, - int64_t threads_per_segment_45781, - __global unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global unsigned char 
*mem_44531, - __global unsigned char *mem_44536, - __global - unsigned char *group_res_arr_mem_45782, - __global - unsigned char *mainzicounter_mem_45784) -{ - #define segred_group_sizze_39253 (mainzisegred_group_sizze_39109) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_45793_backing_1 = - (__local volatile - char *) sync_arr_mem_45793_backing_aligned_0; - __local volatile char *restrict red_arr_mem_45791_backing_0 = - (__local volatile - char *) red_arr_mem_45791_backing_aligned_1; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45786; - int32_t local_tid_45787; - int64_t group_sizze_45790; - int32_t wave_sizze_45789; - int32_t group_tid_45788; - - global_tid_45786 = get_global_id(0); - local_tid_45787 = get_local_id(0); - group_sizze_45790 = get_local_size(0); - wave_sizze_45789 = LOCKSTEP_WIDTH; - group_tid_45788 = get_group_id(0); - - int32_t phys_tid_39115; - - phys_tid_39115 = global_tid_45786; - - __local char *red_arr_mem_45791; - - red_arr_mem_45791 = (__local char *) red_arr_mem_45791_backing_0; - - __local char *sync_arr_mem_45793; - - sync_arr_mem_45793 = (__local char *) sync_arr_mem_45793_backing_1; - - int32_t phys_group_id_45795; - - phys_group_id_45795 = get_group_id(0); - for (int32_t i_45796 = 0; i_45796 < - sdiv_up32(sext_i64_i32(virt_num_groups_45779) - phys_group_id_45795, - sext_i64_i32(num_groups_39254)); i_45796++) { - int32_t virt_group_id_45797 = phys_group_id_45795 + i_45796 * - sext_i64_i32(num_groups_39254); - int32_t flat_segment_id_45798 = squot32(virt_group_id_45797, - sext_i64_i32(groups_per_segment_45777)); - int64_t global_tid_45799 = srem64(sext_i32_i64(virt_group_id_45797) * - segred_group_sizze_39253 + - sext_i32_i64(local_tid_45787), - segred_group_sizze_39253 * - groups_per_segment_45777); - int64_t gtid_39102 = squot64(sext_i32_i64(flat_segment_id_45798), - i32_res_29181 * i32_res_29181); - int64_t gtid_39103 = 
squot64(sext_i32_i64(flat_segment_id_45798) - - squot64(sext_i32_i64(flat_segment_id_45798), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181), - i32_res_29181); - int64_t gtid_39104 = sext_i32_i64(flat_segment_id_45798) - - squot64(sext_i32_i64(flat_segment_id_45798), i32_res_29181 * - i32_res_29181) * (i32_res_29181 * i32_res_29181) - - squot64(sext_i32_i64(flat_segment_id_45798) - - squot64(sext_i32_i64(flat_segment_id_45798), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181), i32_res_29181) * - i32_res_29181; - int64_t gtid_39114; - float x_acc_45800; - int64_t chunk_sizze_45801; - - chunk_sizze_45801 = smin64(elements_per_thread_45778, - sdiv_up64(i32_res_29175 - - sext_i32_i64(sext_i64_i32(global_tid_45799)), - threads_per_segment_45781)); - - float x_39257; - float x_39258; - - // neutral-initialise the accumulators - { - x_acc_45800 = 0.0F; - } - for (int64_t i_45805 = 0; i_45805 < chunk_sizze_45801; i_45805++) { - gtid_39114 = sext_i32_i64(sext_i64_i32(global_tid_45799)) + - threads_per_segment_45781 * i_45805; - // apply map function - { - float x_39263 = ((__global - float *) images_mem_44381)[gtid_39102 * - N_29165 + - gtid_39114]; - float x_39264 = ((__global - float *) binop_p_mem_44390)[gtid_39103 * - N_29165 + - gtid_39114]; - float x_39265 = ((__global float *) mem_44531)[gtid_39104 * - N_29165 + - gtid_39114]; - float x_39266 = x_39264 * x_39265; - bool isnan_res_39267; - - isnan_res_39267 = futrts_isnan32(x_39263); - - float y_39268; - - if (isnan_res_39267) { - y_39268 = 0.0F; - } else { - y_39268 = 1.0F; + if (!protect_assert_disj_107662) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 93) == + -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_107643; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - float defunc_2_f_res_39269 = x_39266 * y_39268; + bool protect_assert_disj_107666 = 
y_75227 || + loop_not_taken_107661; + bool index_certs_107667; - // save map-out results - { } - // load accumulator - { - x_39257 = x_acc_45800; - } - // load new values - { - x_39258 = defunc_2_f_res_39269; - } - // apply reduction operator - { - float defunc_1_op_res_39259 = x_39257 + x_39258; - - // store in accumulator + if (!protect_assert_disj_107666) { { - x_acc_45800 = defunc_1_op_res_39259; + if (atomic_cmpxchg_i32_global(global_failure, -1, 94) == + -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - } - } - // to reduce current chunk, first store our result in memory - { - x_39257 = x_acc_45800; - ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_39257; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_45806; - int32_t skip_waves_45807; - - skip_waves_45807 = 1; - - float x_45802; - float x_45803; - - offset_45806 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45787, - sext_i64_i32(segred_group_sizze_39253))) { - x_45802 = ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45806)]; - } - } - offset_45806 = 1; - while (slt32(offset_45806, wave_sizze_45789)) { - if (slt32(local_tid_45787 + offset_45806, - sext_i64_i32(segred_group_sizze_39253)) && - ((local_tid_45787 - squot32(local_tid_45787, wave_sizze_45789) * - wave_sizze_45789) & (2 * offset_45806 - 1)) == 0) { - // read array element - { - x_45803 = ((volatile __local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45806)]; - } - // apply reduction operation - { - float defunc_1_op_res_45804 = x_45802 + x_45803; - - x_45802 = defunc_1_op_res_45804; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_45802; - } - } - offset_45806 *= 2; - } - while (slt32(skip_waves_45807, - squot32(sext_i64_i32(segred_group_sizze_39253) + - 
wave_sizze_45789 - 1, wave_sizze_45789))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_45806 = skip_waves_45807 * wave_sizze_45789; - if (slt32(local_tid_45787 + offset_45806, - sext_i64_i32(segred_group_sizze_39253)) && - ((local_tid_45787 - squot32(local_tid_45787, wave_sizze_45789) * - wave_sizze_45789) == 0 && (squot32(local_tid_45787, - wave_sizze_45789) & (2 * - skip_waves_45807 - - 1)) == - 0)) { - // read array element - { - x_45803 = ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45806)]; - } - // apply reduction operation - { - float defunc_1_op_res_45804 = x_45802 + x_45803; - - x_45802 = defunc_1_op_res_45804; - } - // write result of operation - { - ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_45802; - } - } - skip_waves_45807 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_45787) == (int64_t) 0) { - x_acc_45800 = x_45802; - } - } - if (groups_per_segment_45777 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_45787 == 0) { - ((__global float *) mem_44536)[gtid_39102 * (i32_res_29181 * - i32_res_29181) + - gtid_39103 * i32_res_29181 + - gtid_39104] = x_acc_45800; - } - } - } else { - int32_t old_counter_45808; - - // first thread in group saves group result to global memory - { - if (local_tid_45787 == 0) { - ((__global - float *) group_res_arr_mem_45782)[sext_i32_i64(virt_group_id_45797) * - segred_group_sizze_39253] = - x_acc_45800; - mem_fence_global(); - old_counter_45808 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_45784)[sext_i32_i64(srem32(flat_segment_id_45798, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_45793)[(int64_t) 0] = - old_counter_45808 == groups_per_segment_45777 - - (int64_t) 1; + + bool index_certs_107668; + + if (!protect_assert_disj_107666) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 95) == + 
-1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_45809; - - is_last_group_45809 = ((__local - bool *) sync_arr_mem_45793)[(int64_t) 0]; - if (is_last_group_45809) { - if (local_tid_45787 == 0) { - old_counter_45808 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_45784)[sext_i32_i64(srem32(flat_segment_id_45798, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_45777)); + + bool index_certs_107669; + + if (!protect_assert_disj_107666) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 96) == + -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - // read in the per-group-results - { - int64_t read_per_thread_45810 = - sdiv_up64(groups_per_segment_45777, - segred_group_sizze_39253); - - x_39257 = 0.0F; - for (int64_t i_45811 = 0; i_45811 < read_per_thread_45810; - i_45811++) { - int64_t group_res_id_45812 = - sext_i32_i64(local_tid_45787) * - read_per_thread_45810 + i_45811; - int64_t index_of_group_res_45813 = - sext_i32_i64(flat_segment_id_45798) * - groups_per_segment_45777 + group_res_id_45812; + + bool loopres_107670; + int64_t loopres_107675; + bool loop_while_107676; + int64_t k_107681; + + loop_while_107676 = loop_cond_107651; + k_107681 = k_107648; + while (loop_while_107676) { + for (int64_t i_107683 = 0; i_107683 < rp1_75837; + i_107683++) { + bool index_certs_107685; + + if (!y_107659) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 97) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = i_107683; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + 
; + } + local_failure = true; + goto error_0; + } + } + + double t_107686 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + i_107683 * + num_threads_126174)]; - if (slt64(group_res_id_45812, - groups_per_segment_45777)) { - x_39258 = ((__global - float *) group_res_arr_mem_45782)[index_of_group_res_45813 * - segred_group_sizze_39253]; + for (int64_t j0_107688 = 0; j0_107688 < + upper_bound_107660; j0_107688++) { + int64_t j_107690 = add64(x_107649, j0_107688); + bool x_107691 = sle64((int64_t) 0, j_107690); + bool y_107692 = slt64(j_107690, k2p2zq_75151); + bool bounds_check_107693 = x_107691 && y_107692; + bool index_certs_107694; + + if (!bounds_check_107693) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 98) == + -1) { + global_failure_args[0] = j_107690; + global_failure_args[1] = i_107683; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } - float defunc_1_op_res_39259; + double lw_val_107695 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (j_107690 * + (num_threads_126174 * + rp1_75837) + + i_107683 * + num_threads_126174)]; + int64_t i_107696 = sub64(j_107690, (int64_t) 1); + bool x_107697 = sle64((int64_t) 0, i_107696); + bool y_107698 = slt64(i_107696, k2p2zq_75151); + bool bounds_check_107699 = x_107697 && y_107698; + bool index_certs_107700; - defunc_1_op_res_39259 = x_39257 + x_39258; - x_39257 = defunc_1_op_res_39259; + if (!bounds_check_107699) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 99) == + -1) { + global_failure_args[0] = i_107696; + global_failure_args[1] = i_107683; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_107696 * + (num_threads_126174 * + rp1_75837) + + i_107683 * + 
num_threads_126174)] = + lw_val_107695; + } + + bool index_certs_107702; + + if (!y_75227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 100) == -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = i_107683; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } } + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (m_75223 * + (num_threads_126174 * + rp1_75837) + + i_107683 * + num_threads_126174)] = + t_107686; } - } - ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_39257; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_45814; - int32_t skip_waves_45815; - skip_waves_45815 = 1; + int64_t i_107704 = ((__global + int64_t *) mem_122047)[phys_tid_107039 + + l_107643 * + num_threads_126174]; + double t_107705 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + l_107643 * + num_threads_126174]; + double tt_107706 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + l_107643 * + num_threads_126174]; + double ttt_107707 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + l_107643 * + num_threads_126174)]; - float x_45802; - float x_45803; - - offset_45814 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45787, - sext_i64_i32(segred_group_sizze_39253))) { - x_45802 = ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45814)]; + for (int64_t j0_107711 = 0; j0_107711 < upper_bound_107660; + j0_107711++) { + int64_t j_107715 = add64(x_107649, j0_107711); + bool x_107716 = sle64((int64_t) 0, j_107715); + bool y_107717 = slt64(j_107715, k2p2zq_75151); + bool bounds_check_107718 = x_107716 && y_107717; + bool index_certs_107719; + + if (!bounds_check_107718) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 101) == -1) { 
+ global_failure_args[0] = j_107715; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - offset_45814 = 1; - while (slt32(offset_45814, wave_sizze_45789)) { - if (slt32(local_tid_45787 + offset_45814, - sext_i64_i32(segred_group_sizze_39253)) && - ((local_tid_45787 - squot32(local_tid_45787, - wave_sizze_45789) * - wave_sizze_45789) & (2 * offset_45814 - 1)) == - 0) { - // read array element + + int64_t lw_val_107720 = ((__global + int64_t *) mem_122047)[phys_tid_107039 + + j_107715 * + num_threads_126174]; + int64_t i_107721 = sub64(j_107715, (int64_t) 1); + bool x_107722 = sle64((int64_t) 0, i_107721); + bool y_107723 = slt64(i_107721, k2p2zq_75151); + bool bounds_check_107724 = x_107722 && y_107723; + bool index_certs_107725; + + if (!bounds_check_107724) { { - x_45803 = ((volatile __local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45814)]; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 102) == -1) { + global_failure_args[0] = i_107721; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // apply reduction operation + } + ((__global int64_t *) mem_122047)[phys_tid_107039 + + i_107721 * + num_threads_126174] = + lw_val_107720; + + double lw_val_107727 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + j_107715 * + num_threads_126174]; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + i_107721 * + num_threads_126174] = + lw_val_107727; + + bool index_certs_107729; + + if (!bounds_check_107718) { { - float defunc_1_op_res_45804 = x_45802 + x_45803; - - x_45802 = defunc_1_op_res_45804; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 103) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_107715; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // write result of operation + } + + double 
lw_val_107730 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + j_107715 * + num_threads_126174]; + bool index_certs_107731; + + if (!bounds_check_107724) { { - ((volatile __local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_45802; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 104) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_107721; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - offset_45814 *= 2; - } - while (slt32(skip_waves_45815, - squot32(sext_i64_i32(segred_group_sizze_39253) + - wave_sizze_45789 - 1, - wave_sizze_45789))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_45814 = skip_waves_45815 * wave_sizze_45789; - if (slt32(local_tid_45787 + offset_45814, - sext_i64_i32(segred_group_sizze_39253)) && - ((local_tid_45787 - squot32(local_tid_45787, - wave_sizze_45789) * - wave_sizze_45789) == 0 && - (squot32(local_tid_45787, wave_sizze_45789) & (2 * - skip_waves_45815 - - 1)) == - 0)) { - // read array element + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + i_107721 * + num_threads_126174] = + lw_val_107730; + + bool index_certs_107733; + + if (!bounds_check_107718) { { - x_45803 = ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787 + - offset_45814)]; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 105) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_107715; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // apply reduction operation + } + + double lw_val_107734 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + j_107715 * + num_threads_126174)]; + bool index_certs_107735; + + if (!bounds_check_107724) { { - float defunc_1_op_res_45804 = x_45802 + x_45803; - - x_45802 = 
defunc_1_op_res_45804; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 106) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_107721; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // write result of operation + } + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + i_107721 * + num_threads_126174)] = + lw_val_107734; + } + ((__global int64_t *) mem_122047)[phys_tid_107039 + + m_75223 * + num_threads_126174] = + i_107704; + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + m_75223 * + num_threads_126174] = + t_107705; + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + m_75223 * + num_threads_126174] = + tt_107706; + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + m_75223 * + num_threads_126174)] = + ttt_107707; + + int64_t k_107741 = sub64(k_107681, (int64_t) 1); + bool cond_107742 = slt64(x_107649, k_107741); + bool loop_cond_107743; + + if (cond_107742) { + bool index_certs_107744; + + if (!y_107659) { { - ((__local - float *) red_arr_mem_45791)[sext_i32_i64(local_tid_45787)] = - x_45802; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 107) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_107643; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_107745 = ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (num_threads_126174 * + k2p2zq_75151 + + l_107643 * + num_threads_126174)]; + double zt_res_107746 = 1.0e-7 * zt_arg_107745; + bool index_certs_107747; + + if (!y_107659) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 108) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = k2p2zq_75151; + ; + } + 
local_failure = true; + goto error_0; } } - skip_waves_45815 *= 2; + + double zl_arg_107748 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + l_107643 * + num_threads_126174]; + bool zl_res_107749 = zl_arg_107748 < zt_res_107746; + + loop_cond_107743 = zl_res_107749; + } else { + loop_cond_107743 = 0; } - // and back to memory with the final result - { - if (local_tid_45787 == 0) { - ((__global float *) mem_44536)[gtid_39102 * - (i32_res_29181 * - i32_res_29181) + - gtid_39103 * - i32_res_29181 + - gtid_39104] = - x_45802; + + bool loop_while_tmp_127252 = loop_cond_107743; + int64_t k_tmp_127257 = k_107741; + + loop_while_107676 = loop_while_tmp_127252; + k_107681 = k_tmp_127257; + } + loopres_107670 = loop_while_107676; + loopres_107675 = k_107681; + + bool cond_107750 = x_107649 == rp1_75837; + int64_t j_m_i_107751 = sub64(rp1_75837, l_107643); + bool empty_slice_107755 = j_m_i_107751 == (int64_t) 0; + int64_t m_107756 = sub64(j_m_i_107751, (int64_t) 1); + int64_t i_p_m_t_s_107757 = add64(l_107643, m_107756); + bool zzero_leq_i_p_m_t_s_107758 = sle64((int64_t) 0, + i_p_m_t_s_107757); + bool i_p_m_t_s_leq_w_107759 = slt64(i_p_m_t_s_107757, + rp1_75837); + bool i_lte_j_107760 = sle64(l_107643, rp1_75837); + bool y_107761 = zzero_leq_i_p_m_t_s_107758 && + i_p_m_t_s_leq_w_107759; + bool y_107762 = i_lte_j_107760 && y_107761; + bool ok_or_empty_107763 = empty_slice_107755 || y_107762; + bool index_ok_107764 = y_107659 && ok_or_empty_107763; + + if (cond_107750) { + for (int64_t i_127263 = 0; i_127263 < k2p2zq_75151; + i_127263++) { + ((__global double *) mem_125465)[phys_tid_107039 + + i_127263 * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + i_127263 * + num_threads_126174]; + } + for (int64_t i_127264 = 0; i_127264 < (int64_t) 2; + i_127264++) { + for (int64_t i_127265 = 0; i_127265 < k2p2zq_75151; + i_127265++) { + ((__global double *) mem_125463)[phys_tid_107039 + + (i_127264 * + 
(num_threads_126174 * + k2p2zq_75151) + + i_127265 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (i_127264 * + (num_threads_126174 * + k2p2zq_75151) + + i_127265 * + num_threads_126174)]; + } + } + for (int64_t i_127266 = 0; i_127266 < k2p2zq_75151; + i_127266++) { + for (int64_t i_127267 = 0; i_127267 < rp1_75837; + i_127267++) { + ((__global double *) mem_125505)[phys_tid_107039 + + (i_127266 * + (num_threads_126174 * + rp1_75837) + + i_127267 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_127266 * + (num_threads_126174 * + rp1_75837) + + i_127267 * + num_threads_126174)]; } } - } - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_1: - return; - #undef segred_group_sizze_39253 -} -__kernel void mainzisegred_large_40466(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_45990_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_45988_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29175, - int64_t i32_res_29181, - int64_t num_groups_40519, - int64_t groups_per_segment_45974, - int64_t elements_per_thread_45975, - int64_t virt_num_groups_45976, - int64_t threads_per_segment_45978, - __global unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44844, - __global - unsigned char *group_res_arr_mem_45979, - __global - unsigned char *mainzicounter_mem_45981) -{ - #define segred_group_sizze_40518 (mainzisegred_group_sizze_40460) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_45990_backing_1 = - (__local volatile - char *) sync_arr_mem_45990_backing_aligned_0; - __local volatile char *restrict red_arr_mem_45988_backing_0 = - (__local volatile - char *) red_arr_mem_45988_backing_aligned_1; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45983; - 
int32_t local_tid_45984; - int64_t group_sizze_45987; - int32_t wave_sizze_45986; - int32_t group_tid_45985; - - global_tid_45983 = get_global_id(0); - local_tid_45984 = get_local_id(0); - group_sizze_45987 = get_local_size(0); - wave_sizze_45986 = LOCKSTEP_WIDTH; - group_tid_45985 = get_group_id(0); - - int32_t phys_tid_40466; - - phys_tid_40466 = global_tid_45983; - - __local char *red_arr_mem_45988; - - red_arr_mem_45988 = (__local char *) red_arr_mem_45988_backing_0; - - __local char *sync_arr_mem_45990; - - sync_arr_mem_45990 = (__local char *) sync_arr_mem_45990_backing_1; - - int32_t phys_group_id_45992; - - phys_group_id_45992 = get_group_id(0); - for (int32_t i_45993 = 0; i_45993 < - sdiv_up32(sext_i64_i32(virt_num_groups_45976) - phys_group_id_45992, - sext_i64_i32(num_groups_40519)); i_45993++) { - int32_t virt_group_id_45994 = phys_group_id_45992 + i_45993 * - sext_i64_i32(num_groups_40519); - int32_t flat_segment_id_45995 = squot32(virt_group_id_45994, - sext_i64_i32(groups_per_segment_45974)); - int64_t global_tid_45996 = srem64(sext_i32_i64(virt_group_id_45994) * - segred_group_sizze_40518 + - sext_i32_i64(local_tid_45984), - segred_group_sizze_40518 * - groups_per_segment_45974); - int64_t gtid_40455 = squot64(sext_i32_i64(flat_segment_id_45995), - i32_res_29181); - int64_t gtid_40456 = sext_i32_i64(flat_segment_id_45995) - - squot64(sext_i32_i64(flat_segment_id_45995), i32_res_29181) * - i32_res_29181; - int64_t gtid_40465; - float x_acc_45997; - int64_t chunk_sizze_45998; - - chunk_sizze_45998 = smin64(elements_per_thread_45975, - sdiv_up64(i32_res_29175 - - sext_i32_i64(sext_i64_i32(global_tid_45996)), - threads_per_segment_45978)); - - float x_40522; - float x_40523; - - // neutral-initialise the accumulators - { - x_acc_45997 = 0.0F; - } - for (int64_t i_46002 = 0; i_46002 < chunk_sizze_45998; i_46002++) { - gtid_40465 = sext_i32_i64(sext_i64_i32(global_tid_45996)) + - threads_per_segment_45978 * i_46002; - // apply map function - { - float 
x_40528 = ((__global - float *) images_mem_44381)[gtid_40455 * - N_29165 + - gtid_40465]; - bool isnan_res_40529; - - isnan_res_40529 = futrts_isnan32(x_40528); - - float defunc_1_f_res_40530; - - if (isnan_res_40529) { - defunc_1_f_res_40530 = 0.0F; } else { - float x_40527 = ((__global - float *) binop_p_mem_44390)[gtid_40456 * - N_29165 + - gtid_40465]; - float defunc_1_f_res_f_res_40531 = x_40527 * x_40528; + bool index_certs_107765; - defunc_1_f_res_40530 = defunc_1_f_res_f_res_40531; - } - // save map-out results - { } - // load accumulator - { - x_40522 = x_acc_45997; - } - // load new values - { - x_40523 = defunc_1_f_res_40530; - } - // apply reduction operator - { - float defunc_1_op_res_40524 = x_40522 + x_40523; + if (!index_ok_107764) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 109) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = l_107643; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } - // store in accumulator - { - x_acc_45997 = defunc_1_op_res_40524; + double defunc_2_reduce_res_107767; + double redout_119753 = 0.0; + + for (int64_t i_119754 = 0; i_119754 < j_m_i_107751; + i_119754++) { + int64_t slice_120014 = l_107643 + i_119754; + double x_107771 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + slice_120014 * + num_threads_126174)]; + double defunc_1_f_res_107772 = x_107771 * x_107771; + double defunc_1_op_res_107770 = defunc_1_f_res_107772 + + redout_119753; + double redout_tmp_127268 = defunc_1_op_res_107770; + + redout_119753 = redout_tmp_127268; } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_40522 = x_acc_45997; - ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_40522; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46003; - int32_t skip_waves_46004; - - skip_waves_46004 
= 1; - - float x_45999; - float x_46000; - - offset_46003 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45984, - sext_i64_i32(segred_group_sizze_40518))) { - x_45999 = ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46003)]; - } - } - offset_46003 = 1; - while (slt32(offset_46003, wave_sizze_45986)) { - if (slt32(local_tid_45984 + offset_46003, - sext_i64_i32(segred_group_sizze_40518)) && - ((local_tid_45984 - squot32(local_tid_45984, wave_sizze_45986) * - wave_sizze_45986) & (2 * offset_46003 - 1)) == 0) { - // read array element - { - x_46000 = ((volatile __local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46003)]; - } - // apply reduction operation - { - float defunc_1_op_res_46001 = x_45999 + x_46000; + defunc_2_reduce_res_107767 = redout_119753; - x_45999 = defunc_1_op_res_46001; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_45999; - } - } - offset_46003 *= 2; - } - while (slt32(skip_waves_46004, - squot32(sext_i64_i32(segred_group_sizze_40518) + - wave_sizze_45986 - 1, wave_sizze_45986))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46003 = skip_waves_46004 * wave_sizze_45986; - if (slt32(local_tid_45984 + offset_46003, - sext_i64_i32(segred_group_sizze_40518)) && - ((local_tid_45984 - squot32(local_tid_45984, wave_sizze_45986) * - wave_sizze_45986) == 0 && (squot32(local_tid_45984, - wave_sizze_45986) & (2 * - skip_waves_46004 - - 1)) == - 0)) { - // read array element - { - x_46000 = ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46003)]; - } - // apply reduction operation - { - float defunc_1_op_res_46001 = x_45999 + x_46000; + double sqrt_res_107773; - x_45999 = defunc_1_op_res_46001; - } - // write result of operation - { - ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_45999; - } - } - skip_waves_46004 *= 2; - } - 
barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_45984) == (int64_t) 0) { - x_acc_45997 = x_45999; - } - } - if (groups_per_segment_45974 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_45984 == 0) { - ((__global float *) mem_44844)[gtid_40455 * i32_res_29181 + - gtid_40456] = x_acc_45997; - } - } - } else { - int32_t old_counter_46005; - - // first thread in group saves group result to global memory - { - if (local_tid_45984 == 0) { + sqrt_res_107773 = futrts_sqrt64(defunc_2_reduce_res_107767); + + bool zeze_res_107774 = sqrt_res_107773 == 0.0; + + if (zeze_res_107774) { + for (int64_t i_127269 = 0; i_127269 < k2p2zq_75151; + i_127269++) { + ((__global double *) mem_125250)[phys_tid_107039 + + i_127269 * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + i_127269 * + num_threads_126174]; + } + for (int64_t i_127270 = 0; i_127270 < (int64_t) 2; + i_127270++) { + for (int64_t i_127271 = 0; i_127271 < k2p2zq_75151; + i_127271++) { + ((__global + double *) mem_125248)[phys_tid_107039 + + (i_127270 * + (num_threads_126174 * + k2p2zq_75151) + + i_127271 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (i_127270 * + (num_threads_126174 * + k2p2zq_75151) + + i_127271 * + num_threads_126174)]; + } + } + for (int64_t i_127272 = 0; i_127272 < k2p2zq_75151; + i_127272++) { + for (int64_t i_127273 = 0; i_127273 < rp1_75837; + i_127273++) { + ((__global + double *) mem_125455)[phys_tid_107039 + + (i_127272 * + (num_threads_126174 * + rp1_75837) + + i_127273 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_127272 * + (num_threads_126174 * + rp1_75837) + + i_127273 * + num_threads_126174)]; + } + } + } else { + bool y_107778 = slt64(l_107643, rp1_75837); + bool index_ok_107779 = y_107659 && y_107778; + bool index_certs_107780; + 
+ if (!index_ok_107779) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 110) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = l_107643; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_107781 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)]; + bool zeze_res_107782 = znze_arg_107781 == 0.0; + bool znze_res_107783 = !zeze_res_107782; + double nrmxl_107784; + + if (znze_res_107783) { + double abs_res_107785 = fabs(sqrt_res_107773); + double sgn_res_107786 = fsignum32(znze_arg_107781); + double zt_res_107787 = abs_res_107785 * + sgn_res_107786; + + nrmxl_107784 = zt_res_107787; + } else { + nrmxl_107784 = sqrt_res_107773; + } + for (int64_t i0_107789 = 0; i0_107789 < j_m_i_107751; + i0_107789++) { + int64_t i_107791 = add64(l_107643, i0_107789); + bool x_107792 = sle64((int64_t) 0, i_107791); + bool y_107793 = slt64(i_107791, rp1_75837); + bool bounds_check_107794 = x_107792 && y_107793; + bool index_ok_107795 = y_107659 && + bounds_check_107794; + bool index_certs_107796; + + if (!index_ok_107795) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 111) == + -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = i_107791; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_107797 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + i_107791 * + num_threads_126174)]; + double lw_val_107798 = x_107797 / nrmxl_107784; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + i_107791 * + num_threads_126174)] = + lw_val_107798; + } + + double zp_arg_107800 = ((__global + 
double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)]; + double zp_res_107801 = 1.0 + zp_arg_107800; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)] = + zp_res_107801; + + bool bounds_invalid_upwards_107803 = slt64(k2p2zq_75151, + x_107649); + bool valid_107804 = !bounds_invalid_upwards_107803; + bool range_valid_c_107805; + + if (!valid_107804) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 112) == -1) { + global_failure_args[0] = x_107649; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_107806 = slt64((int64_t) 0, + upper_bound_107660); + bool loop_not_taken_107807 = !loop_nonempty_107806; + bool protect_assert_disj_107808 = index_ok_107779 || + loop_not_taken_107807; + bool index_certs_107809; + + if (!protect_assert_disj_107808) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 113) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = l_107643; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_107813 = 0; i_107813 < + upper_bound_107660; i_107813++) { + int64_t index_primexp_107817 = add64(x_107649, + i_107813); + bool x_107818 = sle64((int64_t) 0, + index_primexp_107817); + bool y_107819 = slt64(index_primexp_107817, + k2p2zq_75151); + bool bounds_check_107820 = x_107818 && y_107819; + double t_107821; + double t_107823 = 0.0; + + for (int64_t i0_107822 = 0; i0_107822 < + j_m_i_107751; i0_107822++) { + int64_t i_107824 = add64(l_107643, i0_107822); + bool x_107825 = sle64((int64_t) 0, i_107824); + bool y_107826 = slt64(i_107824, rp1_75837); + bool bounds_check_107827 = x_107825 && y_107826; + bool index_ok_107828 = y_107659 && + bounds_check_107827; 
+ bool index_certs_107829; + + if (!index_ok_107828) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 114) == + -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = i_107824; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_107830 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + i_107824 * + num_threads_126174)]; + bool index_ok_107831 = bounds_check_107820 && + bounds_check_107827; + bool index_certs_107832; + + if (!index_ok_107831) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 115) == + -1) { + global_failure_args[0] = + index_primexp_107817; + global_failure_args[1] = i_107824; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_107833 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (index_primexp_107817 * + (num_threads_126174 * + rp1_75837) + + i_107824 * + num_threads_126174)]; + double y_107834 = x_107830 * y_107833; + double loopres_107835 = t_107823 - y_107834; + double t_tmp_127278 = loopres_107835; + + t_107823 = t_tmp_127278; + } + t_107821 = t_107823; + + double y_107836 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)]; + double t_107837 = t_107821 / y_107836; + + for (int64_t i0_107839 = 0; i0_107839 < + j_m_i_107751; i0_107839++) { + int64_t i_107841 = add64(l_107643, i0_107839); + bool x_107842 = sle64((int64_t) 0, i_107841); + bool y_107843 = slt64(i_107841, rp1_75837); + bool bounds_check_107844 = x_107842 && y_107843; + bool index_ok_107845 = bounds_check_107820 && + bounds_check_107844; + bool index_certs_107846; + + if (!index_ok_107845) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 
116) == + -1) { + global_failure_args[0] = + index_primexp_107817; + global_failure_args[1] = i_107841; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_107847 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (index_primexp_107817 * + (num_threads_126174 * + rp1_75837) + + i_107841 * + num_threads_126174)]; + bool index_ok_107848 = y_107659 && + bounds_check_107844; + bool index_certs_107849; + + if (!index_ok_107848) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 117) == + -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = i_107841; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_107850 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + i_107841 * + num_threads_126174)]; + double y_107851 = t_107837 * y_107850; + double lw_val_107852 = x_107847 + y_107851; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (index_primexp_107817 * + (num_threads_126174 * + rp1_75837) + + i_107841 * + num_threads_126174)] = + lw_val_107852; + } + + bool index_certs_107854; + + if (!bounds_check_107820) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 118) == + -1) { + global_failure_args[0] = + index_primexp_107817; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_107855 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + index_primexp_107817 * + num_threads_126174]; + bool zeze_res_107856 = zeze_arg_107855 == 0.0; + + if (!zeze_res_107856) { + bool index_ok_107859 = y_107778 && + bounds_check_107820; + bool index_certs_107860; + + if (!index_ok_107859) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 119) == + -1) { + 
global_failure_args[0] = + index_primexp_107817; + global_failure_args[1] = l_107643; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_107861 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (index_primexp_107817 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)]; + double abs_res_107862 = fabs(abs_arg_107861); + double zs_res_107863 = abs_res_107862 / + zeze_arg_107855; + double ztzt_res_107864 = fpow64(zs_res_107863, + 2.0); + double zm_res_107865 = 1.0 - ztzt_res_107864; + double max_res_107866 = fmax64(0.0, + zm_res_107865); + double abs_res_107867 = fabs(max_res_107866); + bool zgze_res_107868 = 1.0e-6 <= abs_res_107867; + int64_t j_m_i_107869 = sub64(rp1_75837, + x_107649); + + if (zgze_res_107868) { + double sqrt_res_107872; + + sqrt_res_107872 = + futrts_sqrt64(max_res_107866); + + double zt_res_107873 = zeze_arg_107855 * + sqrt_res_107872; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + index_primexp_107817 * + num_threads_126174] = + zt_res_107873; + } else { + bool empty_slice_107875 = j_m_i_107869 == + (int64_t) 0; + int64_t m_107876 = sub64(j_m_i_107869, + (int64_t) 1); + int64_t i_p_m_t_s_107877 = add64(x_107649, + m_107876); + bool zzero_leq_i_p_m_t_s_107878 = + sle64((int64_t) 0, i_p_m_t_s_107877); + bool i_p_m_t_s_leq_w_107879 = + slt64(i_p_m_t_s_107877, rp1_75837); + bool zzero_lte_i_107880 = sle64((int64_t) 0, + x_107649); + bool i_lte_j_107881 = sle64(x_107649, + rp1_75837); + bool y_107882 = i_p_m_t_s_leq_w_107879 && + zzero_lte_i_107880; + bool y_107883 = + zzero_leq_i_p_m_t_s_107878 && y_107882; + bool y_107884 = i_lte_j_107881 && y_107883; + bool forwards_ok_107885 = + zzero_lte_i_107880 && y_107884; + bool ok_or_empty_107886 = + empty_slice_107875 || + forwards_ok_107885; + bool index_ok_107887 = + bounds_check_107820 && + ok_or_empty_107886; + bool 
index_certs_107888; + + if (!index_ok_107887) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 120) == + -1) { + global_failure_args[0] = + index_primexp_107817; + global_failure_args[1] = + x_107649; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_107890; + double redout_119755 = 0.0; + + for (int64_t i_119756 = 0; i_119756 < + j_m_i_107869; i_119756++) { + int64_t slice_120015 = x_107649 + + i_119756; + double x_107894 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (index_primexp_107817 * + (num_threads_126174 * + rp1_75837) + + slice_120015 * + num_threads_126174)]; + double defunc_1_f_res_107895 = + x_107894 * x_107894; + double defunc_1_op_res_107893 = + defunc_1_f_res_107895 + + redout_119755; + double redout_tmp_127280 = + defunc_1_op_res_107893; + + redout_119755 = redout_tmp_127280; + } + defunc_2_reduce_res_107890 = redout_119755; + + double sqrt_res_107896; + + sqrt_res_107896 = + futrts_sqrt64(defunc_2_reduce_res_107890); + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + index_primexp_107817 * + num_threads_126174] = + sqrt_res_107896; + + bool index_certs_107898; + + if (!bounds_check_107820) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 121) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_107817; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127281 = 0; i_127281 < + (int64_t) 1; i_127281++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (index_primexp_107817 + + i_127281) * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + num_threads_126174 * + index_primexp_107817 + + i_127281 * + num_threads_126174]; + } + } + } + } + + bool 
index_certs_107901; + + if (!y_107659) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 122) == -1) { + global_failure_args[0] = l_107643; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127282 = 0; i_127282 < (int64_t) 1; + i_127282++) { + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + (l_107643 + + i_127282) * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + l_107643 * + (num_threads_126174 * + rp1_75837) + + num_threads_126174 * + l_107643 + + i_127282 * + num_threads_126174]; + } + + double zt_res_107904 = -1.0 * nrmxl_107784; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (l_107643 * + (num_threads_126174 * + rp1_75837) + + l_107643 * + num_threads_126174)] = + zt_res_107904; + for (int64_t i_127283 = 0; i_127283 < k2p2zq_75151; + i_127283++) { + ((__global double *) mem_125250)[phys_tid_107039 + + i_127283 * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + i_127283 * + num_threads_126174]; + } + for (int64_t i_127284 = 0; i_127284 < (int64_t) 2; + i_127284++) { + for (int64_t i_127285 = 0; i_127285 < k2p2zq_75151; + i_127285++) { + ((__global + double *) mem_125248)[phys_tid_107039 + + (i_127284 * + (num_threads_126174 * + k2p2zq_75151) + + i_127285 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (i_127284 * + (num_threads_126174 * + k2p2zq_75151) + + i_127285 * + num_threads_126174)]; + } + } + for (int64_t i_127286 = 0; i_127286 < k2p2zq_75151; + i_127286++) { + for (int64_t i_127287 = 0; i_127287 < rp1_75837; + i_127287++) { + ((__global + double *) mem_125455)[phys_tid_107039 + + (i_127286 * + (num_threads_126174 * + rp1_75837) + + i_127287 * + num_threads_126174)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_127286 * + (num_threads_126174 * + rp1_75837) + + 
i_127287 * + num_threads_126174)]; + } + } + } + for (int64_t i_127288 = 0; i_127288 < k2p2zq_75151; + i_127288++) { + ((__global double *) mem_125465)[phys_tid_107039 + + i_127288 * + num_threads_126174] = + ((__global double *) mem_125250)[phys_tid_107039 + + i_127288 * + num_threads_126174]; + } + for (int64_t i_127289 = 0; i_127289 < (int64_t) 2; + i_127289++) { + for (int64_t i_127290 = 0; i_127290 < k2p2zq_75151; + i_127290++) { + ((__global double *) mem_125463)[phys_tid_107039 + + (i_127289 * + (num_threads_126174 * + k2p2zq_75151) + + i_127290 * + num_threads_126174)] = + ((__global + double *) mem_125248)[phys_tid_107039 + + (i_127289 * + (num_threads_126174 * + k2p2zq_75151) + + i_127290 * + num_threads_126174)]; + } + } + for (int64_t i_127291 = 0; i_127291 < k2p2zq_75151; + i_127291++) { + for (int64_t i_127292 = 0; i_127292 < rp1_75837; + i_127292++) { + ((__global double *) mem_125505)[phys_tid_107039 + + (i_127291 * + (num_threads_126174 * + rp1_75837) + + i_127292 * + num_threads_126174)] = + ((__global + double *) mem_125455)[phys_tid_107039 + + (i_127291 * + (num_threads_126174 * + rp1_75837) + + i_127292 * + num_threads_126174)]; + } + } + } + for (int64_t i_127293 = 0; i_127293 < k2p2zq_75151; + i_127293++) { + for (int64_t i_127294 = 0; i_127294 < rp1_75837; + i_127294++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_127293 * + (num_threads_126174 * + rp1_75837) + + i_127294 * + num_threads_126174)] = + ((__global double *) mem_125505)[phys_tid_107039 + + (i_127293 * + (num_threads_126174 * + rp1_75837) + + i_127294 * + num_threads_126174)]; + } + } + for (int64_t i_127295 = 0; i_127295 < k2p2zq_75151; + i_127295++) { ((__global - float *) group_res_arr_mem_45979)[sext_i32_i64(virt_group_id_45994) * - segred_group_sizze_40518] = - x_acc_45997; - mem_fence_global(); - old_counter_46005 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_45981)[sext_i32_i64(srem32(flat_segment_id_45995, - 
10240))], - (int) 1); - ((__local bool *) sync_arr_mem_45990)[(int64_t) 0] = - old_counter_46005 == groups_per_segment_45974 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + double *) double_buffer_mem_125570)[phys_tid_107039 + + i_127295 * + num_threads_126174] = + ((__global double *) mem_125465)[phys_tid_107039 + + i_127295 * + num_threads_126174]; + } + for (int64_t i_127296 = 0; i_127296 < (int64_t) 2; i_127296++) { + for (int64_t i_127297 = 0; i_127297 < k2p2zq_75151; + i_127297++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_107039 + + (i_127296 * + (num_threads_126174 * + k2p2zq_75151) + + i_127297 * + num_threads_126174)] = + ((__global double *) mem_125463)[phys_tid_107039 + + (i_127296 * + (num_threads_126174 * + k2p2zq_75151) + + i_127297 * + num_threads_126174)]; + } + } + + int64_t k_tmp_127251 = loopres_107675; + + k_107648 = k_tmp_127251; + } + dqrdc2_res_107642 = k_107648; - bool is_last_group_46006; + int64_t min_arg_107906 = sub64(dqrdc2_res_107642, (int64_t) 1); + int64_t min_res_107907 = smin64(rp1_75837, min_arg_107906); - is_last_group_46006 = ((__local - bool *) sync_arr_mem_45990)[(int64_t) 0]; - if (is_last_group_46006) { - if (local_tid_45984 == 0) { - old_counter_46005 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_45981)[sext_i32_i64(srem32(flat_segment_id_45995, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_45974)); - } - // read in the per-group-results - { - int64_t read_per_thread_46007 = - sdiv_up64(groups_per_segment_45974, - segred_group_sizze_40518); + for (int64_t i_119759 = 0; i_119759 < k2p2zq_75151; i_119759++) { + int64_t x_107912 = add64((int64_t) 1, i_119759); + bool cond_f_res_107913 = slt64(min_res_107907, x_107912); + + for (int64_t i_119763 = 0; i_119763 < k2p2zq_75151; + i_119763++) { + int64_t x_107917 = add64((int64_t) 1, i_119763); + bool cond_107918 = slt64(min_res_107907, x_107917); + bool x_107919 = !cond_107918; + bool 
y_107920 = cond_f_res_107913 && x_107919; + bool cond_107921 = cond_107918 || y_107920; + double defunc_1_f_res_107922; - x_40522 = 0.0F; - for (int64_t i_46008 = 0; i_46008 < read_per_thread_46007; - i_46008++) { - int64_t group_res_id_46009 = - sext_i32_i64(local_tid_45984) * - read_per_thread_46007 + i_46008; - int64_t index_of_group_res_46010 = - sext_i32_i64(flat_segment_id_45995) * - groups_per_segment_45974 + group_res_id_46009; + if (cond_107921) { + defunc_1_f_res_107922 = NAN; + } else { + double x_107916 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (i_119759 * + (num_threads_126174 * + rp1_75837) + + i_119763 * + num_threads_126174)]; - if (slt64(group_res_id_46009, - groups_per_segment_45974)) { - x_40523 = ((__global - float *) group_res_arr_mem_45979)[index_of_group_res_46010 * - segred_group_sizze_40518]; - - float defunc_1_op_res_40524; + defunc_1_f_res_107922 = x_107916; + } + ((__global double *) mem_122382)[phys_tid_107039 + + (i_119759 * + (num_threads_126174 * + k2p2zq_75151) + + i_119763 * + num_threads_126174)] = + defunc_1_f_res_107922; + } + } + for (int64_t i_127300 = 0; i_127300 < k2p2zq_75151; i_127300++) { + ((__global double *) mem_122435)[phys_tid_107039 + i_127300 * + num_threads_126174] = 0.0; + } + for (int64_t i_119767 = 0; i_119767 < k2p2zq_75151; i_119767++) { + for (int64_t i_127302 = 0; i_127302 < k2p2zq_75151; + i_127302++) { + ((__global double *) mem_122423)[phys_tid_107039 + + (i_119767 * + (num_threads_126174 * + k2p2zq_75151) + + i_127302 * + num_threads_126174)] = + ((__global double *) mem_122435)[phys_tid_107039 + + i_127302 * + num_threads_126174]; + } + for (int64_t i_107928 = 0; i_107928 < k2p2zq_75151; + i_107928++) { + int64_t x_107930 = sub64(k2p2zq_75151, i_107928); + int64_t i_107931 = sub64(x_107930, (int64_t) 1); + bool x_107932 = sle64((int64_t) 0, i_107931); + bool y_107933 = slt64(i_107931, k2p2zq_75151); + bool bounds_check_107934 = x_107932 && y_107933; + int64_t j_m_i_107935 = 
sub64(k2p2zq_75151, x_107930); + bool empty_slice_107936 = j_m_i_107935 == (int64_t) 0; + int64_t m_107937 = sub64(j_m_i_107935, (int64_t) 1); + int64_t i_p_m_t_s_107938 = add64(x_107930, m_107937); + bool zzero_leq_i_p_m_t_s_107939 = sle64((int64_t) 0, + i_p_m_t_s_107938); + bool i_p_m_t_s_leq_w_107940 = slt64(i_p_m_t_s_107938, + k2p2zq_75151); + bool zzero_lte_i_107941 = sle64((int64_t) 0, x_107930); + bool i_lte_j_107942 = sle64(x_107930, k2p2zq_75151); + bool y_107943 = i_p_m_t_s_leq_w_107940 && + zzero_lte_i_107941; + bool y_107944 = zzero_leq_i_p_m_t_s_107939 && y_107943; + bool y_107945 = i_lte_j_107942 && y_107944; + bool forwards_ok_107946 = zzero_lte_i_107941 && y_107945; + bool ok_or_empty_107947 = empty_slice_107936 || + forwards_ok_107946; + bool index_ok_107948 = bounds_check_107934 && + ok_or_empty_107947; + bool index_certs_107949; + + if (!index_ok_107948) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 123) == -1) { + global_failure_args[0] = i_107931; + global_failure_args[1] = x_107930; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + global_failure_args[4] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_107950; + + if (!ok_or_empty_107947) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 124) == -1) { + global_failure_args[0] = x_107930; + global_failure_args[1] = k2p2zq_75151; + global_failure_args[2] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_107953; + double redout_119769 = 0.0; + + for (int64_t i_119770 = 0; i_119770 < j_m_i_107935; + i_119770++) { + int64_t slice_120021 = x_107930 + i_119770; + double x_107958 = ((__global + double *) mem_122382)[phys_tid_107039 + + (slice_120021 * + (num_threads_126174 * + k2p2zq_75151) + + i_107931 * + num_threads_126174)]; + bool isnan_res_107959; + + isnan_res_107959 = futrts_isnan64(x_107958); + + double defunc_1_f_res_107960; + + if 
(isnan_res_107959) { + defunc_1_f_res_107960 = 0.0; + } else { + double x_107957 = ((__global + double *) mem_122423)[phys_tid_107039 + + (i_119767 * + (num_threads_126174 * + k2p2zq_75151) + + slice_120021 * + num_threads_126174)]; + double defunc_1_f_res_f_res_107961 = x_107957 * + x_107958; - defunc_1_op_res_40524 = x_40522 + x_40523; - x_40522 = defunc_1_op_res_40524; + defunc_1_f_res_107960 = defunc_1_f_res_f_res_107961; } + + double defunc_1_op_res_107956 = defunc_1_f_res_107960 + + redout_119769; + double redout_tmp_127304 = defunc_1_op_res_107956; + + redout_119769 = redout_tmp_127304; } - } - ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_40522; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46011; - int32_t skip_waves_46012; + defunc_2_reduce_res_107953 = redout_119769; - skip_waves_46012 = 1; + bool index_ok_107962 = bounds_check_107934 && + bounds_check_107934; + bool index_certs_107963; + + if (!index_ok_107962) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 125) == -1) { + global_failure_args[0] = i_107931; + global_failure_args[1] = i_107931; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } - float x_45999; - float x_46000; + double zs_arg_107964 = ((__global + double *) mem_122382)[phys_tid_107039 + + (i_107931 * + (num_threads_126174 * + k2p2zq_75151) + + i_107931 * + num_threads_126174)]; + bool index_certs_107965; - offset_46011 = 0; - // participating threads read initial accumulator + if (!bounds_check_107934) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 126) == -1) { + global_failure_args[0] = i_107931; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zm_arg_107966 = ((__global + double *) mem_121938)[i_119767 * + k2p2zq_75151 + + i_107931]; + double zm_res_107967 = zm_arg_107966 - + 
defunc_2_reduce_res_107953; + double zs_res_107968 = zm_res_107967 / zs_arg_107964; + + ((__global double *) mem_122423)[phys_tid_107039 + + (i_119767 * + (num_threads_126174 * + k2p2zq_75151) + + i_107931 * + num_threads_126174)] = + zs_res_107968; + } + } + for (int64_t i_119773 = 0; i_119773 < k2p2zq_75151; i_119773++) { + for (int64_t i_119777 = 0; i_119777 < k2p2zq_75151; + i_119777++) { + double defunc_2_reduce_res_107975; + double redout_119779 = 0.0; + + for (int64_t i_119780 = 0; i_119780 < k2p2zq_75151; + i_119780++) { + double x_107979 = ((__global + double *) mem_122423)[phys_tid_107039 + + (i_119780 * + (num_threads_126174 * + k2p2zq_75151) + + i_119773 * + num_threads_126174)]; + double x_107980 = ((__global + double *) mem_122423)[phys_tid_107039 + + (i_119780 * + (num_threads_126174 * + k2p2zq_75151) + + i_119777 * + num_threads_126174)]; + double defunc_1_f_res_107981 = x_107979 * x_107980; + double defunc_1_op_res_107978 = defunc_1_f_res_107981 + + redout_119779; + double redout_tmp_127307 = defunc_1_op_res_107978; + + redout_119779 = redout_tmp_127307; + } + defunc_2_reduce_res_107975 = redout_119779; + ((__global double *) mem_122464)[phys_tid_107039 + + (i_119773 * + (num_threads_126174 * + k2p2zq_75151) + + i_119777 * + num_threads_126174)] = + defunc_2_reduce_res_107975; + } + } + + int64_t min_res_107982 = smin64(r_75826, min_res_107907); + + for (int64_t i_127308 = 0; i_127308 < rp1_75837; i_127308++) { + ((__global double *) double_buffer_mem_125582)[phys_tid_107039 + + i_127308 * + num_threads_126174] = + ((__global double *) mem_122017)[gtid_107038 + i_127308 * + m_75136]; + } + for (int64_t j_107984 = 0; j_107984 < min_res_107982; j_107984++) { + bool y_107986 = slt64(j_107984, k2p2zq_75151); + bool index_certs_107987; + + if (!y_107986) { { - if (slt32(local_tid_45984, - sext_i64_i32(segred_group_sizze_40518))) { - x_45999 = ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46011)]; + if 
(atomic_cmpxchg_i32_global(global_failure, -1, + 127) == -1) { + global_failure_args[0] = j_107984; + global_failure_args[1] = k2p2zq_75151; + ; } + local_failure = true; + goto error_0; } - offset_46011 = 1; - while (slt32(offset_46011, wave_sizze_45986)) { - if (slt32(local_tid_45984 + offset_46011, - sext_i64_i32(segred_group_sizze_40518)) && - ((local_tid_45984 - squot32(local_tid_45984, - wave_sizze_45986) * - wave_sizze_45986) & (2 * offset_46011 - 1)) == - 0) { - // read array element - { - x_46000 = ((volatile __local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46011)]; + } + + double zeze_arg_107988 = ((__global + double *) double_buffer_mem_125570)[phys_tid_107039 + + j_107984 * + num_threads_126174]; + bool zeze_res_107989 = zeze_arg_107988 == 0.0; + + if (zeze_res_107989) { + for (int64_t i_127310 = 0; i_127310 < rp1_75837; + i_127310++) { + ((__global double *) mem_125258)[phys_tid_107039 + + i_127310 * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_127310 * + num_threads_126174]; + } + } else { + bool y_107991 = slt64(j_107984, rp1_75837); + bool index_certs_107992; + + if (!y_107991) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 128) == -1) { + global_failure_args[0] = j_107984; + global_failure_args[1] = rp1_75837; + ; } - // apply reduction operation + local_failure = true; + goto error_0; + } + } + + double y_107993 = ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + j_107984 * + num_threads_126174]; + double negate_arg_107994 = zeze_arg_107988 * y_107993; + double t_107995 = 0.0 - negate_arg_107994; + int64_t x_107996 = sub64(rp1_75837, j_107984); + int64_t upper_bound_107997 = sub64(x_107996, (int64_t) 1); + double t_107998; + double t_108000 = t_107995; + + for (int64_t i0_107999 = 0; i0_107999 < upper_bound_107997; + i0_107999++) { + int64_t x_108001 = add64(j_107984, i0_107999); + int64_t i_108002 = add64((int64_t) 1, x_108001); + bool 
x_108003 = sle64((int64_t) 0, i_108002); + bool y_108004 = slt64(i_108002, rp1_75837); + bool bounds_check_108005 = x_108003 && y_108004; + bool index_ok_108006 = y_107986 && bounds_check_108005; + bool index_certs_108007; + + if (!index_ok_108006) { { - float defunc_1_op_res_46001 = x_45999 + x_46000; - - x_45999 = defunc_1_op_res_46001; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 129) == -1) { + global_failure_args[0] = j_107984; + global_failure_args[1] = i_108002; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; } - // write result of operation + } + + double x_108008 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (j_107984 * + (num_threads_126174 * + rp1_75837) + + i_108002 * + num_threads_126174)]; + bool index_certs_108009; + + if (!bounds_check_108005) { { - ((volatile __local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_45999; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 130) == -1) { + global_failure_args[0] = i_108002; + global_failure_args[1] = rp1_75837; + ; + } + local_failure = true; + goto error_0; } } - offset_46011 *= 2; + + double y_108010 = ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_108002 * + num_threads_126174]; + double y_108011 = x_108008 * y_108010; + double loopres_108012 = t_108000 - y_108011; + double t_tmp_127311 = loopres_108012; + + t_108000 = t_tmp_127311; } - while (slt32(skip_waves_46012, - squot32(sext_i64_i32(segred_group_sizze_40518) + - wave_sizze_45986 - 1, - wave_sizze_45986))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46011 = skip_waves_46012 * wave_sizze_45986; - if (slt32(local_tid_45984 + offset_46011, - sext_i64_i32(segred_group_sizze_40518)) && - ((local_tid_45984 - squot32(local_tid_45984, - wave_sizze_45986) * - wave_sizze_45986) == 0 && - (squot32(local_tid_45984, wave_sizze_45986) & (2 * - skip_waves_46012 - - 1)) == - 0)) { - // read array 
element - { - x_46000 = ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984 + - offset_46011)]; - } - // apply reduction operation + t_107998 = t_108000; + + double t_108013 = t_107998 / zeze_arg_107988; + double y_108014 = zeze_arg_107988 * t_108013; + double lw_val_108015 = y_107993 + y_108014; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + j_107984 * + num_threads_126174] = + lw_val_108015; + for (int64_t i0_108018 = 0; i0_108018 < upper_bound_107997; + i0_108018++) { + int64_t x_108020 = add64(j_107984, i0_108018); + int64_t i_108021 = add64((int64_t) 1, x_108020); + bool x_108022 = sle64((int64_t) 0, i_108021); + bool y_108023 = slt64(i_108021, rp1_75837); + bool bounds_check_108024 = x_108022 && y_108023; + bool index_certs_108025; + + if (!bounds_check_108024) { { - float defunc_1_op_res_46001 = x_45999 + x_46000; - - x_45999 = defunc_1_op_res_46001; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 131) == -1) { + global_failure_args[0] = i_108021; + global_failure_args[1] = rp1_75837; + ; + } + local_failure = true; + goto error_0; } - // write result of operation + } + + double x_108026 = ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_108021 * + num_threads_126174]; + bool index_ok_108027 = y_107986 && bounds_check_108024; + bool index_certs_108028; + + if (!index_ok_108027) { { - ((__local - float *) red_arr_mem_45988)[sext_i32_i64(local_tid_45984)] = - x_45999; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 132) == -1) { + global_failure_args[0] = j_107984; + global_failure_args[1] = i_108021; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; } } - skip_waves_46012 *= 2; + + double y_108029 = ((__global + double *) double_buffer_mem_125569)[phys_tid_107039 + + (j_107984 * + (num_threads_126174 * + rp1_75837) + + i_108021 * + num_threads_126174)]; + double y_108030 = t_108013 * y_108029; + double 
lw_val_108031 = x_108026 + y_108030; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_108021 * + num_threads_126174] = + lw_val_108031; + } + for (int64_t i_127313 = 0; i_127313 < rp1_75837; + i_127313++) { + ((__global double *) mem_125258)[phys_tid_107039 + + i_127313 * + num_threads_126174] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_127313 * + num_threads_126174]; + } + } + for (int64_t i_127314 = 0; i_127314 < rp1_75837; i_127314++) { + ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_127314 * + num_threads_126174] = + ((__global double *) mem_125258)[phys_tid_107039 + + i_127314 * + num_threads_126174]; + } + } + for (int64_t i_127315 = 0; i_127315 < k2p2zq_75151; i_127315++) { + ((__global double *) mem_122537)[phys_tid_107039 + i_127315 * + num_threads_126174] = 0.0; + } + for (int64_t write_iter_119781 = 0; write_iter_119781 < + k2p2zq_75151; write_iter_119781++) { + int64_t write_iv_119784 = ((__global + int64_t *) mem_122047)[phys_tid_107039 + + write_iter_119781 * + num_threads_126174]; + double defunc_2_reduce_res_108038; + double redout_119791 = 0.0; + + for (int64_t i_119792 = 0; i_119792 < k2p2zq_75151; + i_119792++) { + double x_108042 = ((__global + double *) double_buffer_mem_125582)[phys_tid_107039 + + i_119792 * + num_threads_126174]; + double x_108043 = ((__global + double *) mem_122423)[phys_tid_107039 + + (i_119792 * + (num_threads_126174 * + k2p2zq_75151) + + write_iter_119781 * + num_threads_126174)]; + double defunc_1_f_res_108044 = x_108042 * x_108043; + double defunc_1_op_res_108041 = defunc_1_f_res_108044 + + redout_119791; + double redout_tmp_127317 = defunc_1_op_res_108041; + + redout_119791 = redout_tmp_127317; + } + defunc_2_reduce_res_108038 = redout_119791; + + bool less_than_zzero_119785 = slt64(write_iv_119784, + (int64_t) 0); + bool greater_than_sizze_119786 = sle64(k2p2zq_75151, + write_iv_119784); + bool outside_bounds_dim_119787 = 
less_than_zzero_119785 || + greater_than_sizze_119786; + + if (!outside_bounds_dim_119787) { + ((__global double *) mem_122537)[phys_tid_107039 + + write_iv_119784 * + num_threads_126174] = + defunc_2_reduce_res_108038; + } + } + for (int64_t i_119795 = 0; i_119795 < k2p2zq_75151; i_119795++) { + int64_t x_108047 = ((__global + int64_t *) mem_122047)[phys_tid_107039 + + i_119795 * + num_threads_126174]; + + for (int64_t i_127319 = 0; i_127319 < k2p2zq_75151; + i_127319++) { + ((__global int64_t *) mem_122564)[phys_tid_107039 + + i_127319 * + num_threads_126174] = + x_108047; + } + for (int64_t i_127320 = 0; i_127320 < k2p2zq_75151; + i_127320++) { + ((__global int64_t *) mem_122552)[phys_tid_107039 + + (i_119795 * + (num_threads_126174 * + k2p2zq_75151) + + i_127320 * + num_threads_126174)] = + ((__global int64_t *) mem_122564)[phys_tid_107039 + + i_127320 * + num_threads_126174]; + } + } + for (int64_t i_127321 = 0; i_127321 < k2p2zq_75151; i_127321++) { + for (int64_t i_127322 = 0; i_127322 < k2p2zq_75151; + i_127322++) { + ((__global double *) mem_122575)[phys_tid_107039 + + (i_127321 * + (num_threads_126174 * + k2p2zq_75151) + + i_127322 * + num_threads_126174)] = + 0.0; + } + } + for (int64_t write_iter_119797 = 0; write_iter_119797 < + binop_x_120251; write_iter_119797++) { + int64_t new_index_120022 = squot64(write_iter_119797, + k2p2zq_75151); + int64_t binop_y_120024 = k2p2zq_75151 * new_index_120022; + int64_t new_index_120025 = write_iter_119797 - binop_y_120024; + int64_t write_iv_119799 = ((__global + int64_t *) mem_122552)[phys_tid_107039 + + (new_index_120022 * + (num_threads_126174 * + k2p2zq_75151) + + new_index_120025 * + num_threads_126174)]; + int64_t write_iv_119800 = ((__global + int64_t *) mem_122047)[phys_tid_107039 + + new_index_120025 * + num_threads_126174]; + bool less_than_zzero_119802 = slt64(write_iv_119799, + (int64_t) 0); + bool greater_than_sizze_119803 = sle64(k2p2zq_75151, + write_iv_119799); + bool outside_bounds_dim_119804 = 
less_than_zzero_119802 || + greater_than_sizze_119803; + bool less_than_zzero_119805 = slt64(write_iv_119800, + (int64_t) 0); + bool greater_than_sizze_119806 = sle64(k2p2zq_75151, + write_iv_119800); + bool outside_bounds_dim_119807 = less_than_zzero_119805 || + greater_than_sizze_119806; + bool outside_bounds_119809 = outside_bounds_dim_119804 || + outside_bounds_dim_119807; + + if (!outside_bounds_119809) { + for (int64_t i_127324 = 0; i_127324 < (int64_t) 1; + i_127324++) { + ((__global double *) mem_122575)[phys_tid_107039 + + (write_iv_119799 * + (num_threads_126174 * + k2p2zq_75151) + + (write_iv_119800 + + i_127324) * + num_threads_126174)] = + ((__global double *) mem_122464)[phys_tid_107039 + + new_index_120022 * + (num_threads_126174 * + k2p2zq_75151) + + num_threads_126174 * + new_index_120025 + + i_127324 * + num_threads_126174]; + } + } + } + for (int64_t i_119816 = 0; i_119816 < k2p2zq_75151; i_119816++) { + double x_108060 = ((__global + double *) mem_122537)[phys_tid_107039 + + i_119816 * + num_threads_126174]; + + for (int64_t i_119821 = 0; i_119821 < k2p2zq_75151; + i_119821++) { + double x_108062 = ((__global + double *) mem_122575)[phys_tid_107039 + + (i_119816 * + (num_threads_126174 * + k2p2zq_75151) + + i_119821 * + num_threads_126174)]; + bool isnan_res_108063; + + isnan_res_108063 = futrts_isnan64(x_108062); + + double defunc_0_f_res_108064; + + if (isnan_res_108063) { + defunc_0_f_res_108064 = 0.0; + } else { + defunc_0_f_res_108064 = x_108062; } - // and back to memory with the final result - { - if (local_tid_45984 == 0) { - ((__global float *) mem_44844)[gtid_40455 * - i32_res_29181 + - gtid_40456] = - x_45999; - } + ((__global double *) mem_122598)[phys_tid_107039 + + (i_119816 * + (num_threads_126174 * + k2p2zq_75151) + + i_119821 * + num_threads_126174)] = + defunc_0_f_res_108064; + } + + bool isnan_res_108065; + + isnan_res_108065 = futrts_isnan64(x_108060); + + double defunc_0_f_res_108066; + + if (isnan_res_108065) { + 
defunc_0_f_res_108066 = 0.0; + } else { + defunc_0_f_res_108066 = x_108060; + } + ((__global double *) mem_122595)[phys_tid_107039 + i_119816 * + num_threads_126174] = + defunc_0_f_res_108066; + } + + bool isnan_res_108067; + + isnan_res_108067 = futrts_isnan64(recresid_r_107615); + + bool cond_108068 = !isnan_res_108067; + bool cond_t_res_108069 = x_107585 == k2p2zq_75151; + bool x_108070 = cond_108068 && cond_t_res_108069; + bool nona_t_res_108071 = min_res_107907 == k2p2zq_75151; + bool x_108072 = x_108070 && nona_t_res_108071; + bool complement_arg_108073; + + if (x_108072) { + double defunc_2_reduce_res_108074; + double redout_119823 = 0.0; + + for (int64_t i_119824 = 0; i_119824 < k2p2zq_75151; + i_119824++) { + double x_108078 = ((__global + double *) mem_122028)[phys_tid_107039 + + i_119824 * + num_threads_126174]; + double x_108080 = ((__global + double *) mem_122014)[i_119824 * + m_75136 + + gtid_107038]; + double x_108081 = ((__global + double *) mem_122595)[phys_tid_107039 + + i_119824 * + num_threads_126174]; + double defunc_0_f_res_108082; + double redout_119825 = 0.0; + + for (int64_t i_119826 = 0; i_119826 < k2p2zq_75151; + i_119826++) { + double x_108086 = ((__global + double *) mem_122028)[phys_tid_107039 + + i_119826 * + num_threads_126174]; + double x_108087 = ((__global + double *) mem_122011)[i_119824 * + (m_75136 * + k2p2zq_75151) + + i_119826 * + m_75136 + + gtid_107038]; + double x_108088 = ((__global + double *) mem_120246)[i_119826 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_107038 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_108089 = x_108078 * x_108086; + double y_108090 = x_108089 / fr_107604; + double defunc_1_f_res_108091 = x_108087 - y_108090; + double defunc_1_f_res_108092 = x_108088 * + defunc_1_f_res_108091; + double defunc_1_op_res_108085 = defunc_1_f_res_108092 + + redout_119825; + double redout_tmp_127329 = defunc_1_op_res_108085; + + redout_119825 = redout_tmp_127329; } + defunc_0_f_res_108082 = 
redout_119825; + + double defunc_0_g_res_108093 = resid_107613 * + defunc_0_f_res_108082; + double defunc_1_f_res_108094 = x_108080 + + defunc_0_g_res_108093; + double defunc_1_f_res_108095 = x_108081 - + defunc_1_f_res_108094; + double defunc_0_f_res_108096 = fabs(defunc_1_f_res_108095); + double defunc_1_op_res_108077 = defunc_0_f_res_108096 + + redout_119823; + double redout_tmp_127328 = defunc_1_op_res_108077; + + redout_119823 = redout_tmp_127328; } + defunc_2_reduce_res_108074 = redout_119823; + + double i64_res_108097 = sitofp_i64_f64(k2p2zq_75151); + double mean_abs_res_108098 = defunc_2_reduce_res_108074 / + i64_res_108097; + bool approx_equal_res_108099 = mean_abs_res_108098 <= tol_75329; + + complement_arg_108073 = approx_equal_res_108099; + } else { + complement_arg_108073 = 0; } + + bool check_108100 = !complement_arg_108073; + bool check_108101; + + if (check_108100) { + bool defunc_2_reduce_res_108102; + bool redout_119827 = 1; + + for (int64_t i_119828 = 0; i_119828 < defunc_2_reduce_res_75260; + i_119828++) { + double x_108106 = ((__global + double *) mem_121941)[i_119828 * + m_75136 + + gtid_107038]; + bool defunc_0_f_res_108107; + + defunc_0_f_res_108107 = futrts_isnan64(x_108106); + + bool x_108105 = defunc_0_f_res_108107 && redout_119827; + bool redout_tmp_127330 = x_108105; + + redout_119827 = redout_tmp_127330; + } + defunc_2_reduce_res_108102 = redout_119827; + + bool check_t_res_108108 = !defunc_2_reduce_res_108102; + + check_108101 = check_t_res_108108; + } else { + check_108101 = 0; + } + ((__global bool *) mem_122650)[gtid_107038] = check_108101; + for (int64_t i_127331 = 0; i_127331 < k2p2zq_75151; i_127331++) { + for (int64_t i_127332 = 0; i_127332 < k2p2zq_75151; + i_127332++) { + ((__global double *) mem_122654)[i_127331 * (m_75136 * + k2p2zq_75151) + + i_127332 * m_75136 + + gtid_107038] = ((__global + double *) mem_122598)[phys_tid_107039 + + (i_127331 * + (num_threads_126174 * + k2p2zq_75151) + + i_127332 * + num_threads_126174)]; 
+ } + } + for (int64_t i_127333 = 0; i_127333 < k2p2zq_75151; i_127333++) { + ((__global double *) mem_122657)[i_127333 * m_75136 + + gtid_107038] = ((__global + double *) mem_122595)[phys_tid_107039 + + i_127333 * + num_threads_126174]; + } + ((__global int64_t *) mem_122659)[gtid_107038] = min_res_107907; + ((__global double *) mem_122661)[gtid_107038] = recresid_r_107615; } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_40518 + #undef segmap_group_sizze_107574 } -__kernel void mainzisegred_large_40603(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46078_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46076_backing_aligned_1, - int64_t i32_res_29181, - int64_t num_groups_40652, - int64_t groups_per_segment_46062, - int64_t elements_per_thread_46063, - int64_t virt_num_groups_46064, - int64_t threads_per_segment_46066, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global unsigned char *mem_44910, - __global - unsigned char *group_res_arr_mem_46067, - __global - unsigned char *mainzicounter_mem_46069) +__kernel void mainzisegmap_108136(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + double tol_75329, int64_t r_75826, __global + unsigned char *mem_120246, __global + unsigned char *mem_121941, __global + unsigned char *mem_param_121972, __global + unsigned char *mem_122674, __global + unsigned char *mem_122680, __global + unsigned char *mem_122682, __global + unsigned char *mem_123135, __global + unsigned char *mem_123699, __global + unsigned char *mem_123702, __global + unsigned char *mem_123705, __global + unsigned char *mem_123708, __global + unsigned char *mem_123711) { - #define segred_group_sizze_40651 (mainzisegred_group_sizze_40597) + #define segmap_group_sizze_110797 (mainzisegmap_group_sizze_108138) const int block_dim0 = 0; 
const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46078_backing_1 = - (__local volatile - char *) sync_arr_mem_46078_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46076_backing_0 = - (__local volatile - char *) red_arr_mem_46076_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46071; - int32_t local_tid_46072; - int64_t group_sizze_46075; - int32_t wave_sizze_46074; - int32_t group_tid_46073; - - global_tid_46071 = get_global_id(0); - local_tid_46072 = get_local_id(0); - group_sizze_46075 = get_local_size(0); - wave_sizze_46074 = LOCKSTEP_WIDTH; - group_tid_46073 = get_group_id(0); - - int32_t phys_tid_40603; - - phys_tid_40603 = global_tid_46071; - - __local char *red_arr_mem_46076; - - red_arr_mem_46076 = (__local char *) red_arr_mem_46076_backing_0; - - __local char *sync_arr_mem_46078; - - sync_arr_mem_46078 = (__local char *) sync_arr_mem_46078_backing_1; - - int32_t phys_group_id_46080; - - phys_group_id_46080 = get_group_id(0); - for (int32_t i_46081 = 0; i_46081 < - sdiv_up32(sext_i64_i32(virt_num_groups_46064) - phys_group_id_46080, - sext_i64_i32(num_groups_40652)); i_46081++) { - int32_t virt_group_id_46082 = phys_group_id_46080 + i_46081 * - sext_i64_i32(num_groups_40652); - int32_t flat_segment_id_46083 = squot32(virt_group_id_46082, - sext_i64_i32(groups_per_segment_46062)); - int64_t global_tid_46084 = srem64(sext_i32_i64(virt_group_id_46082) * - segred_group_sizze_40651 + - sext_i32_i64(local_tid_46072), - segred_group_sizze_40651 * - groups_per_segment_46062); - int64_t gtid_40592 = squot64(sext_i32_i64(flat_segment_id_46083), - i32_res_29181); - int64_t gtid_40593 = sext_i32_i64(flat_segment_id_46083) - - squot64(sext_i32_i64(flat_segment_id_46083), i32_res_29181) * - i32_res_29181; - int64_t gtid_40602; - float x_acc_46085; - int64_t chunk_sizze_46086; - - chunk_sizze_46086 = smin64(elements_per_thread_46063, - sdiv_up64(i32_res_29181 - - 
sext_i32_i64(sext_i64_i32(global_tid_46084)), - threads_per_segment_46066)); - - float x_40655; - float x_40656; - - // neutral-initialise the accumulators - { - x_acc_46085 = 0.0F; - } - for (int64_t i_46090 = 0; i_46090 < chunk_sizze_46086; i_46090++) { - gtid_40602 = sext_i32_i64(sext_i64_i32(global_tid_46084)) + - threads_per_segment_46066 * i_46090; - // apply map function - { - float x_40661 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_40592 * - i32_res_29181 + - gtid_40602]; - float x_40662 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_40592 * - (i32_res_29181 * - i32_res_29181) + - gtid_40593 * - i32_res_29181 + - gtid_40602]; - float defunc_1_f_res_40663 = x_40661 * x_40662; - - // save map-out results - { } - // load accumulator - { - x_40655 = x_acc_46085; - } - // load new values - { - x_40656 = defunc_1_f_res_40663; - } - // apply reduction operator - { - float defunc_1_op_res_40657 = x_40655 + x_40656; + int32_t global_tid_127965; + int32_t local_tid_127966; + int64_t group_sizze_127969; + int32_t wave_sizze_127968; + int32_t group_tid_127967; + + global_tid_127965 = get_global_id(0); + local_tid_127966 = get_local_id(0); + group_sizze_127969 = get_local_size(0); + wave_sizze_127968 = LOCKSTEP_WIDTH; + group_tid_127967 = get_group_id(0); + + int32_t phys_tid_108136; + + phys_tid_108136 = global_tid_127965; + + int64_t gtid_108135; + + gtid_108135 = sext_i32_i64(group_tid_127967) * segmap_group_sizze_110797 + + sext_i32_i64(local_tid_127966); + if (slt64(gtid_108135, m_75136)) { + int64_t x_110803 = ((__global int64_t *) mem_param_121972)[gtid_108135]; + double recresid_r_110808 = ((__global + double *) mem_122682)[gtid_108135]; + int64_t min_res_110809 = ((__global int64_t *) mem_123135)[gtid_108135]; + bool isnan_res_110811; + + isnan_res_110811 = futrts_isnan64(recresid_r_110808); + + bool cond_110812 = !isnan_res_110811; + bool cond_t_res_110813 = x_110803 == k2p2zq_75151; + bool x_110814 = cond_110812 && cond_t_res_110813; + 
bool nona_t_res_110815 = min_res_110809 == k2p2zq_75151; + bool x_110816 = x_110814 && nona_t_res_110815; + bool complement_arg_110817; + + if (x_110816) { + double fr_110806 = ((__global double *) mem_122674)[gtid_108135]; + double resid_110807 = ((__global double *) mem_122680)[gtid_108135]; + double defunc_2_reduce_res_110818; + double redout_119860 = 0.0; + + for (int64_t i_119861 = 0; i_119861 < k2p2zq_75151; i_119861++) { + double x_110822 = ((__global double *) mem_123705)[i_119861 * + m_75136 + + gtid_108135]; + double x_110824 = ((__global double *) mem_123702)[i_119861 * + m_75136 + + gtid_108135]; + double x_110825 = ((__global double *) mem_123708)[i_119861 * + m_75136 + + gtid_108135]; + double defunc_0_f_res_110826; + double redout_119862 = 0.0; + + for (int64_t i_119863 = 0; i_119863 < k2p2zq_75151; + i_119863++) { + double x_110830 = ((__global + double *) mem_123705)[i_119863 * + m_75136 + + gtid_108135]; + double x_110831 = ((__global + double *) mem_123699)[i_119861 * + (m_75136 * + k2p2zq_75151) + + i_119863 * + m_75136 + + gtid_108135]; + double x_110832 = ((__global + double *) mem_120246)[i_119863 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_108135 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_110833 = x_110822 * x_110830; + double y_110834 = x_110833 / fr_110806; + double defunc_1_f_res_110835 = x_110831 - y_110834; + double defunc_1_f_res_110836 = x_110832 * + defunc_1_f_res_110835; + double defunc_1_op_res_110829 = defunc_1_f_res_110836 + + redout_119862; + double redout_tmp_127971 = defunc_1_op_res_110829; - // store in accumulator - { - x_acc_46085 = defunc_1_op_res_40657; - } + redout_119862 = redout_tmp_127971; } + defunc_0_f_res_110826 = redout_119862; + + double defunc_0_g_res_110837 = resid_110807 * + defunc_0_f_res_110826; + double defunc_1_f_res_110838 = x_110824 + defunc_0_g_res_110837; + double defunc_1_f_res_110839 = x_110825 - defunc_1_f_res_110838; + double defunc_0_f_res_110840 = fabs(defunc_1_f_res_110839); 
+ double defunc_1_op_res_110821 = defunc_0_f_res_110840 + + redout_119860; + double redout_tmp_127970 = defunc_1_op_res_110821; + + redout_119860 = redout_tmp_127970; } + defunc_2_reduce_res_110818 = redout_119860; + + double i64_res_110841 = sitofp_i64_f64(k2p2zq_75151); + double mean_abs_res_110842 = defunc_2_reduce_res_110818 / + i64_res_110841; + bool approx_equal_res_110843 = mean_abs_res_110842 <= tol_75329; + + complement_arg_110817 = approx_equal_res_110843; + } else { + complement_arg_110817 = 0; } - // to reduce current chunk, first store our result in memory - { - x_40655 = x_acc_46085; - ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_40655; - } - barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46091; - int32_t skip_waves_46092; + bool check_110844 = !complement_arg_110817; + bool check_110845; - skip_waves_46092 = 1; - - float x_46087; - float x_46088; - - offset_46091 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46072, - sext_i64_i32(segred_group_sizze_40651))) { - x_46087 = ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46091)]; - } - } - offset_46091 = 1; - while (slt32(offset_46091, wave_sizze_46074)) { - if (slt32(local_tid_46072 + offset_46091, - sext_i64_i32(segred_group_sizze_40651)) && - ((local_tid_46072 - squot32(local_tid_46072, wave_sizze_46074) * - wave_sizze_46074) & (2 * offset_46091 - 1)) == 0) { - // read array element - { - x_46088 = ((volatile __local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46091)]; - } - // apply reduction operation - { - float defunc_1_op_res_46089 = x_46087 + x_46088; - - x_46087 = defunc_1_op_res_46089; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_46087; - } - } - offset_46091 *= 2; - } - while (slt32(skip_waves_46092, - squot32(sext_i64_i32(segred_group_sizze_40651) + - wave_sizze_46074 - 1, 
wave_sizze_46074))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46091 = skip_waves_46092 * wave_sizze_46074; - if (slt32(local_tid_46072 + offset_46091, - sext_i64_i32(segred_group_sizze_40651)) && - ((local_tid_46072 - squot32(local_tid_46072, wave_sizze_46074) * - wave_sizze_46074) == 0 && (squot32(local_tid_46072, - wave_sizze_46074) & (2 * - skip_waves_46092 - - 1)) == - 0)) { - // read array element - { - x_46088 = ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46091)]; - } - // apply reduction operation - { - float defunc_1_op_res_46089 = x_46087 + x_46088; - - x_46087 = defunc_1_op_res_46089; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_46087; - } - } - skip_waves_46092 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46072) == (int64_t) 0) { - x_acc_46085 = x_46087; - } - } - if (groups_per_segment_46062 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46072 == 0) { - ((__global float *) mem_44910)[gtid_40592 * i32_res_29181 + - gtid_40593] = x_acc_46085; - } - } - } else { - int32_t old_counter_46093; + if (check_110844) { + bool defunc_2_reduce_res_110846; + bool redout_119864 = 1; - // first thread in group saves group result to global memory - { - if (local_tid_46072 == 0) { - ((__global - float *) group_res_arr_mem_46067)[sext_i32_i64(virt_group_id_46082) * - segred_group_sizze_40651] = - x_acc_46085; - mem_fence_global(); - old_counter_46093 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46069)[sext_i32_i64(srem32(flat_segment_id_46083, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46078)[(int64_t) 0] = - old_counter_46093 == groups_per_segment_46062 - - (int64_t) 1; - } + for (int64_t i_119865 = 0; i_119865 < defunc_2_reduce_res_75260; + i_119865++) { + double x_110850 = ((__global double *) 
mem_121941)[i_119865 * + m_75136 + + gtid_108135]; + bool defunc_0_f_res_110851; + + defunc_0_f_res_110851 = futrts_isnan64(x_110850); + + bool x_110849 = defunc_0_f_res_110851 && redout_119864; + bool redout_tmp_127972 = x_110849; + + redout_119864 = redout_tmp_127972; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_110846 = redout_119864; - bool is_last_group_46094; + bool check_t_res_110852 = !defunc_2_reduce_res_110846; - is_last_group_46094 = ((__local - bool *) sync_arr_mem_46078)[(int64_t) 0]; - if (is_last_group_46094) { - if (local_tid_46072 == 0) { - old_counter_46093 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46069)[sext_i32_i64(srem32(flat_segment_id_46083, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46062)); - } - // read in the per-group-results - { - int64_t read_per_thread_46095 = - sdiv_up64(groups_per_segment_46062, - segred_group_sizze_40651); - - x_40655 = 0.0F; - for (int64_t i_46096 = 0; i_46096 < read_per_thread_46095; - i_46096++) { - int64_t group_res_id_46097 = - sext_i32_i64(local_tid_46072) * - read_per_thread_46095 + i_46096; - int64_t index_of_group_res_46098 = - sext_i32_i64(flat_segment_id_46083) * - groups_per_segment_46062 + group_res_id_46097; - - if (slt64(group_res_id_46097, - groups_per_segment_46062)) { - x_40656 = ((__global - float *) group_res_arr_mem_46067)[index_of_group_res_46098 * - segred_group_sizze_40651]; - - float defunc_1_op_res_40657; - - defunc_1_op_res_40657 = x_40655 + x_40656; - x_40655 = defunc_1_op_res_40657; - } - } - } - ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_40655; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46099; - int32_t skip_waves_46100; - - skip_waves_46100 = 1; - - float x_46087; - float x_46088; - - offset_46099 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46072, - sext_i64_i32(segred_group_sizze_40651))) { 
- x_46087 = ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46099)]; - } - } - offset_46099 = 1; - while (slt32(offset_46099, wave_sizze_46074)) { - if (slt32(local_tid_46072 + offset_46099, - sext_i64_i32(segred_group_sizze_40651)) && - ((local_tid_46072 - squot32(local_tid_46072, - wave_sizze_46074) * - wave_sizze_46074) & (2 * offset_46099 - 1)) == - 0) { - // read array element - { - x_46088 = ((volatile __local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46099)]; - } - // apply reduction operation - { - float defunc_1_op_res_46089 = x_46087 + x_46088; - - x_46087 = defunc_1_op_res_46089; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_46087; - } - } - offset_46099 *= 2; - } - while (slt32(skip_waves_46100, - squot32(sext_i64_i32(segred_group_sizze_40651) + - wave_sizze_46074 - 1, - wave_sizze_46074))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46099 = skip_waves_46100 * wave_sizze_46074; - if (slt32(local_tid_46072 + offset_46099, - sext_i64_i32(segred_group_sizze_40651)) && - ((local_tid_46072 - squot32(local_tid_46072, - wave_sizze_46074) * - wave_sizze_46074) == 0 && - (squot32(local_tid_46072, wave_sizze_46074) & (2 * - skip_waves_46100 - - 1)) == - 0)) { - // read array element - { - x_46088 = ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072 + - offset_46099)]; - } - // apply reduction operation - { - float defunc_1_op_res_46089 = x_46087 + x_46088; - - x_46087 = defunc_1_op_res_46089; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46076)[sext_i32_i64(local_tid_46072)] = - x_46087; - } - } - skip_waves_46100 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46072 == 0) { - ((__global float *) mem_44910)[gtid_40592 * - i32_res_29181 + - gtid_40593] = - x_46087; - } - } - } - } + check_110845 = check_t_res_110852; + } else { + check_110845 = 0; } - 
barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + ((__global bool *) mem_123711)[gtid_108135] = check_110845; } - error_1: + error_0: return; - #undef segred_group_sizze_40651 + #undef segmap_group_sizze_110797 } -__kernel void mainzisegred_large_40733(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46210_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46208_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29181, - int64_t num_groups_40780, - int64_t groups_per_segment_46194, - int64_t elements_per_thread_46195, - int64_t virt_num_groups_46196, - int64_t threads_per_segment_46198, - __global unsigned char *mem_44397, - __global - unsigned char *defunc_4_map_res_mem_44916, - __global unsigned char *mem_45134, - __global - unsigned char *group_res_arr_mem_46199, - __global - unsigned char *mainzicounter_mem_46201) +__kernel void mainzisegmap_108208(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_122003, __global + unsigned char *mem_123695) { - #define segred_group_sizze_40779 (mainzisegred_group_sizze_40727) + #define segmap_group_sizze_110788 (mainzisegmap_group_sizze_108211) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46210_backing_1 = - (__local volatile - char *) sync_arr_mem_46210_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46208_backing_0 = - (__local volatile - char *) red_arr_mem_46208_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46203; - int32_t local_tid_46204; - int64_t group_sizze_46207; - int32_t wave_sizze_46206; - int32_t group_tid_46205; - - global_tid_46203 = get_global_id(0); - local_tid_46204 = get_local_id(0); - group_sizze_46207 = get_local_size(0); - wave_sizze_46206 = LOCKSTEP_WIDTH; - group_tid_46205 = get_group_id(0); + int32_t global_tid_127960; + int32_t local_tid_127961; + int64_t group_sizze_127964; + int32_t 
wave_sizze_127963; + int32_t group_tid_127962; - int32_t phys_tid_40733; + global_tid_127960 = get_global_id(0); + local_tid_127961 = get_local_id(0); + group_sizze_127964 = get_local_size(0); + wave_sizze_127963 = LOCKSTEP_WIDTH; + group_tid_127962 = get_group_id(0); - phys_tid_40733 = global_tid_46203; + int32_t phys_tid_108208; - __local char *red_arr_mem_46208; + phys_tid_108208 = global_tid_127960; - red_arr_mem_46208 = (__local char *) red_arr_mem_46208_backing_0; + int64_t gtid_108206; - __local char *sync_arr_mem_46210; + gtid_108206 = squot64(sext_i32_i64(group_tid_127962) * + segmap_group_sizze_110788 + + sext_i32_i64(local_tid_127961), k2p2zq_75151); - sync_arr_mem_46210 = (__local char *) sync_arr_mem_46210_backing_1; + int64_t gtid_108207; - int32_t phys_group_id_46212; - - phys_group_id_46212 = get_group_id(0); - for (int32_t i_46213 = 0; i_46213 < - sdiv_up32(sext_i64_i32(virt_num_groups_46196) - phys_group_id_46212, - sext_i64_i32(num_groups_40780)); i_46213++) { - int32_t virt_group_id_46214 = phys_group_id_46212 + i_46213 * - sext_i64_i32(num_groups_40780); - int32_t flat_segment_id_46215 = squot32(virt_group_id_46214, - sext_i64_i32(groups_per_segment_46194)); - int64_t global_tid_46216 = srem64(sext_i32_i64(virt_group_id_46214) * - segred_group_sizze_40779 + - sext_i32_i64(local_tid_46204), - segred_group_sizze_40779 * - groups_per_segment_46194); - int64_t gtid_40722 = squot64(sext_i32_i64(flat_segment_id_46215), - N_29165); - int64_t gtid_40723 = sext_i32_i64(flat_segment_id_46215) - - squot64(sext_i32_i64(flat_segment_id_46215), N_29165) * N_29165; - int64_t gtid_40732; - float x_acc_46217; - int64_t chunk_sizze_46218; + gtid_108207 = sext_i32_i64(group_tid_127962) * segmap_group_sizze_110788 + + sext_i32_i64(local_tid_127961) - + squot64(sext_i32_i64(group_tid_127962) * segmap_group_sizze_110788 + + sext_i32_i64(local_tid_127961), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_108206, m_75136) && slt64(gtid_108207, k2p2zq_75151)) { + double 
x_110791 = ((__global double *) mem_122003)[gtid_108206 * + k2p2zq_75151 + + gtid_108207]; + bool isnan_res_110792; - chunk_sizze_46218 = smin64(elements_per_thread_46195, - sdiv_up64(i32_res_29181 - - sext_i32_i64(sext_i64_i32(global_tid_46216)), - threads_per_segment_46198)); + isnan_res_110792 = futrts_isnan64(x_110791); - float x_40783; - float x_40784; + double defunc_0_f_res_110793; - // neutral-initialise the accumulators - { - x_acc_46217 = 0.0F; + if (isnan_res_110792) { + defunc_0_f_res_110793 = 0.0; + } else { + defunc_0_f_res_110793 = x_110791; } - for (int64_t i_46222 = 0; i_46222 < chunk_sizze_46218; i_46222++) { - gtid_40732 = sext_i32_i64(sext_i64_i32(global_tid_46216)) + - threads_per_segment_46198 * i_46222; - // apply map function - { - float x_40788 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_40722 * - i32_res_29181 + - gtid_40732]; - float x_40789 = ((__global float *) mem_44397)[gtid_40723 * - i32_res_29181 + - gtid_40732]; - float defunc_1_f_res_40790 = x_40788 * x_40789; - - // save map-out results - { } - // load accumulator - { - x_40783 = x_acc_46217; - } - // load new values - { - x_40784 = defunc_1_f_res_40790; - } - // apply reduction operator - { - float defunc_1_op_res_40785 = x_40783 + x_40784; - - // store in accumulator - { - x_acc_46217 = defunc_1_op_res_40785; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_40783 = x_acc_46217; - ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_40783; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46223; - int32_t skip_waves_46224; - - skip_waves_46224 = 1; - - float x_46219; - float x_46220; - - offset_46223 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46204, - sext_i64_i32(segred_group_sizze_40779))) { - x_46219 = ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46223)]; - } - } - offset_46223 = 1; - while (slt32(offset_46223, 
wave_sizze_46206)) { - if (slt32(local_tid_46204 + offset_46223, - sext_i64_i32(segred_group_sizze_40779)) && - ((local_tid_46204 - squot32(local_tid_46204, wave_sizze_46206) * - wave_sizze_46206) & (2 * offset_46223 - 1)) == 0) { - // read array element - { - x_46220 = ((volatile __local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46223)]; - } - // apply reduction operation - { - float defunc_1_op_res_46221 = x_46219 + x_46220; - - x_46219 = defunc_1_op_res_46221; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_46219; - } - } - offset_46223 *= 2; - } - while (slt32(skip_waves_46224, - squot32(sext_i64_i32(segred_group_sizze_40779) + - wave_sizze_46206 - 1, wave_sizze_46206))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46223 = skip_waves_46224 * wave_sizze_46206; - if (slt32(local_tid_46204 + offset_46223, - sext_i64_i32(segred_group_sizze_40779)) && - ((local_tid_46204 - squot32(local_tid_46204, wave_sizze_46206) * - wave_sizze_46206) == 0 && (squot32(local_tid_46204, - wave_sizze_46206) & (2 * - skip_waves_46224 - - 1)) == - 0)) { - // read array element - { - x_46220 = ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46223)]; - } - // apply reduction operation - { - float defunc_1_op_res_46221 = x_46219 + x_46220; - - x_46219 = defunc_1_op_res_46221; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_46219; - } - } - skip_waves_46224 *= 2; + ((__global double *) mem_123695)[gtid_108206 * k2p2zq_75151 + + gtid_108207] = defunc_0_f_res_110793; + } + + error_0: + return; + #undef segmap_group_sizze_110788 +} +__kernel void mainzisegmap_108230(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_122007, __global + unsigned char *mem_123691) +{ + #define segmap_group_sizze_110779 (mainzisegmap_group_sizze_108234) + + 
const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127955; + int32_t local_tid_127956; + int64_t group_sizze_127959; + int32_t wave_sizze_127958; + int32_t group_tid_127957; + + global_tid_127955 = get_global_id(0); + local_tid_127956 = get_local_id(0); + group_sizze_127959 = get_local_size(0); + wave_sizze_127958 = LOCKSTEP_WIDTH; + group_tid_127957 = get_group_id(0); + + int32_t phys_tid_108230; + + phys_tid_108230 = global_tid_127955; + + int64_t gtid_108227; + + gtid_108227 = squot64(sext_i32_i64(group_tid_127957) * + segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956), k2p2zq_75151 * + k2p2zq_75151); + + int64_t gtid_108228; + + gtid_108228 = squot64(sext_i32_i64(group_tid_127957) * + segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956) - + squot64(sext_i32_i64(group_tid_127957) * + segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + + int64_t gtid_108229; + + gtid_108229 = sext_i32_i64(group_tid_127957) * segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956) - + squot64(sext_i32_i64(group_tid_127957) * segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956), k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - squot64(sext_i32_i64(group_tid_127957) * + segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956) - + squot64(sext_i32_i64(group_tid_127957) * + segmap_group_sizze_110779 + + sext_i32_i64(local_tid_127956), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + if ((slt64(gtid_108227, m_75136) && slt64(gtid_108228, k2p2zq_75151)) && + slt64(gtid_108229, k2p2zq_75151)) { + double x_110782 = ((__global double *) mem_122007)[gtid_108227 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108228 * + k2p2zq_75151 + + gtid_108229]; + bool isnan_res_110783; + + isnan_res_110783 = 
futrts_isnan64(x_110782); + + double defunc_0_f_res_110784; + + if (isnan_res_110783) { + defunc_0_f_res_110784 = 0.0; + } else { + defunc_0_f_res_110784 = x_110782; } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46204) == (int64_t) 0) { - x_acc_46217 = x_46219; - } + ((__global double *) mem_123691)[gtid_108227 * (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108228 * k2p2zq_75151 + + gtid_108229] = defunc_0_f_res_110784; + } + + error_0: + return; + #undef segmap_group_sizze_110779 +} +__kernel void mainzisegmap_108278(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t binop_x_120251, + __global unsigned char *mem_122007, __global + unsigned char *defunc_3_map_res_r_mem_123630, + __global unsigned char *mem_123685) +{ + #define segmap_group_sizze_110760 (mainzisegmap_group_sizze_108281) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127950; + int32_t local_tid_127951; + int64_t group_sizze_127954; + int32_t wave_sizze_127953; + int32_t group_tid_127952; + + global_tid_127950 = get_global_id(0); + local_tid_127951 = get_local_id(0); + group_sizze_127954 = get_local_size(0); + wave_sizze_127953 = LOCKSTEP_WIDTH; + group_tid_127952 = get_group_id(0); + + int32_t phys_tid_108278; + + phys_tid_108278 = global_tid_127950; + + int64_t gtid_108276; + + gtid_108276 = squot64(sext_i32_i64(group_tid_127952) * + segmap_group_sizze_110760 + + sext_i32_i64(local_tid_127951), binop_x_120251); + + int64_t gtid_108277; + + gtid_108277 = sext_i32_i64(group_tid_127952) * segmap_group_sizze_110760 + + sext_i32_i64(local_tid_127951) - + squot64(sext_i32_i64(group_tid_127952) * segmap_group_sizze_110760 + + sext_i32_i64(local_tid_127951), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_108276, m_75136) && slt64(gtid_108277, binop_x_120251)) { + int64_t binop_x_115166 = gtid_108276 * 
binop_x_120251; + int64_t binop_x_115167 = gtid_108277 + binop_x_115166; + int64_t new_index_115169 = squot64(binop_x_115167, binop_x_120251); + int64_t binop_y_115177 = new_index_115169 * binop_x_120251; + int64_t binop_x_115178 = binop_x_115167 - binop_y_115177; + int64_t new_index_115179 = squot64(binop_x_115178, k2p2zq_75151); + int64_t write_index_110763 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_75151 + + new_index_115179]; + int64_t binop_y_115234 = k2p2zq_75151 * new_index_115179; + int64_t new_index_115235 = binop_x_115178 - binop_y_115234; + int64_t write_index_110764 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_75151 + + new_index_115235]; + double write_value_110765 = ((__global + double *) defunc_3_map_res_r_mem_123630)[new_index_115169 * + (k2p2zq_75151 * + k2p2zq_75151) + + new_index_115179 * + k2p2zq_75151 + + new_index_115235]; + + if (((sle64((int64_t) 0, gtid_108276) && slt64(gtid_108276, m_75136)) && + (sle64((int64_t) 0, write_index_110763) && + slt64(write_index_110763, k2p2zq_75151))) && (sle64((int64_t) 0, + write_index_110764) && + slt64(write_index_110764, + k2p2zq_75151))) { + ((__global double *) mem_122007)[gtid_108276 * (k2p2zq_75151 * + k2p2zq_75151) + + write_index_110763 * k2p2zq_75151 + + write_index_110764] = + write_value_110765; } - if (groups_per_segment_46194 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46204 == 0) { - ((__global float *) mem_45134)[gtid_40722 * N_29165 + - gtid_40723] = x_acc_46217; - } - } - } else { - int32_t old_counter_46225; - - // first thread in group saves group result to global memory - { - if (local_tid_46204 == 0) { - ((__global - float *) group_res_arr_mem_46199)[sext_i32_i64(virt_group_id_46214) * - segred_group_sizze_40779] = - x_acc_46217; - mem_fence_global(); - old_counter_46225 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46201)[sext_i32_i64(srem32(flat_segment_id_46215, 
- 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46210)[(int64_t) 0] = - old_counter_46225 == groups_per_segment_46194 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46226; + } + + error_0: + return; + #undef segmap_group_sizze_110760 +} +__kernel void mainzisegmap_108435(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t rp1_75837, + int64_t binop_x_120251, __global + unsigned char *mem_122003, __global + unsigned char *mem_123133, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123681) +{ + #define segmap_group_sizze_110689 (mainzisegmap_group_sizze_108438) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127944; + int32_t local_tid_127945; + int64_t group_sizze_127948; + int32_t wave_sizze_127947; + int32_t group_tid_127946; + + global_tid_127944 = get_global_id(0); + local_tid_127945 = get_local_id(0); + group_sizze_127948 = get_local_size(0); + wave_sizze_127947 = LOCKSTEP_WIDTH; + group_tid_127946 = get_group_id(0); + + int32_t phys_tid_108435; + + phys_tid_108435 = global_tid_127944; + + int64_t gtid_108433; + + gtid_108433 = squot64(sext_i32_i64(group_tid_127946) * + segmap_group_sizze_110689 + + sext_i32_i64(local_tid_127945), k2p2zq_75151); + + int64_t gtid_108434; + + gtid_108434 = sext_i32_i64(group_tid_127946) * segmap_group_sizze_110689 + + sext_i32_i64(local_tid_127945) - + squot64(sext_i32_i64(group_tid_127946) * segmap_group_sizze_110689 + + sext_i32_i64(local_tid_127945), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_108433, m_75136) && slt64(gtid_108434, k2p2zq_75151)) { + int64_t write_index_110694 = ((__global + int64_t *) mem_123133)[gtid_108434 * + m_75136 + + gtid_108433]; + double defunc_2_reduce_res_110695; + double redout_119858 = 0.0; + + for (int64_t i_119859 = 0; i_119859 < k2p2zq_75151; i_119859++) { + 
double x_110699 = ((__global double *) mem_123681)[gtid_108433 * + rp1_75837 + + i_119859]; + double x_110700 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_108433 * + binop_x_120251 + + i_119859 * + k2p2zq_75151 + + gtid_108434]; + double defunc_1_f_res_110701 = x_110699 * x_110700; + double defunc_1_op_res_110698 = defunc_1_f_res_110701 + + redout_119858; + double redout_tmp_127949 = defunc_1_op_res_110698; - is_last_group_46226 = ((__local - bool *) sync_arr_mem_46210)[(int64_t) 0]; - if (is_last_group_46226) { - if (local_tid_46204 == 0) { - old_counter_46225 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46201)[sext_i32_i64(srem32(flat_segment_id_46215, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46194)); - } - // read in the per-group-results - { - int64_t read_per_thread_46227 = - sdiv_up64(groups_per_segment_46194, - segred_group_sizze_40779); - - x_40783 = 0.0F; - for (int64_t i_46228 = 0; i_46228 < read_per_thread_46227; - i_46228++) { - int64_t group_res_id_46229 = - sext_i32_i64(local_tid_46204) * - read_per_thread_46227 + i_46228; - int64_t index_of_group_res_46230 = - sext_i32_i64(flat_segment_id_46215) * - groups_per_segment_46194 + group_res_id_46229; - - if (slt64(group_res_id_46229, - groups_per_segment_46194)) { - x_40784 = ((__global - float *) group_res_arr_mem_46199)[index_of_group_res_46230 * - segred_group_sizze_40779]; - - float defunc_1_op_res_40785; - - defunc_1_op_res_40785 = x_40783 + x_40784; - x_40783 = defunc_1_op_res_40785; - } - } - } - ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_40783; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46231; - int32_t skip_waves_46232; - - skip_waves_46232 = 1; - - float x_46219; - float x_46220; - - offset_46231 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46204, - sext_i64_i32(segred_group_sizze_40779))) { - x_46219 = ((__local - float 
*) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46231)]; - } - } - offset_46231 = 1; - while (slt32(offset_46231, wave_sizze_46206)) { - if (slt32(local_tid_46204 + offset_46231, - sext_i64_i32(segred_group_sizze_40779)) && - ((local_tid_46204 - squot32(local_tid_46204, - wave_sizze_46206) * - wave_sizze_46206) & (2 * offset_46231 - 1)) == - 0) { - // read array element - { - x_46220 = ((volatile __local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46231)]; - } - // apply reduction operation - { - float defunc_1_op_res_46221 = x_46219 + x_46220; - - x_46219 = defunc_1_op_res_46221; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_46219; - } - } - offset_46231 *= 2; - } - while (slt32(skip_waves_46232, - squot32(sext_i64_i32(segred_group_sizze_40779) + - wave_sizze_46206 - 1, - wave_sizze_46206))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46231 = skip_waves_46232 * wave_sizze_46206; - if (slt32(local_tid_46204 + offset_46231, - sext_i64_i32(segred_group_sizze_40779)) && - ((local_tid_46204 - squot32(local_tid_46204, - wave_sizze_46206) * - wave_sizze_46206) == 0 && - (squot32(local_tid_46204, wave_sizze_46206) & (2 * - skip_waves_46232 - - 1)) == - 0)) { - // read array element - { - x_46220 = ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204 + - offset_46231)]; - } - // apply reduction operation - { - float defunc_1_op_res_46221 = x_46219 + x_46220; - - x_46219 = defunc_1_op_res_46221; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46208)[sext_i32_i64(local_tid_46204)] = - x_46219; - } - } - skip_waves_46232 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46204 == 0) { - ((__global float *) mem_45134)[gtid_40722 * - N_29165 + - gtid_40723] = - x_46219; - } - } - } - } + redout_119858 = redout_tmp_127949; + } + defunc_2_reduce_res_110695 = redout_119858; + if ((sle64((int64_t) 
0, gtid_108433) && slt64(gtid_108433, m_75136)) && + (sle64((int64_t) 0, write_index_110694) && slt64(write_index_110694, + k2p2zq_75151))) { + ((__global double *) mem_122003)[gtid_108433 * k2p2zq_75151 + + write_index_110694] = + defunc_2_reduce_res_110695; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_40779 + #undef segmap_group_sizze_110689 } -__kernel void mainzisegred_large_41311(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46462_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46460_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29175, - int64_t num_groups_41362, - int64_t groups_per_segment_46446, - int64_t elements_per_thread_46447, - int64_t virt_num_groups_46448, - int64_t threads_per_segment_46450, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global unsigned char *mem_45232, - __global unsigned char *mem_45235, - __global - unsigned char *group_res_arr_mem_46451, - __global - unsigned char *mainzicounter_mem_46453) +__kernel void mainzisegmap_108490(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t r_75826, + int64_t rp1_75837, int64_t num_groups_110611, + int64_t num_threads_126236, __global + unsigned char *mem_123130, __global + unsigned char *mem_123135, __global + unsigned char *mem_123633, __global + unsigned char *mem_123641, __global + unsigned char *mem_123678, __global + unsigned char *mem_125341, __global + unsigned char *double_buffer_mem_125599) { - #define segred_group_sizze_41361 (mainzisegred_group_sizze_41305) + #define segmap_group_sizze_110610 (mainzisegmap_group_sizze_108492) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46462_backing_1 = - (__local volatile - 
char *) sync_arr_mem_46462_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46460_backing_0 = - (__local volatile - char *) red_arr_mem_46460_backing_aligned_1; volatile __local bool local_failure; if (failure_is_an_option) { @@ -11911,87 +16463,90 @@ def sync(self): local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46455; - int32_t local_tid_46456; - int64_t group_sizze_46459; - int32_t wave_sizze_46458; - int32_t group_tid_46457; - - global_tid_46455 = get_global_id(0); - local_tid_46456 = get_local_id(0); - group_sizze_46459 = get_local_size(0); - wave_sizze_46458 = LOCKSTEP_WIDTH; - group_tid_46457 = get_group_id(0); - - int32_t phys_tid_41311; - - phys_tid_41311 = global_tid_46455; - - __local char *red_arr_mem_46460; - - red_arr_mem_46460 = (__local char *) red_arr_mem_46460_backing_0; - - __local char *sync_arr_mem_46462; - - sync_arr_mem_46462 = (__local char *) sync_arr_mem_46462_backing_1; - - int32_t phys_group_id_46464; - - phys_group_id_46464 = get_group_id(0); - for (int32_t i_46465 = 0; i_46465 < - sdiv_up32(sext_i64_i32(virt_num_groups_46448) - phys_group_id_46464, - sext_i64_i32(num_groups_41362)); i_46465++) { - int32_t virt_group_id_46466 = phys_group_id_46464 + i_46465 * - sext_i64_i32(num_groups_41362); - int32_t flat_segment_id_46467 = squot32(virt_group_id_46466, - sext_i64_i32(groups_per_segment_46446)); - int64_t global_tid_46468 = srem64(sext_i32_i64(virt_group_id_46466) * - segred_group_sizze_41361 + - sext_i32_i64(local_tid_46456), - segred_group_sizze_41361 * - groups_per_segment_46446); - int64_t gtid_41302 = sext_i32_i64(flat_segment_id_46467); - int64_t gtid_41310; - float x_acc_46469; - int64_t chunk_sizze_46470; - - chunk_sizze_46470 = smin64(elements_per_thread_46447, - sdiv_up64(i32_res_29175 - - sext_i32_i64(sext_i64_i32(global_tid_46468)), - threads_per_segment_46450)); - - float x_41365; - float x_41366; - - // neutral-initialise the accumulators - { - x_acc_46469 = 0.0F; - } - for 
(int64_t i_46474 = 0; i_46474 < chunk_sizze_46470; i_46474++) { - gtid_41310 = sext_i32_i64(sext_i64_i32(global_tid_46468)) + - threads_per_segment_46450 * i_46474; - // apply map function - { - int32_t defunc_0_f_res_41369 = ((__global - int32_t *) mem_45232)[gtid_41302]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_41310); - bool cond_41371 = slt32(index_primexp_42385, - defunc_0_f_res_41369); - float defunc_0_f_res_41372; + int32_t global_tid_127928; + int32_t local_tid_127929; + int64_t group_sizze_127932; + int32_t wave_sizze_127931; + int32_t group_tid_127930; + + global_tid_127928 = get_global_id(0); + local_tid_127929 = get_local_id(0); + group_sizze_127932 = get_local_size(0); + wave_sizze_127931 = LOCKSTEP_WIDTH; + group_tid_127930 = get_group_id(0); + + int32_t phys_tid_108490; + + phys_tid_108490 = global_tid_127928; + + int32_t phys_group_id_127933; + + phys_group_id_127933 = get_group_id(0); + for (int32_t i_127934 = 0; i_127934 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_110610)) - + phys_group_id_127933, sext_i64_i32(num_groups_110611)); + i_127934++) { + int32_t virt_group_id_127935 = phys_group_id_127933 + i_127934 * + sext_i64_i32(num_groups_110611); + int64_t gtid_108489 = sext_i32_i64(virt_group_id_127935) * + segmap_group_sizze_110610 + sext_i32_i64(local_tid_127929); + + if (slt64(gtid_108489, m_75136)) { + int64_t min_res_110617 = ((__global + int64_t *) mem_123135)[gtid_108489]; + int64_t min_res_110618 = smin64(r_75826, min_res_110617); + + for (int64_t i_127936 = 0; i_127936 < rp1_75837; i_127936++) { + ((__global double *) double_buffer_mem_125599)[phys_tid_108490 + + i_127936 * + num_threads_126236] = + ((__global double *) mem_123633)[gtid_108489 + i_127936 * + m_75136]; + } + for (int64_t j_110620 = 0; j_110620 < min_res_110618; j_110620++) { + bool y_110622 = slt64(j_110620, k2p2zq_75151); + bool index_certs_110623; + + if (!y_110622) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 171) == -1) { 
+ global_failure_args[0] = j_110620; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_110624 = ((__global + double *) mem_123130)[j_110620 * + m_75136 + + gtid_108489]; + bool zeze_res_110625 = zeze_arg_110624 == 0.0; - if (cond_41371) { - int64_t i_41373 = sext_i32_i64(index_primexp_42385); - bool x_41374 = sle64((int64_t) 0, i_41373); - bool y_41375 = slt64(i_41373, N_29165); - bool bounds_check_41376 = x_41374 && y_41375; - bool index_certs_41377; + if (zeze_res_110625) { + for (int64_t i_127938 = 0; i_127938 < rp1_75837; + i_127938++) { + ((__global double *) mem_125341)[phys_tid_108490 + + i_127938 * + num_threads_126236] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_127938 * + num_threads_126236]; + } + } else { + bool y_110627 = slt64(j_110620, rp1_75837); + bool index_certs_110628; - if (!bounds_check_41376) { + if (!y_110627) { { if (atomic_cmpxchg_i32_global(global_failure, -1, - 19) == -1) { - global_failure_args[0] = i_41373; - global_failure_args[1] = N_29165; + 172) == -1) { + global_failure_args[0] = j_110620; + global_failure_args[1] = rp1_75837; ; } local_failure = true; @@ -11999,10119 +16554,11428 @@ def sync(self): } } - float defunc_0_f_res_t_res_41378 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41302 * - N_29165 + - i_41373]; - - defunc_0_f_res_41372 = defunc_0_f_res_t_res_41378; - } else { - defunc_0_f_res_41372 = 0.0F; - } - - float defunc_0_f_res_41379 = defunc_0_f_res_41372 * - defunc_0_f_res_41372; - - // save map-out results - { } - // load accumulator - { - x_41365 = x_acc_46469; - } - // load new values - { - x_41366 = defunc_0_f_res_41379; - } - // apply reduction operator - { - float defunc_1_op_res_41367 = x_41365 + x_41366; + double y_110629 = ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + j_110620 * + num_threads_126236]; + double negate_arg_110630 = zeze_arg_110624 * y_110629; + double t_110631 = 0.0 
- negate_arg_110630; + int64_t x_110632 = sub64(rp1_75837, j_110620); + int64_t upper_bound_110633 = sub64(x_110632, (int64_t) 1); + double t_110634; + double t_110636 = t_110631; - // store in accumulator - { - x_acc_46469 = defunc_1_op_res_41367; + for (int64_t i0_110635 = 0; i0_110635 < upper_bound_110633; + i0_110635++) { + int64_t x_110637 = add64(j_110620, i0_110635); + int64_t i_110638 = add64((int64_t) 1, x_110637); + bool x_110639 = sle64((int64_t) 0, i_110638); + bool y_110640 = slt64(i_110638, rp1_75837); + bool bounds_check_110641 = x_110639 && y_110640; + bool index_ok_110642 = y_110622 && bounds_check_110641; + bool index_certs_110643; + + if (!index_ok_110642) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 173) == -1) { + global_failure_args[0] = j_110620; + global_failure_args[1] = i_110638; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_110644 = ((__global + double *) mem_123641)[i_110638 * + (m_75136 * + k2p2zq_75151) + + j_110620 * + m_75136 + + gtid_108489]; + bool index_certs_110645; + + if (!bounds_check_110641) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 174) == -1) { + global_failure_args[0] = i_110638; + global_failure_args[1] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_110646 = ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_110638 * + num_threads_126236]; + double y_110647 = x_110644 * y_110646; + double loopres_110648 = t_110636 - y_110647; + double t_tmp_127939 = loopres_110648; + + t_110636 = t_tmp_127939; } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_41365 = x_acc_46469; - ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_41365; - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46475; - int32_t 
skip_waves_46476; - - skip_waves_46476 = 1; - - float x_46471; - float x_46472; - - offset_46475 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46456, - sext_i64_i32(segred_group_sizze_41361))) { - x_46471 = ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46475)]; - } - } - offset_46475 = 1; - while (slt32(offset_46475, wave_sizze_46458)) { - if (slt32(local_tid_46456 + offset_46475, - sext_i64_i32(segred_group_sizze_41361)) && - ((local_tid_46456 - squot32(local_tid_46456, wave_sizze_46458) * - wave_sizze_46458) & (2 * offset_46475 - 1)) == 0) { - // read array element - { - x_46472 = ((volatile __local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46475)]; - } - // apply reduction operation - { - float defunc_1_op_res_46473 = x_46471 + x_46472; + t_110634 = t_110636; - x_46471 = defunc_1_op_res_46473; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_46471; - } - } - offset_46475 *= 2; - } - while (slt32(skip_waves_46476, - squot32(sext_i64_i32(segred_group_sizze_41361) + - wave_sizze_46458 - 1, wave_sizze_46458))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46475 = skip_waves_46476 * wave_sizze_46458; - if (slt32(local_tid_46456 + offset_46475, - sext_i64_i32(segred_group_sizze_41361)) && - ((local_tid_46456 - squot32(local_tid_46456, wave_sizze_46458) * - wave_sizze_46458) == 0 && (squot32(local_tid_46456, - wave_sizze_46458) & (2 * - skip_waves_46476 - - 1)) == - 0)) { - // read array element - { - x_46472 = ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46475)]; - } - // apply reduction operation - { - float defunc_1_op_res_46473 = x_46471 + x_46472; + double t_110649 = t_110634 / zeze_arg_110624; + double y_110650 = zeze_arg_110624 * t_110649; + double lw_val_110651 = y_110629 + y_110650; - x_46471 = defunc_1_op_res_46473; - } - // write result of operation - { 
- ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_46471; + ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + j_110620 * + num_threads_126236] = + lw_val_110651; + for (int64_t i0_110654 = 0; i0_110654 < upper_bound_110633; + i0_110654++) { + int64_t x_110656 = add64(j_110620, i0_110654); + int64_t i_110657 = add64((int64_t) 1, x_110656); + bool x_110658 = sle64((int64_t) 0, i_110657); + bool y_110659 = slt64(i_110657, rp1_75837); + bool bounds_check_110660 = x_110658 && y_110659; + bool index_certs_110661; + + if (!bounds_check_110660) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 175) == -1) { + global_failure_args[0] = i_110657; + global_failure_args[1] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_110662 = ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_110657 * + num_threads_126236]; + bool index_ok_110663 = y_110622 && bounds_check_110660; + bool index_certs_110664; + + if (!index_ok_110663) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 176) == -1) { + global_failure_args[0] = j_110620; + global_failure_args[1] = i_110657; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_110665 = ((__global + double *) mem_123641)[i_110657 * + (m_75136 * + k2p2zq_75151) + + j_110620 * + m_75136 + + gtid_108489]; + double y_110666 = t_110649 * y_110665; + double lw_val_110667 = x_110662 + y_110666; + + ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_110657 * + num_threads_126236] = + lw_val_110667; + } + for (int64_t i_127941 = 0; i_127941 < rp1_75837; + i_127941++) { + ((__global double *) mem_125341)[phys_tid_108490 + + i_127941 * + num_threads_126236] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_127941 * + num_threads_126236]; + } + } + for (int64_t i_127942 = 0; i_127942 < rp1_75837; 
i_127942++) { + ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_127942 * + num_threads_126236] = + ((__global double *) mem_125341)[phys_tid_108490 + + i_127942 * + num_threads_126236]; } } - skip_waves_46476 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46456) == (int64_t) 0) { - x_acc_46469 = x_46471; + for (int64_t i_127943 = 0; i_127943 < rp1_75837; i_127943++) { + ((__global double *) mem_123678)[i_127943 * m_75136 + + gtid_108489] = ((__global + double *) double_buffer_mem_125599)[phys_tid_108490 + + i_127943 * + num_threads_126236]; } } - if (groups_per_segment_46446 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46456 == 0) { - ((__global float *) mem_45235)[gtid_41302] = x_acc_46469; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_110610 +} +__kernel void mainzisegmap_108554(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_110552, + int64_t binop_x_120251, + int64_t num_threads_126232, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123396, __global + unsigned char *mem_123399, __global + unsigned char *mem_123415) +{ + #define segmap_group_sizze_110551 (mainzisegmap_group_sizze_108557) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127785; + int32_t local_tid_127786; + int64_t group_sizze_127789; + int32_t wave_sizze_127788; + int32_t group_tid_127787; + + global_tid_127785 = get_global_id(0); + local_tid_127786 = get_local_id(0); + group_sizze_127789 = get_local_size(0); + wave_sizze_127788 = LOCKSTEP_WIDTH; + group_tid_127787 = get_group_id(0); + + int32_t phys_tid_108554; + + phys_tid_108554 = global_tid_127785; + + int32_t phys_group_id_127790; + + 
phys_group_id_127790 = get_group_id(0); + for (int32_t i_127791 = 0; i_127791 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + segmap_group_sizze_110551)) - + phys_group_id_127790, sext_i64_i32(num_groups_110552)); + i_127791++) { + int32_t virt_group_id_127792 = phys_group_id_127790 + i_127791 * + sext_i64_i32(num_groups_110552); + int64_t gtid_108552 = squot64(sext_i32_i64(virt_group_id_127792) * + segmap_group_sizze_110551 + + sext_i32_i64(local_tid_127786), + k2p2zq_75151); + int64_t gtid_108553 = sext_i32_i64(virt_group_id_127792) * + segmap_group_sizze_110551 + sext_i32_i64(local_tid_127786) - + squot64(sext_i32_i64(virt_group_id_127792) * + segmap_group_sizze_110551 + + sext_i32_i64(local_tid_127786), k2p2zq_75151) * + k2p2zq_75151; + + if (slt64(gtid_108552, m_75136) && slt64(gtid_108553, k2p2zq_75151)) { + for (int64_t i_119854 = 0; i_119854 < k2p2zq_75151; i_119854++) { + double defunc_2_reduce_res_110563; + double redout_119856 = 0.0; + + for (int64_t i_119857 = 0; i_119857 < k2p2zq_75151; + i_119857++) { + double x_110567 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_108552 * + binop_x_120251 + + i_119857 * + k2p2zq_75151 + + gtid_108553]; + double x_110568 = ((__global + double *) mem_123396)[gtid_108552 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_119854 * + k2p2zq_75151 + + i_119857]; + double defunc_1_f_res_110569 = x_110567 * x_110568; + double defunc_1_op_res_110566 = defunc_1_f_res_110569 + + redout_119856; + double redout_tmp_127794 = defunc_1_op_res_110566; + + redout_119856 = redout_tmp_127794; } + defunc_2_reduce_res_110563 = redout_119856; + ((__global double *) mem_123399)[phys_tid_108554 + i_119854 * + num_threads_126232] = + defunc_2_reduce_res_110563; } - } else { - int32_t old_counter_46477; - - // first thread in group saves group result to global memory - { - if (local_tid_46456 == 0) { - ((__global - float *) group_res_arr_mem_46451)[sext_i32_i64(virt_group_id_46466) * - segred_group_sizze_41361] = - 
x_acc_46469; - mem_fence_global(); - old_counter_46477 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46453)[sext_i32_i64(srem32(flat_segment_id_46467, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46462)[(int64_t) 0] = - old_counter_46477 == groups_per_segment_46446 - - (int64_t) 1; - } + for (int64_t i_127795 = 0; i_127795 < k2p2zq_75151; i_127795++) { + ((__global double *) mem_123415)[i_127795 * (k2p2zq_75151 * + m_75136) + + gtid_108552 * k2p2zq_75151 + + gtid_108553] = ((__global + double *) mem_123399)[phys_tid_108554 + + i_127795 * + num_threads_126232]; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46478; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_110551 +} +__kernel void mainzisegmap_108770(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t x_110426, + int64_t i_110427, int64_t j_m_i_110431, + int64_t num_groups_110459, + int64_t num_threads_126224, __global + unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123241, __global + unsigned char *mem_123259, __global + unsigned char *mem_123263, __global + unsigned char *mem_123275, __global + unsigned char *mem_123287) +{ + #define segmap_group_sizze_110458 (mainzisegmap_group_sizze_108772) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127682; + int32_t local_tid_127683; + int64_t group_sizze_127686; + int32_t wave_sizze_127685; + int32_t group_tid_127684; + + global_tid_127682 = get_global_id(0); + local_tid_127683 = get_local_id(0); + group_sizze_127686 = get_local_size(0); + wave_sizze_127685 = LOCKSTEP_WIDTH; + group_tid_127684 = get_group_id(0); + + int32_t phys_tid_108770; + + phys_tid_108770 = global_tid_127682; + + int32_t phys_group_id_127687; + + phys_group_id_127687 = get_group_id(0); 
+ for (int32_t i_127688 = 0; i_127688 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_110458)) - + phys_group_id_127687, sext_i64_i32(num_groups_110459)); + i_127688++) { + int32_t virt_group_id_127689 = phys_group_id_127687 + i_127688 * + sext_i64_i32(num_groups_110459); + int64_t gtid_108769 = sext_i32_i64(virt_group_id_127689) * + segmap_group_sizze_110458 + sext_i32_i64(local_tid_127683); + + if (slt64(gtid_108769, m_75136)) { + double defunc_3_map_res_r_transformed_row_110464 = ((__global + double *) mem_123143)[gtid_108769 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_110427 * + k2p2zq_75151 + + i_110427]; - is_last_group_46478 = ((__local - bool *) sync_arr_mem_46462)[(int64_t) 0]; - if (is_last_group_46478) { - if (local_tid_46456 == 0) { - old_counter_46477 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46453)[sext_i32_i64(srem32(flat_segment_id_46467, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46446)); - } - // read in the per-group-results - { - int64_t read_per_thread_46479 = - sdiv_up64(groups_per_segment_46446, - segred_group_sizze_41361); + for (int64_t i_119843 = 0; i_119843 < k2p2zq_75151; i_119843++) { + for (int64_t i_127691 = 0; i_127691 < k2p2zq_75151; + i_127691++) { + ((__global double *) mem_123275)[phys_tid_108770 + + i_127691 * + num_threads_126224] = + ((__global double *) mem_123259)[i_119843 * (m_75136 * + k2p2zq_75151) + + gtid_108769 + + i_127691 * m_75136]; + } + + double defunc_2_map_res_transformed_row_110469 = ((__global + double *) mem_121938)[i_119843 * + k2p2zq_75151 + + i_110427]; + double defunc_2_reduce_res_110470; + double redout_119846 = 0.0; + + for (int64_t i_119847 = 0; i_119847 < j_m_i_110431; + i_119847++) { + int64_t slice_120040 = x_110426 + i_119847; + double x_110475 = ((__global + double *) mem_123241)[slice_120040 * + (k2p2zq_75151 * + m_75136) + + gtid_108769 * + k2p2zq_75151 + + i_110427]; + bool isnan_res_110476; + + isnan_res_110476 = 
futrts_isnan64(x_110475); + + double defunc_1_f_res_110477; - x_41365 = 0.0F; - for (int64_t i_46480 = 0; i_46480 < read_per_thread_46479; - i_46480++) { - int64_t group_res_id_46481 = - sext_i32_i64(local_tid_46456) * - read_per_thread_46479 + i_46480; - int64_t index_of_group_res_46482 = - sext_i32_i64(flat_segment_id_46467) * - groups_per_segment_46446 + group_res_id_46481; + if (isnan_res_110476) { + defunc_1_f_res_110477 = 0.0; + } else { + double x_110474 = ((__global + double *) mem_123259)[i_119843 * + (m_75136 * + k2p2zq_75151) + + slice_120040 * + m_75136 + + gtid_108769]; + double defunc_1_f_res_f_res_110478 = x_110474 * + x_110475; - if (slt64(group_res_id_46481, - groups_per_segment_46446)) { - x_41366 = ((__global - float *) group_res_arr_mem_46451)[index_of_group_res_46482 * - segred_group_sizze_41361]; - - float defunc_1_op_res_41367; - - defunc_1_op_res_41367 = x_41365 + x_41366; - x_41365 = defunc_1_op_res_41367; - } + defunc_1_f_res_110477 = defunc_1_f_res_f_res_110478; } - } - ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_41365; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46483; - int32_t skip_waves_46484; - - skip_waves_46484 = 1; - float x_46471; - float x_46472; + double defunc_1_op_res_110473 = defunc_1_f_res_110477 + + redout_119846; + double redout_tmp_127692 = defunc_1_op_res_110473; - offset_46483 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46456, - sext_i64_i32(segred_group_sizze_41361))) { - x_46471 = ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46483)]; - } - } - offset_46483 = 1; - while (slt32(offset_46483, wave_sizze_46458)) { - if (slt32(local_tid_46456 + offset_46483, - sext_i64_i32(segred_group_sizze_41361)) && - ((local_tid_46456 - squot32(local_tid_46456, - wave_sizze_46458) * - wave_sizze_46458) & (2 * offset_46483 - 1)) == - 0) { - // read array element - { - x_46472 = ((volatile 
__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46483)]; - } - // apply reduction operation - { - float defunc_1_op_res_46473 = x_46471 + x_46472; - - x_46471 = defunc_1_op_res_46473; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_46471; - } - } - offset_46483 *= 2; - } - while (slt32(skip_waves_46484, - squot32(sext_i64_i32(segred_group_sizze_41361) + - wave_sizze_46458 - 1, - wave_sizze_46458))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46483 = skip_waves_46484 * wave_sizze_46458; - if (slt32(local_tid_46456 + offset_46483, - sext_i64_i32(segred_group_sizze_41361)) && - ((local_tid_46456 - squot32(local_tid_46456, - wave_sizze_46458) * - wave_sizze_46458) == 0 && - (squot32(local_tid_46456, wave_sizze_46458) & (2 * - skip_waves_46484 - - 1)) == - 0)) { - // read array element - { - x_46472 = ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456 + - offset_46483)]; - } - // apply reduction operation - { - float defunc_1_op_res_46473 = x_46471 + x_46472; - - x_46471 = defunc_1_op_res_46473; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46460)[sext_i32_i64(local_tid_46456)] = - x_46471; - } - } - skip_waves_46484 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46456 == 0) { - ((__global float *) mem_45235)[gtid_41302] = - x_46471; - } - } + redout_119846 = redout_tmp_127692; + } + defunc_2_reduce_res_110470 = redout_119846; + + double zm_res_110479 = defunc_2_map_res_transformed_row_110469 - + defunc_2_reduce_res_110470; + double zs_res_110480 = zm_res_110479 / + defunc_3_map_res_r_transformed_row_110464; + + ((__global double *) mem_123275)[phys_tid_108770 + i_110427 * + num_threads_126224] = + zs_res_110480; + for (int64_t i_127693 = 0; i_127693 < k2p2zq_75151; + i_127693++) { + ((__global double *) mem_123263)[phys_tid_108770 + + (i_119843 * + (num_threads_126224 * + k2p2zq_75151) + + 
i_127693 * + num_threads_126224)] = + ((__global double *) mem_123275)[phys_tid_108770 + + i_127693 * + num_threads_126224]; + } + } + for (int64_t i_127694 = 0; i_127694 < k2p2zq_75151; i_127694++) { + for (int64_t i_127695 = 0; i_127695 < k2p2zq_75151; + i_127695++) { + ((__global double *) mem_123287)[i_127694 * (m_75136 * + k2p2zq_75151) + + i_127695 * m_75136 + + gtid_108769] = ((__global + double *) mem_123263)[phys_tid_108770 + + (i_127694 * + (num_threads_126224 * + k2p2zq_75151) + + i_127695 * + num_threads_126224)]; } } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_41361 + #undef segmap_group_sizze_110458 } -__kernel void mainzisegred_large_41336(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46402_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46400_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29175, - int64_t num_groups_41348, - int64_t groups_per_segment_46386, - int64_t elements_per_thread_46387, - int64_t virt_num_groups_46388, - int64_t threads_per_segment_46390, - __global unsigned char *images_mem_44381, - __global unsigned char *mem_45232, - __global - unsigned char *group_res_arr_mem_46391, - __global - unsigned char *mainzicounter_mem_46393) +__kernel void mainzisegmap_108858(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t i_110427, + int64_t binop_x_120251, __global + unsigned char *mem_param_123252, __global + unsigned char *mem_123342) { - #define segred_group_sizze_41347 (mainzisegred_group_sizze_41330) + #define segmap_group_sizze_110540 (mainzisegmap_group_sizze_108862) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46402_backing_1 = - (__local volatile - char *) sync_arr_mem_46402_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46400_backing_0 = - (__local volatile - char *) 
red_arr_mem_46400_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46395; - int32_t local_tid_46396; - int64_t group_sizze_46399; - int32_t wave_sizze_46398; - int32_t group_tid_46397; + int32_t global_tid_127777; + int32_t local_tid_127778; + int64_t group_sizze_127781; + int32_t wave_sizze_127780; + int32_t group_tid_127779; + + global_tid_127777 = get_global_id(0); + local_tid_127778 = get_local_id(0); + group_sizze_127781 = get_local_size(0); + wave_sizze_127780 = LOCKSTEP_WIDTH; + group_tid_127779 = get_group_id(0); + + int32_t phys_tid_108858; + + phys_tid_108858 = global_tid_127777; + + int64_t gtid_108855; + + gtid_108855 = squot64(sext_i32_i64(group_tid_127779) * + segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778), k2p2zq_75151); + + int64_t gtid_108856; + + gtid_108856 = sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778) - + squot64(sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778), k2p2zq_75151) * k2p2zq_75151; + + int64_t gtid_108857; + + gtid_108857 = sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778) - + squot64(sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778), k2p2zq_75151) * k2p2zq_75151 - + (sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778) - + squot64(sext_i32_i64(group_tid_127779) * segmap_group_sizze_110540 + + sext_i32_i64(local_tid_127778), k2p2zq_75151) * k2p2zq_75151); + if ((slt64(gtid_108855, m_75136) && slt64(gtid_108856, k2p2zq_75151)) && + slt64(gtid_108857, (int64_t) 1)) { + double zs_res_110543 = ((__global double *) mem_123342)[gtid_108855 * + k2p2zq_75151 + + gtid_108856]; + + if (((sle64((int64_t) 0, gtid_108855) && slt64(gtid_108855, m_75136)) && + (sle64((int64_t) 0, gtid_108856) && slt64(gtid_108856, + k2p2zq_75151))) && + (sle64((int64_t) 0, i_110427) && slt64(i_110427, 
k2p2zq_75151))) { + ((__global double *) mem_param_123252)[gtid_108855 * + binop_x_120251 + + gtid_108856 * k2p2zq_75151 + + i_110427] = zs_res_110543; + } + } + + error_0: + return; + #undef segmap_group_sizze_110540 +} +__kernel void mainzisegmap_108870(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t i_110427, + __global unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123338, __global + unsigned char *mem_123342) +{ + #define segmap_group_sizze_110529 (mainzisegmap_group_sizze_108873) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127772; + int32_t local_tid_127773; + int64_t group_sizze_127776; + int32_t wave_sizze_127775; + int32_t group_tid_127774; + + global_tid_127772 = get_global_id(0); + local_tid_127773 = get_local_id(0); + group_sizze_127776 = get_local_size(0); + wave_sizze_127775 = LOCKSTEP_WIDTH; + group_tid_127774 = get_group_id(0); + + int32_t phys_tid_108870; + + phys_tid_108870 = global_tid_127772; + + int64_t gtid_108868; + + gtid_108868 = squot64(sext_i32_i64(group_tid_127774) * + segmap_group_sizze_110529 + + sext_i32_i64(local_tid_127773), k2p2zq_75151); + + int64_t gtid_108869; + + gtid_108869 = sext_i32_i64(group_tid_127774) * segmap_group_sizze_110529 + + sext_i32_i64(local_tid_127773) - + squot64(sext_i32_i64(group_tid_127774) * segmap_group_sizze_110529 + + sext_i32_i64(local_tid_127773), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_108868, m_75136) && slt64(gtid_108869, k2p2zq_75151)) { + double defunc_3_map_res_r_transformed_row_110532 = ((__global + double *) mem_123143)[gtid_108868 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_110427 * + k2p2zq_75151 + + i_110427]; + double defunc_2_map_res_transformed_row_110533 = ((__global + double *) mem_121938)[gtid_108869 * + k2p2zq_75151 + + i_110427]; + double defunc_2_reduce_res_110534 = ((__global + double *) 
mem_123338)[gtid_108868 * + k2p2zq_75151 + + gtid_108869]; + double zm_res_110535 = defunc_2_map_res_transformed_row_110533 - + defunc_2_reduce_res_110534; + double zs_res_110536 = zm_res_110535 / + defunc_3_map_res_r_transformed_row_110532; + + ((__global double *) mem_123342)[gtid_108868 * k2p2zq_75151 + + gtid_108869] = zs_res_110536; + } + + error_0: + return; + #undef segmap_group_sizze_110529 +} +__kernel void mainzisegmap_109197(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t rp1_75837, + __global unsigned char *mem_123127, __global + unsigned char *mem_123135, __global + unsigned char *mem_123138, __global + unsigned char *mem_123143) +{ + #define segmap_group_sizze_110336 (mainzisegmap_group_sizze_109201) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127649; + int32_t local_tid_127650; + int64_t group_sizze_127653; + int32_t wave_sizze_127652; + int32_t group_tid_127651; + + global_tid_127649 = get_global_id(0); + local_tid_127650 = get_local_id(0); + group_sizze_127653 = get_local_size(0); + wave_sizze_127652 = LOCKSTEP_WIDTH; + group_tid_127651 = get_group_id(0); + + int32_t phys_tid_109197; + + phys_tid_109197 = global_tid_127649; + + int64_t gtid_109194; + + gtid_109194 = squot64(sext_i32_i64(group_tid_127651) * + segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650), k2p2zq_75151 * + k2p2zq_75151); + + int64_t gtid_109195; + + gtid_109195 = squot64(sext_i32_i64(group_tid_127651) * + segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650) - + squot64(sext_i32_i64(group_tid_127651) * + segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + + int64_t gtid_109196; + + gtid_109196 = sext_i32_i64(group_tid_127651) * segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650) - + squot64(sext_i32_i64(group_tid_127651) * 
segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650), k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - squot64(sext_i32_i64(group_tid_127651) * + segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650) - + squot64(sext_i32_i64(group_tid_127651) * + segmap_group_sizze_110336 + + sext_i32_i64(local_tid_127650), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + if ((slt64(gtid_109194, m_75136) && slt64(gtid_109195, k2p2zq_75151)) && + slt64(gtid_109196, k2p2zq_75151)) { + int64_t min_res_110339 = ((__global int64_t *) mem_123135)[gtid_109194]; + bool cond_f_res_110340 = ((__global bool *) mem_123138)[gtid_109194 * + k2p2zq_75151 + + gtid_109195]; + int64_t x_110343 = add64((int64_t) 1, gtid_109196); + bool cond_110344 = slt64(min_res_110339, x_110343); + bool x_110345 = !cond_110344; + bool y_110346 = cond_f_res_110340 && x_110345; + bool cond_110347 = cond_110344 || y_110346; + double defunc_1_f_res_110348; + + if (cond_110347) { + defunc_1_f_res_110348 = NAN; + } else { + double x_110342 = ((__global double *) mem_123127)[gtid_109195 * + (m_75136 * + rp1_75837) + + gtid_109196 * + m_75136 + + gtid_109194]; + + defunc_1_f_res_110348 = x_110342; + } + ((__global double *) mem_123143)[gtid_109194 * (k2p2zq_75151 * + k2p2zq_75151) + + gtid_109195 * k2p2zq_75151 + + gtid_109196] = defunc_1_f_res_110348; + } + + error_0: + return; + #undef segmap_group_sizze_110336 +} +__kernel void mainzisegmap_109232(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_123135, __global + unsigned char *mem_123138) +{ + #define segmap_group_sizze_110321 (mainzisegmap_group_sizze_109235) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - global_tid_46395 = get_global_id(0); - local_tid_46396 = get_local_id(0); - group_sizze_46399 = get_local_size(0); - wave_sizze_46398 = LOCKSTEP_WIDTH; - group_tid_46397 = get_group_id(0); + 
if (*global_failure >= 0) + return; - int32_t phys_tid_41336; + int32_t global_tid_127644; + int32_t local_tid_127645; + int64_t group_sizze_127648; + int32_t wave_sizze_127647; + int32_t group_tid_127646; - phys_tid_41336 = global_tid_46395; + global_tid_127644 = get_global_id(0); + local_tid_127645 = get_local_id(0); + group_sizze_127648 = get_local_size(0); + wave_sizze_127647 = LOCKSTEP_WIDTH; + group_tid_127646 = get_group_id(0); - __local char *red_arr_mem_46400; + int32_t phys_tid_109232; - red_arr_mem_46400 = (__local char *) red_arr_mem_46400_backing_0; + phys_tid_109232 = global_tid_127644; - __local char *sync_arr_mem_46402; + int64_t gtid_109230; - sync_arr_mem_46402 = (__local char *) sync_arr_mem_46402_backing_1; + gtid_109230 = squot64(sext_i32_i64(group_tid_127646) * + segmap_group_sizze_110321 + + sext_i32_i64(local_tid_127645), k2p2zq_75151); - int32_t phys_group_id_46404; + int64_t gtid_109231; - phys_group_id_46404 = get_group_id(0); - for (int32_t i_46405 = 0; i_46405 < - sdiv_up32(sext_i64_i32(virt_num_groups_46388) - phys_group_id_46404, - sext_i64_i32(num_groups_41348)); i_46405++) { - int32_t virt_group_id_46406 = phys_group_id_46404 + i_46405 * - sext_i64_i32(num_groups_41348); - int32_t flat_segment_id_46407 = squot32(virt_group_id_46406, - sext_i64_i32(groups_per_segment_46386)); - int64_t global_tid_46408 = srem64(sext_i32_i64(virt_group_id_46406) * - segred_group_sizze_41347 + - sext_i32_i64(local_tid_46396), - segred_group_sizze_41347 * - groups_per_segment_46386); - int64_t gtid_41327 = sext_i32_i64(flat_segment_id_46407); - int64_t gtid_41335; - int32_t x_acc_46409; - int64_t chunk_sizze_46410; - - chunk_sizze_46410 = smin64(elements_per_thread_46387, - sdiv_up64(i32_res_29175 - - sext_i32_i64(sext_i64_i32(global_tid_46408)), - threads_per_segment_46390)); + gtid_109231 = sext_i32_i64(group_tid_127646) * segmap_group_sizze_110321 + + sext_i32_i64(local_tid_127645) - + squot64(sext_i32_i64(group_tid_127646) * 
segmap_group_sizze_110321 + + sext_i32_i64(local_tid_127645), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_109230, m_75136) && slt64(gtid_109231, k2p2zq_75151)) { + int64_t min_res_110324 = ((__global int64_t *) mem_123135)[gtid_109230]; + int64_t x_110326 = add64((int64_t) 1, gtid_109231); + bool cond_f_res_110327 = slt64(min_res_110324, x_110326); - int32_t x_41351; - int32_t x_41352; + ((__global bool *) mem_123138)[gtid_109230 * k2p2zq_75151 + + gtid_109231] = cond_f_res_110327; + } + + error_0: + return; + #undef segmap_group_sizze_110321 +} +__kernel void mainzisegmap_109290(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_75223, + unsigned char y_75227, int64_t k_75342, + int64_t rp1_75837, int64_t min_res_75849, + int64_t num_groups_110026, + int64_t num_threads_126209, __global + unsigned char *mem_120248, __global + unsigned char *mem_122793, __global + unsigned char *mem_122796, __global + unsigned char *mem_122800, __global + unsigned char *mem_122803, __global + unsigned char *mem_123127, __global + unsigned char *mem_123130, __global + unsigned char *mem_123133, __global + unsigned char *mem_123135, __global + unsigned char *mem_125265, __global + unsigned char *mem_125267, __global + unsigned char *mem_125472, __global + unsigned char *mem_125480, __global + unsigned char *mem_125482, __global + unsigned char *mem_125512, __global + unsigned char *double_buffer_mem_125586, + __global + unsigned char *double_buffer_mem_125587, + __global + unsigned char *double_buffer_mem_125588) +{ + #define segmap_group_sizze_110025 (mainzisegmap_group_sizze_109292) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; - // neutral-initialise the accumulators - { - x_acc_46409 = 0; - } - for (int64_t i_46414 = 0; i_46414 < 
chunk_sizze_46410; i_46414++) { - gtid_41335 = sext_i32_i64(sext_i64_i32(global_tid_46408)) + - threads_per_segment_46390 * i_46414; - // apply map function - { - float x_41355 = ((__global - float *) images_mem_44381)[gtid_41327 * - N_29165 + - gtid_41335]; - bool isnan_res_41356; - - isnan_res_41356 = futrts_isnan32(x_41355); - - bool cond_41357 = !isnan_res_41356; - int32_t defunc_0_f_res_41358 = btoi_bool_i32(cond_41357); - - // save map-out results - { } - // load accumulator - { - x_41351 = x_acc_46409; - } - // load new values - { - x_41352 = defunc_0_f_res_41358; - } - // apply reduction operator - { - int32_t defunc_1_op_res_41353 = add32(x_41351, x_41352); - - // store in accumulator - { - x_acc_46409 = defunc_1_op_res_41353; - } + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127575; + int32_t local_tid_127576; + int64_t group_sizze_127579; + int32_t wave_sizze_127578; + int32_t group_tid_127577; + + global_tid_127575 = get_global_id(0); + local_tid_127576 = get_local_id(0); + group_sizze_127579 = get_local_size(0); + wave_sizze_127578 = LOCKSTEP_WIDTH; + group_tid_127577 = get_group_id(0); + + int32_t phys_tid_109290; + + phys_tid_109290 = global_tid_127575; + + int32_t phys_group_id_127580; + + phys_group_id_127580 = get_group_id(0); + for (int32_t i_127581 = 0; i_127581 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_110025)) - + phys_group_id_127580, sext_i64_i32(num_groups_110026)); + i_127581++) { + int32_t virt_group_id_127582 = phys_group_id_127580 + i_127581 * + sext_i64_i32(num_groups_110026); + int64_t gtid_109289 = sext_i32_i64(virt_group_id_127582) * + segmap_group_sizze_110025 + sext_i32_i64(local_tid_127576); + + if (slt64(gtid_109289, m_75136)) { + for (int64_t i_127583 = 0; i_127583 < k2p2zq_75151; i_127583++) { + ((__global int64_t *) mem_122803)[phys_tid_109290 + i_127583 * + num_threads_126209] = + ((__global int64_t *) mem_120248)[i_127583]; + } + for 
(int64_t i_127584 = 0; i_127584 < k2p2zq_75151; i_127584++) { + for (int64_t i_127585 = 0; i_127585 < rp1_75837; i_127585++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127584 * + (num_threads_126209 * + rp1_75837) + + i_127585 * + num_threads_126209)] = + ((__global double *) mem_122793)[gtid_109289 + + (i_127584 * (m_75136 * + rp1_75837) + + i_127585 * m_75136)]; + } + } + for (int64_t i_127586 = 0; i_127586 < k2p2zq_75151; i_127586++) { + ((__global double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127586 * + num_threads_126209] = + ((__global double *) mem_122796)[gtid_109289 + i_127586 * + m_75136]; + } + for (int64_t i_127587 = 0; i_127587 < (int64_t) 2; i_127587++) { + for (int64_t i_127588 = 0; i_127588 < k2p2zq_75151; + i_127588++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (i_127587 * + (num_threads_126209 * + k2p2zq_75151) + + i_127588 * + num_threads_126209)] = + ((__global double *) mem_122800)[gtid_109289 + + (i_127587 * (m_75136 * + k2p2zq_75151) + + i_127588 * m_75136)]; } } - } - // to reduce current chunk, first store our result in memory - { - x_41351 = x_acc_46409; - ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_41351; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46415; - int32_t skip_waves_46416; - - skip_waves_46416 = 1; - - int32_t x_46411; - int32_t x_46412; - - offset_46415 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46396, - sext_i64_i32(segred_group_sizze_41347))) { - x_46411 = ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46415)]; - } - } - offset_46415 = 1; - while (slt32(offset_46415, wave_sizze_46398)) { - if (slt32(local_tid_46396 + offset_46415, - sext_i64_i32(segred_group_sizze_41347)) && - ((local_tid_46396 - squot32(local_tid_46396, wave_sizze_46398) * - wave_sizze_46398) & (2 * offset_46415 - 1)) == 0) { - // read array element - { - x_46412 = 
((volatile __local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46415)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46413 = add32(x_46411, x_46412); + + int64_t dqrdc2_res_110040; + int64_t k_110046 = k_75342; + + for (int64_t l_110041 = 0; l_110041 < min_res_75849; l_110041++) { + int64_t x_110047 = add64((int64_t) 1, l_110041); + bool cond_110048 = slt64(x_110047, k_110046); + bool loop_cond_110049; + + if (cond_110048) { + bool y_110050 = slt64(l_110041, k2p2zq_75151); + bool index_certs_110051; - x_46411 = defunc_1_op_res_46413; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_46411; + if (!y_110050) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 133) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_110041; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_110052 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + l_110041 * + num_threads_126209)]; + double zt_res_110053 = 1.0e-7 * zt_arg_110052; + bool index_certs_110054; + + if (!y_110050) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 134) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_110055 = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + l_110041 * + num_threads_126209]; + bool zl_res_110056 = zl_arg_110055 < zt_res_110053; + + loop_cond_110049 = zl_res_110056; + } else { + loop_cond_110049 = 0; } - } - offset_46415 *= 2; - } - while (slt32(skip_waves_46416, - squot32(sext_i64_i32(segred_group_sizze_41347) + - wave_sizze_46398 - 1, wave_sizze_46398))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46415 = skip_waves_46416 * 
wave_sizze_46398; - if (slt32(local_tid_46396 + offset_46415, - sext_i64_i32(segred_group_sizze_41347)) && - ((local_tid_46396 - squot32(local_tid_46396, wave_sizze_46398) * - wave_sizze_46398) == 0 && (squot32(local_tid_46396, - wave_sizze_46398) & (2 * - skip_waves_46416 - - 1)) == - 0)) { - // read array element - { - x_46412 = ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46415)]; + + bool y_110057 = slt64(l_110041, k2p2zq_75151); + int64_t upper_bound_110058 = sub64(k2p2zq_75151, x_110047); + bool loop_not_taken_110059 = !loop_cond_110049; + bool protect_assert_disj_110060 = y_110057 || + loop_not_taken_110059; + bool index_certs_110061; + + if (!protect_assert_disj_110060) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 135) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - // apply reduction operation - { - int32_t defunc_1_op_res_46413 = add32(x_46411, x_46412); - - x_46411 = defunc_1_op_res_46413; + + bool index_certs_110062; + + if (!protect_assert_disj_110060) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 136) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_110041; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_46411; + + bool index_certs_110063; + + if (!protect_assert_disj_110060) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 137) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_110041; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - skip_waves_46416 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in 
accumulator - { - if (sext_i32_i64(local_tid_46396) == (int64_t) 0) { - x_acc_46409 = x_46411; - } - } - if (groups_per_segment_46386 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46396 == 0) { - ((__global int32_t *) mem_45232)[gtid_41327] = x_acc_46409; + + bool protect_assert_disj_110064 = y_75227 || + loop_not_taken_110059; + bool index_certs_110065; + + if (!protect_assert_disj_110064) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 138) == -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - } else { - int32_t old_counter_46417; - - // first thread in group saves group result to global memory - { - if (local_tid_46396 == 0) { - ((__global - int32_t *) group_res_arr_mem_46391)[sext_i32_i64(virt_group_id_46406) * - segred_group_sizze_41347] = - x_acc_46409; - mem_fence_global(); - old_counter_46417 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46393)[sext_i32_i64(srem32(flat_segment_id_46407, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46402)[(int64_t) 0] = - old_counter_46417 == groups_per_segment_46386 - - (int64_t) 1; + + bool index_certs_110066; + + if (!protect_assert_disj_110064) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 139) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46418; - - is_last_group_46418 = ((__local - bool *) sync_arr_mem_46402)[(int64_t) 0]; - if (is_last_group_46418) { - if (local_tid_46396 == 0) { - old_counter_46417 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46393)[sext_i32_i64(srem32(flat_segment_id_46407, - 10240))], - (int) ((int64_t) 0 - - 
groups_per_segment_46386)); + + bool index_certs_110067; + + if (!protect_assert_disj_110064) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 140) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_75223; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - // read in the per-group-results - { - int64_t read_per_thread_46419 = - sdiv_up64(groups_per_segment_46386, - segred_group_sizze_41347); - - x_41351 = 0; - for (int64_t i_46420 = 0; i_46420 < read_per_thread_46419; - i_46420++) { - int64_t group_res_id_46421 = - sext_i32_i64(local_tid_46396) * - read_per_thread_46419 + i_46420; - int64_t index_of_group_res_46422 = - sext_i32_i64(flat_segment_id_46407) * - groups_per_segment_46386 + group_res_id_46421; + + bool loopres_110068; + int64_t loopres_110073; + bool loop_while_110074; + int64_t k_110079; + + loop_while_110074 = loop_cond_110049; + k_110079 = k_110046; + while (loop_while_110074) { + for (int64_t i_110081 = 0; i_110081 < rp1_75837; + i_110081++) { + bool index_certs_110083; + + if (!y_110057) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 141) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = i_110081; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_110084 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + i_110081 * + num_threads_126209)]; - if (slt64(group_res_id_46421, - groups_per_segment_46386)) { - x_41352 = ((__global - int32_t *) group_res_arr_mem_46391)[index_of_group_res_46422 * - segred_group_sizze_41347]; + for (int64_t j0_110086 = 0; j0_110086 < + upper_bound_110058; j0_110086++) { + int64_t j_110088 = add64(x_110047, j0_110086); + bool x_110089 = sle64((int64_t) 0, j_110088); + bool y_110090 = slt64(j_110088, 
k2p2zq_75151); + bool bounds_check_110091 = x_110089 && y_110090; + bool index_certs_110092; - int32_t defunc_1_op_res_41353; + if (!bounds_check_110091) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 142) == + -1) { + global_failure_args[0] = j_110088; + global_failure_args[1] = i_110081; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } - defunc_1_op_res_41353 = add32(x_41351, x_41352); - x_41351 = defunc_1_op_res_41353; + double lw_val_110093 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (j_110088 * + (num_threads_126209 * + rp1_75837) + + i_110081 * + num_threads_126209)]; + int64_t i_110094 = sub64(j_110088, (int64_t) 1); + bool x_110095 = sle64((int64_t) 0, i_110094); + bool y_110096 = slt64(i_110094, k2p2zq_75151); + bool bounds_check_110097 = x_110095 && y_110096; + bool index_certs_110098; + + if (!bounds_check_110097) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 143) == + -1) { + global_failure_args[0] = i_110094; + global_failure_args[1] = i_110081; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_110094 * + (num_threads_126209 * + rp1_75837) + + i_110081 * + num_threads_126209)] = + lw_val_110093; + } + + bool index_certs_110100; + + if (!y_75227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 144) == -1) { + global_failure_args[0] = m_75223; + global_failure_args[1] = i_110081; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } } + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (m_75223 * + (num_threads_126209 * + rp1_75837) + + i_110081 * + num_threads_126209)] = + t_110084; } - } - ((__local - int32_t *) 
red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_41351; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46423; - int32_t skip_waves_46424; - skip_waves_46424 = 1; + int64_t i_110102 = ((__global + int64_t *) mem_122803)[phys_tid_109290 + + l_110041 * + num_threads_126209]; + double t_110103 = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + l_110041 * + num_threads_126209]; + double tt_110104 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + l_110041 * + num_threads_126209]; + double ttt_110105 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + l_110041 * + num_threads_126209)]; - int32_t x_46411; - int32_t x_46412; - - offset_46423 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46396, - sext_i64_i32(segred_group_sizze_41347))) { - x_46411 = ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46423)]; + for (int64_t j0_110109 = 0; j0_110109 < upper_bound_110058; + j0_110109++) { + int64_t j_110113 = add64(x_110047, j0_110109); + bool x_110114 = sle64((int64_t) 0, j_110113); + bool y_110115 = slt64(j_110113, k2p2zq_75151); + bool bounds_check_110116 = x_110114 && y_110115; + bool index_certs_110117; + + if (!bounds_check_110116) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 145) == -1) { + global_failure_args[0] = j_110113; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } } - } - offset_46423 = 1; - while (slt32(offset_46423, wave_sizze_46398)) { - if (slt32(local_tid_46396 + offset_46423, - sext_i64_i32(segred_group_sizze_41347)) && - ((local_tid_46396 - squot32(local_tid_46396, - wave_sizze_46398) * - wave_sizze_46398) & (2 * offset_46423 - 1)) == - 0) { - // read array element + + int64_t lw_val_110118 = ((__global + int64_t *) mem_122803)[phys_tid_109290 + + j_110113 * + num_threads_126209]; + 
int64_t i_110119 = sub64(j_110113, (int64_t) 1); + bool x_110120 = sle64((int64_t) 0, i_110119); + bool y_110121 = slt64(i_110119, k2p2zq_75151); + bool bounds_check_110122 = x_110120 && y_110121; + bool index_certs_110123; + + if (!bounds_check_110122) { { - x_46412 = ((volatile __local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46423)]; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 146) == -1) { + global_failure_args[0] = i_110119; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // apply reduction operation + } + ((__global int64_t *) mem_122803)[phys_tid_109290 + + i_110119 * + num_threads_126209] = + lw_val_110118; + + double lw_val_110125 = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + j_110113 * + num_threads_126209]; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_110119 * + num_threads_126209] = + lw_val_110125; + + bool index_certs_110127; + + if (!bounds_check_110116) { { - int32_t defunc_1_op_res_46413 = add32(x_46411, - x_46412); - - x_46411 = defunc_1_op_res_46413; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 147) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_110113; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } - // write result of operation + } + + double lw_val_110128 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + j_110113 * + num_threads_126209]; + bool index_certs_110129; + + if (!bounds_check_110122) { { - ((volatile __local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_46411; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 148) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_110119; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + 
} + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + i_110119 * + num_threads_126209] = + lw_val_110128; + + bool index_certs_110131; + + if (!bounds_check_110116) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 149) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_110113; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_110132 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + j_110113 * + num_threads_126209)]; + bool index_certs_110133; + + if (!bounds_check_110122) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 150) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_110119; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + i_110119 * + num_threads_126209)] = + lw_val_110132; + } + ((__global int64_t *) mem_122803)[phys_tid_109290 + + m_75223 * + num_threads_126209] = + i_110102; + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + m_75223 * + num_threads_126209] = + t_110103; + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + m_75223 * + num_threads_126209] = + tt_110104; + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + m_75223 * + num_threads_126209)] = + ttt_110105; + + int64_t k_110139 = sub64(k_110079, (int64_t) 1); + bool cond_110140 = slt64(x_110047, k_110139); + bool loop_cond_110141; + + if (cond_110140) { + bool index_certs_110142; + + if (!y_110057) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 151) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = 
l_110041; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_110143 = ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (num_threads_126209 * + k2p2zq_75151 + + l_110041 * + num_threads_126209)]; + double zt_res_110144 = 1.0e-7 * zt_arg_110143; + bool index_certs_110145; + + if (!y_110057) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 152) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - offset_46423 *= 2; + + double zl_arg_110146 = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + l_110041 * + num_threads_126209]; + bool zl_res_110147 = zl_arg_110146 < zt_res_110144; + + loop_cond_110141 = zl_res_110147; + } else { + loop_cond_110141 = 0; } - while (slt32(skip_waves_46424, - squot32(sext_i64_i32(segred_group_sizze_41347) + - wave_sizze_46398 - 1, - wave_sizze_46398))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46423 = skip_waves_46424 * wave_sizze_46398; - if (slt32(local_tid_46396 + offset_46423, - sext_i64_i32(segred_group_sizze_41347)) && - ((local_tid_46396 - squot32(local_tid_46396, - wave_sizze_46398) * - wave_sizze_46398) == 0 && - (squot32(local_tid_46396, wave_sizze_46398) & (2 * - skip_waves_46424 - - 1)) == - 0)) { - // read array element + + bool loop_while_tmp_127594 = loop_cond_110141; + int64_t k_tmp_127599 = k_110139; + + loop_while_110074 = loop_while_tmp_127594; + k_110079 = k_tmp_127599; + } + loopres_110068 = loop_while_110074; + loopres_110073 = k_110079; + + bool cond_110148 = x_110047 == rp1_75837; + int64_t j_m_i_110149 = sub64(rp1_75837, l_110041); + bool empty_slice_110153 = j_m_i_110149 == (int64_t) 0; + int64_t m_110154 = sub64(j_m_i_110149, (int64_t) 1); + int64_t i_p_m_t_s_110155 = add64(l_110041, m_110154); + bool zzero_leq_i_p_m_t_s_110156 = sle64((int64_t) 0, + i_p_m_t_s_110155); + bool 
i_p_m_t_s_leq_w_110157 = slt64(i_p_m_t_s_110155, + rp1_75837); + bool i_lte_j_110158 = sle64(l_110041, rp1_75837); + bool y_110159 = zzero_leq_i_p_m_t_s_110156 && + i_p_m_t_s_leq_w_110157; + bool y_110160 = i_lte_j_110158 && y_110159; + bool ok_or_empty_110161 = empty_slice_110153 || y_110160; + bool index_ok_110162 = y_110057 && ok_or_empty_110161; + + if (cond_110148) { + for (int64_t i_127605 = 0; i_127605 < k2p2zq_75151; + i_127605++) { + ((__global double *) mem_125482)[phys_tid_109290 + + i_127605 * + num_threads_126209] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127605 * + num_threads_126209]; + } + for (int64_t i_127606 = 0; i_127606 < (int64_t) 2; + i_127606++) { + for (int64_t i_127607 = 0; i_127607 < k2p2zq_75151; + i_127607++) { + ((__global double *) mem_125480)[phys_tid_109290 + + (i_127606 * + (num_threads_126209 * + k2p2zq_75151) + + i_127607 * + num_threads_126209)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (i_127606 * + (num_threads_126209 * + k2p2zq_75151) + + i_127607 * + num_threads_126209)]; + } + } + for (int64_t i_127608 = 0; i_127608 < k2p2zq_75151; + i_127608++) { + for (int64_t i_127609 = 0; i_127609 < rp1_75837; + i_127609++) { + ((__global double *) mem_125512)[phys_tid_109290 + + (i_127608 * + (num_threads_126209 * + rp1_75837) + + i_127609 * + num_threads_126209)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127608 * + (num_threads_126209 * + rp1_75837) + + i_127609 * + num_threads_126209)]; + } + } + } else { + bool index_certs_110163; + + if (!index_ok_110162) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 153) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = l_110041; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_110165; + double redout_119833 = 0.0; + + for (int64_t i_119834 = 0; 
i_119834 < j_m_i_110149; + i_119834++) { + int64_t slice_120035 = l_110041 + i_119834; + double x_110169 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + slice_120035 * + num_threads_126209)]; + double defunc_1_f_res_110170 = x_110169 * x_110169; + double defunc_1_op_res_110168 = defunc_1_f_res_110170 + + redout_119833; + double redout_tmp_127610 = defunc_1_op_res_110168; + + redout_119833 = redout_tmp_127610; + } + defunc_2_reduce_res_110165 = redout_119833; + + double sqrt_res_110171; + + sqrt_res_110171 = futrts_sqrt64(defunc_2_reduce_res_110165); + + bool zeze_res_110172 = sqrt_res_110171 == 0.0; + + if (zeze_res_110172) { + for (int64_t i_127611 = 0; i_127611 < k2p2zq_75151; + i_127611++) { + ((__global double *) mem_125267)[phys_tid_109290 + + i_127611 * + num_threads_126209] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127611 * + num_threads_126209]; + } + for (int64_t i_127612 = 0; i_127612 < (int64_t) 2; + i_127612++) { + for (int64_t i_127613 = 0; i_127613 < k2p2zq_75151; + i_127613++) { + ((__global + double *) mem_125265)[phys_tid_109290 + + (i_127612 * + (num_threads_126209 * + k2p2zq_75151) + + i_127613 * + num_threads_126209)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (i_127612 * + (num_threads_126209 * + k2p2zq_75151) + + i_127613 * + num_threads_126209)]; + } + } + for (int64_t i_127614 = 0; i_127614 < k2p2zq_75151; + i_127614++) { + for (int64_t i_127615 = 0; i_127615 < rp1_75837; + i_127615++) { + ((__global + double *) mem_125472)[phys_tid_109290 + + (i_127614 * + (num_threads_126209 * + rp1_75837) + + i_127615 * + num_threads_126209)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127614 * + (num_threads_126209 * + rp1_75837) + + i_127615 * + num_threads_126209)]; + } + } + } else { + bool y_110176 = slt64(l_110041, rp1_75837); + bool index_ok_110177 = y_110057 && y_110176; + bool 
index_certs_110178; + + if (!index_ok_110177) { { - x_46412 = ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396 + - offset_46423)]; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 154) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = l_110041; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; } - // apply reduction operation + } + + double znze_arg_110179 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)]; + bool zeze_res_110180 = znze_arg_110179 == 0.0; + bool znze_res_110181 = !zeze_res_110180; + double nrmxl_110182; + + if (znze_res_110181) { + double abs_res_110183 = fabs(sqrt_res_110171); + double sgn_res_110184 = fsignum32(znze_arg_110179); + double zt_res_110185 = abs_res_110183 * + sgn_res_110184; + + nrmxl_110182 = zt_res_110185; + } else { + nrmxl_110182 = sqrt_res_110171; + } + for (int64_t i0_110187 = 0; i0_110187 < j_m_i_110149; + i0_110187++) { + int64_t i_110189 = add64(l_110041, i0_110187); + bool x_110190 = sle64((int64_t) 0, i_110189); + bool y_110191 = slt64(i_110189, rp1_75837); + bool bounds_check_110192 = x_110190 && y_110191; + bool index_ok_110193 = y_110057 && + bounds_check_110192; + bool index_certs_110194; + + if (!index_ok_110193) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 155) == + -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = i_110189; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_110195 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + i_110189 * + num_threads_126209)]; + double lw_val_110196 = x_110195 / nrmxl_110182; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + 
(l_110041 * + (num_threads_126209 * + rp1_75837) + + i_110189 * + num_threads_126209)] = + lw_val_110196; + } + + double zp_arg_110198 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)]; + double zp_res_110199 = 1.0 + zp_arg_110198; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)] = + zp_res_110199; + + bool bounds_invalid_upwards_110201 = slt64(k2p2zq_75151, + x_110047); + bool valid_110202 = !bounds_invalid_upwards_110201; + bool range_valid_c_110203; + + if (!valid_110202) { { - int32_t defunc_1_op_res_46413 = add32(x_46411, - x_46412); + if (atomic_cmpxchg_i32_global(global_failure, + -1, 156) == -1) { + global_failure_args[0] = x_110047; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_110204 = slt64((int64_t) 0, + upper_bound_110058); + bool loop_not_taken_110205 = !loop_nonempty_110204; + bool protect_assert_disj_110206 = index_ok_110177 || + loop_not_taken_110205; + bool index_certs_110207; + + if (!protect_assert_disj_110206) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 157) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = l_110041; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_110211 = 0; i_110211 < + upper_bound_110058; i_110211++) { + int64_t index_primexp_110215 = add64(x_110047, + i_110211); + bool x_110216 = sle64((int64_t) 0, + index_primexp_110215); + bool y_110217 = slt64(index_primexp_110215, + k2p2zq_75151); + bool bounds_check_110218 = x_110216 && y_110217; + double t_110219; + double t_110221 = 0.0; + + for (int64_t i0_110220 = 0; i0_110220 < + j_m_i_110149; i0_110220++) { + int64_t i_110222 = add64(l_110041, 
i0_110220); + bool x_110223 = sle64((int64_t) 0, i_110222); + bool y_110224 = slt64(i_110222, rp1_75837); + bool bounds_check_110225 = x_110223 && y_110224; + bool index_ok_110226 = y_110057 && + bounds_check_110225; + bool index_certs_110227; + + if (!index_ok_110226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 158) == + -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = i_110222; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_110228 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + i_110222 * + num_threads_126209)]; + bool index_ok_110229 = bounds_check_110218 && + bounds_check_110225; + bool index_certs_110230; + + if (!index_ok_110229) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 159) == + -1) { + global_failure_args[0] = + index_primexp_110215; + global_failure_args[1] = i_110222; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_110231 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (index_primexp_110215 * + (num_threads_126209 * + rp1_75837) + + i_110222 * + num_threads_126209)]; + double y_110232 = x_110228 * y_110231; + double loopres_110233 = t_110221 - y_110232; + double t_tmp_127620 = loopres_110233; - x_46411 = defunc_1_op_res_46413; + t_110221 = t_tmp_127620; } - // write result of operation + t_110219 = t_110221; + + double y_110234 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)]; + double t_110235 = t_110219 / y_110234; + + for (int64_t i0_110237 = 0; i0_110237 < + j_m_i_110149; i0_110237++) { + int64_t i_110239 = add64(l_110041, i0_110237); + bool x_110240 = sle64((int64_t) 0, i_110239); + 
bool y_110241 = slt64(i_110239, rp1_75837); + bool bounds_check_110242 = x_110240 && y_110241; + bool index_ok_110243 = bounds_check_110218 && + bounds_check_110242; + bool index_certs_110244; + + if (!index_ok_110243) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 160) == + -1) { + global_failure_args[0] = + index_primexp_110215; + global_failure_args[1] = i_110239; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_110245 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (index_primexp_110215 * + (num_threads_126209 * + rp1_75837) + + i_110239 * + num_threads_126209)]; + bool index_ok_110246 = y_110057 && + bounds_check_110242; + bool index_certs_110247; + + if (!index_ok_110246) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 161) == + -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = i_110239; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_110248 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + i_110239 * + num_threads_126209)]; + double y_110249 = t_110235 * y_110248; + double lw_val_110250 = x_110245 + y_110249; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (index_primexp_110215 * + (num_threads_126209 * + rp1_75837) + + i_110239 * + num_threads_126209)] = + lw_val_110250; + } + + bool index_certs_110252; + + if (!bounds_check_110218) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 162) == + -1) { + global_failure_args[0] = + index_primexp_110215; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_110253 = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + index_primexp_110215 * + 
num_threads_126209]; + bool zeze_res_110254 = zeze_arg_110253 == 0.0; + + if (!zeze_res_110254) { + bool index_ok_110257 = y_110176 && + bounds_check_110218; + bool index_certs_110258; + + if (!index_ok_110257) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 163) == + -1) { + global_failure_args[0] = + index_primexp_110215; + global_failure_args[1] = l_110041; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_110259 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (index_primexp_110215 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)]; + double abs_res_110260 = fabs(abs_arg_110259); + double zs_res_110261 = abs_res_110260 / + zeze_arg_110253; + double ztzt_res_110262 = fpow64(zs_res_110261, + 2.0); + double zm_res_110263 = 1.0 - ztzt_res_110262; + double max_res_110264 = fmax64(0.0, + zm_res_110263); + double abs_res_110265 = fabs(max_res_110264); + bool zgze_res_110266 = 1.0e-6 <= abs_res_110265; + int64_t j_m_i_110267 = sub64(rp1_75837, + x_110047); + + if (zgze_res_110266) { + double sqrt_res_110270; + + sqrt_res_110270 = + futrts_sqrt64(max_res_110264); + + double zt_res_110271 = zeze_arg_110253 * + sqrt_res_110270; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + index_primexp_110215 * + num_threads_126209] = + zt_res_110271; + } else { + bool empty_slice_110273 = j_m_i_110267 == + (int64_t) 0; + int64_t m_110274 = sub64(j_m_i_110267, + (int64_t) 1); + int64_t i_p_m_t_s_110275 = add64(x_110047, + m_110274); + bool zzero_leq_i_p_m_t_s_110276 = + sle64((int64_t) 0, i_p_m_t_s_110275); + bool i_p_m_t_s_leq_w_110277 = + slt64(i_p_m_t_s_110275, rp1_75837); + bool zzero_lte_i_110278 = sle64((int64_t) 0, + x_110047); + bool i_lte_j_110279 = sle64(x_110047, + rp1_75837); + bool y_110280 = i_p_m_t_s_leq_w_110277 && + zzero_lte_i_110278; + bool y_110281 = + 
zzero_leq_i_p_m_t_s_110276 && y_110280; + bool y_110282 = i_lte_j_110279 && y_110281; + bool forwards_ok_110283 = + zzero_lte_i_110278 && y_110282; + bool ok_or_empty_110284 = + empty_slice_110273 || + forwards_ok_110283; + bool index_ok_110285 = + bounds_check_110218 && + ok_or_empty_110284; + bool index_certs_110286; + + if (!index_ok_110285) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 164) == + -1) { + global_failure_args[0] = + index_primexp_110215; + global_failure_args[1] = + x_110047; + global_failure_args[2] = + k2p2zq_75151; + global_failure_args[3] = + rp1_75837; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_110288; + double redout_119835 = 0.0; + + for (int64_t i_119836 = 0; i_119836 < + j_m_i_110267; i_119836++) { + int64_t slice_120036 = x_110047 + + i_119836; + double x_110292 = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (index_primexp_110215 * + (num_threads_126209 * + rp1_75837) + + slice_120036 * + num_threads_126209)]; + double defunc_1_f_res_110293 = + x_110292 * x_110292; + double defunc_1_op_res_110291 = + defunc_1_f_res_110293 + + redout_119835; + double redout_tmp_127622 = + defunc_1_op_res_110291; + + redout_119835 = redout_tmp_127622; + } + defunc_2_reduce_res_110288 = redout_119835; + + double sqrt_res_110294; + + sqrt_res_110294 = + futrts_sqrt64(defunc_2_reduce_res_110288); + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + index_primexp_110215 * + num_threads_126209] = + sqrt_res_110294; + + bool index_certs_110296; + + if (!bounds_check_110218) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 165) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_110215; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127623 = 0; i_127623 < + (int64_t) 1; i_127623++) { + ((__global + 
double *) double_buffer_mem_125588)[phys_tid_109290 + + (index_primexp_110215 + + i_127623) * + num_threads_126209] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + num_threads_126209 * + index_primexp_110215 + + i_127623 * + num_threads_126209]; + } + } + } + } + + bool index_certs_110299; + + if (!y_110057) { { - ((__local - int32_t *) red_arr_mem_46400)[sext_i32_i64(local_tid_46396)] = - x_46411; + if (atomic_cmpxchg_i32_global(global_failure, + -1, 166) == -1) { + global_failure_args[0] = l_110041; + global_failure_args[1] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - skip_waves_46424 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46396 == 0) { - ((__global int32_t *) mem_45232)[gtid_41327] = - x_46411; + for (int64_t i_127624 = 0; i_127624 < (int64_t) 1; + i_127624++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + (l_110041 + + i_127624) * + num_threads_126209] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + l_110041 * + (num_threads_126209 * + rp1_75837) + + num_threads_126209 * + l_110041 + + i_127624 * + num_threads_126209]; + } + + double zt_res_110302 = -1.0 * nrmxl_110182; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (l_110041 * + (num_threads_126209 * + rp1_75837) + + l_110041 * + num_threads_126209)] = + zt_res_110302; + for (int64_t i_127625 = 0; i_127625 < k2p2zq_75151; + i_127625++) { + ((__global double *) mem_125267)[phys_tid_109290 + + i_127625 * + num_threads_126209] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127625 * + num_threads_126209]; + } + for (int64_t i_127626 = 0; i_127626 < (int64_t) 2; + i_127626++) { + for (int64_t i_127627 = 0; i_127627 < k2p2zq_75151; + i_127627++) { + ((__global + double *) mem_125265)[phys_tid_109290 + + (i_127626 * + (num_threads_126209 * + k2p2zq_75151) + + i_127627 * + num_threads_126209)] = + ((__global + double *) 
double_buffer_mem_125588)[phys_tid_109290 + + (i_127626 * + (num_threads_126209 * + k2p2zq_75151) + + i_127627 * + num_threads_126209)]; + } + } + for (int64_t i_127628 = 0; i_127628 < k2p2zq_75151; + i_127628++) { + for (int64_t i_127629 = 0; i_127629 < rp1_75837; + i_127629++) { + ((__global + double *) mem_125472)[phys_tid_109290 + + (i_127628 * + (num_threads_126209 * + rp1_75837) + + i_127629 * + num_threads_126209)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127628 * + (num_threads_126209 * + rp1_75837) + + i_127629 * + num_threads_126209)]; + } } } + for (int64_t i_127630 = 0; i_127630 < k2p2zq_75151; + i_127630++) { + ((__global double *) mem_125482)[phys_tid_109290 + + i_127630 * + num_threads_126209] = + ((__global double *) mem_125267)[phys_tid_109290 + + i_127630 * + num_threads_126209]; + } + for (int64_t i_127631 = 0; i_127631 < (int64_t) 2; + i_127631++) { + for (int64_t i_127632 = 0; i_127632 < k2p2zq_75151; + i_127632++) { + ((__global double *) mem_125480)[phys_tid_109290 + + (i_127631 * + (num_threads_126209 * + k2p2zq_75151) + + i_127632 * + num_threads_126209)] = + ((__global + double *) mem_125265)[phys_tid_109290 + + (i_127631 * + (num_threads_126209 * + k2p2zq_75151) + + i_127632 * + num_threads_126209)]; + } + } + for (int64_t i_127633 = 0; i_127633 < k2p2zq_75151; + i_127633++) { + for (int64_t i_127634 = 0; i_127634 < rp1_75837; + i_127634++) { + ((__global double *) mem_125512)[phys_tid_109290 + + (i_127633 * + (num_threads_126209 * + rp1_75837) + + i_127634 * + num_threads_126209)] = + ((__global + double *) mem_125472)[phys_tid_109290 + + (i_127633 * + (num_threads_126209 * + rp1_75837) + + i_127634 * + num_threads_126209)]; + } + } + } + for (int64_t i_127635 = 0; i_127635 < k2p2zq_75151; + i_127635++) { + for (int64_t i_127636 = 0; i_127636 < rp1_75837; + i_127636++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127635 * + (num_threads_126209 * + rp1_75837) + + i_127636 * 
+ num_threads_126209)] = + ((__global double *) mem_125512)[phys_tid_109290 + + (i_127635 * + (num_threads_126209 * + rp1_75837) + + i_127636 * + num_threads_126209)]; + } + } + for (int64_t i_127637 = 0; i_127637 < k2p2zq_75151; + i_127637++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127637 * + num_threads_126209] = + ((__global double *) mem_125482)[phys_tid_109290 + + i_127637 * + num_threads_126209]; + } + for (int64_t i_127638 = 0; i_127638 < (int64_t) 2; i_127638++) { + for (int64_t i_127639 = 0; i_127639 < k2p2zq_75151; + i_127639++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_109290 + + (i_127638 * + (num_threads_126209 * + k2p2zq_75151) + + i_127639 * + num_threads_126209)] = + ((__global double *) mem_125480)[phys_tid_109290 + + (i_127638 * + (num_threads_126209 * + k2p2zq_75151) + + i_127639 * + num_threads_126209)]; + } + } + + int64_t k_tmp_127593 = loopres_110073; + + k_110046 = k_tmp_127593; + } + dqrdc2_res_110040 = k_110046; + + int64_t min_arg_110304 = sub64(dqrdc2_res_110040, (int64_t) 1); + int64_t min_res_110305 = smin64(rp1_75837, min_arg_110304); + + for (int64_t i_127640 = 0; i_127640 < k2p2zq_75151; i_127640++) { + for (int64_t i_127641 = 0; i_127641 < rp1_75837; i_127641++) { + ((__global double *) mem_123127)[i_127640 * (m_75136 * + rp1_75837) + + i_127641 * m_75136 + + gtid_109289] = ((__global + double *) double_buffer_mem_125586)[phys_tid_109290 + + (i_127640 * + (num_threads_126209 * + rp1_75837) + + i_127641 * + num_threads_126209)]; + } + } + for (int64_t i_127642 = 0; i_127642 < k2p2zq_75151; i_127642++) { + ((__global double *) mem_123130)[i_127642 * m_75136 + + gtid_109289] = ((__global + double *) double_buffer_mem_125587)[phys_tid_109290 + + i_127642 * + num_threads_126209]; + } + for (int64_t i_127643 = 0; i_127643 < k2p2zq_75151; i_127643++) { + ((__global int64_t *) mem_123133)[i_127643 * m_75136 + + gtid_109289] = ((__global + int64_t *) mem_122803)[phys_tid_109290 + + 
i_127643 * + num_threads_126209]; + } + ((__global int64_t *) mem_123135)[gtid_109289] = min_res_110305; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_110025 +} +__kernel void mainzisegmap_109580(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t rp1_75837, + int64_t j_109957, int64_t num_groups_109968, + __global unsigned char *mem_122686, __global + unsigned char *mem_122715, __global + unsigned char *mem_122719, __global + unsigned char *mem_122723, __global + unsigned char *mem_122727) +{ + #define segmap_group_sizze_109967 (mainzisegmap_group_sizze_109582) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127475; + int32_t local_tid_127476; + int64_t group_sizze_127479; + int32_t wave_sizze_127478; + int32_t group_tid_127477; + + global_tid_127475 = get_global_id(0); + local_tid_127476 = get_local_id(0); + group_sizze_127479 = get_local_size(0); + wave_sizze_127478 = LOCKSTEP_WIDTH; + group_tid_127477 = get_group_id(0); + + int32_t phys_tid_109580; + + phys_tid_109580 = global_tid_127475; + + int32_t phys_group_id_127480; + + phys_group_id_127480 = get_group_id(0); + for (int32_t i_127481 = 0; i_127481 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_109967)) - + phys_group_id_127480, sext_i64_i32(num_groups_109968)); + i_127481++) { + int32_t virt_group_id_127482 = phys_group_id_127480 + i_127481 * + sext_i64_i32(num_groups_109968); + int64_t gtid_109579 = sext_i32_i64(virt_group_id_127482) * + segmap_group_sizze_109967 + sext_i32_i64(local_tid_127476); + + if (slt64(gtid_109579, m_75136)) { + double defunc_2_reduce_res_109975; + double redout_119831 = 0.0; + + for (int64_t i_119832 = 0; i_119832 < rp1_75837; i_119832++) { + double x_109979 = ((__global double *) mem_122686)[i_119832 * + (k2p2zq_75151 * + m_75136) + + gtid_109579 * + k2p2zq_75151 + + 
j_109957]; + double defunc_1_f_res_109980 = x_109979 * x_109979; + double defunc_1_op_res_109978 = defunc_1_f_res_109980 + + redout_119831; + double redout_tmp_127483 = defunc_1_op_res_109978; + + redout_119831 = redout_tmp_127483; + } + defunc_2_reduce_res_109975 = redout_119831; + + double sqrt_res_109981; + + sqrt_res_109981 = futrts_sqrt64(defunc_2_reduce_res_109975); + ((__global double *) mem_122715)[gtid_109579 + j_109957 * m_75136] = + sqrt_res_109981; + ((__global double *) mem_122719)[gtid_109579 + j_109957 * m_75136] = + sqrt_res_109981; + + bool zeze_res_109984 = sqrt_res_109981 == 0.0; + double lw_val_109985; + + if (zeze_res_109984) { + lw_val_109985 = 1.0; + } else { + lw_val_109985 = sqrt_res_109981; + } + ((__global double *) mem_122719)[gtid_109579 + (m_75136 * + k2p2zq_75151 + + j_109957 * + m_75136)] = + lw_val_109985; + for (int64_t i_127484 = 0; i_127484 < k2p2zq_75151; i_127484++) { + ((__global double *) mem_122723)[i_127484 * m_75136 + + gtid_109579] = ((__global + double *) mem_122715)[gtid_109579 + + i_127484 * + m_75136]; + } + for (int64_t i_127485 = 0; i_127485 < (int64_t) 2; i_127485++) { + for (int64_t i_127486 = 0; i_127486 < k2p2zq_75151; + i_127486++) { + ((__global double *) mem_122727)[i_127485 * (m_75136 * + k2p2zq_75151) + + i_127486 * m_75136 + + gtid_109579] = ((__global + double *) mem_122719)[gtid_109579 + + (i_127485 * + (m_75136 * + k2p2zq_75151) + + i_127486 * + m_75136)]; } } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_41347 + #undef segmap_group_sizze_109967 } -__kernel void mainzisegred_large_41499(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46571_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46569_backing_aligned_1, - int64_t N_29165, int64_t i32_res_29568, - int64_t num_groups_41521, - int64_t groups_per_segment_46555, - int64_t 
elements_per_thread_46556, - int64_t virt_num_groups_46557, - int64_t threads_per_segment_46559, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45278, - __global - unsigned char *group_res_arr_mem_46560, - __global - unsigned char *mainzicounter_mem_46562) +__kernel void mainzisegmap_109625(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t j_109957, + int64_t num_groups_110013, + int64_t num_threads_115503, + int64_t per_chunk_115510, __global + unsigned char *mem_122733, __global + unsigned char *mem_122738, __global + unsigned char *mem_122743, __global + unsigned char *mem_122748) { - #define segred_group_sizze_41520 (mainzisegred_group_sizze_41493) + #define segmap_group_sizze_110012 (mainzisegmap_group_sizze_109627) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46571_backing_1 = - (__local volatile - char *) sync_arr_mem_46571_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46569_backing_0 = - (__local volatile - char *) red_arr_mem_46569_backing_aligned_1; - volatile __local bool local_failure; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; + if (*global_failure >= 0) + return; + + int32_t global_tid_127558; + int32_t local_tid_127559; + int64_t group_sizze_127562; + int32_t wave_sizze_127561; + int32_t group_tid_127560; + + global_tid_127558 = get_global_id(0); + local_tid_127559 = get_local_id(0); + group_sizze_127562 = get_local_size(0); + wave_sizze_127561 = LOCKSTEP_WIDTH; + group_tid_127560 = get_group_id(0); + + int32_t phys_tid_109625; + + phys_tid_109625 = global_tid_127558; + + int32_t phys_group_id_127563; + + phys_group_id_127563 = get_group_id(0); + for (int32_t i_127564 = 0; i_127564 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, 
segmap_group_sizze_110012)) - + phys_group_id_127563, sext_i64_i32(num_groups_110013)); + i_127564++) { + int32_t virt_group_id_127565 = phys_group_id_127563 + i_127564 * + sext_i64_i32(num_groups_110013); + int64_t gtid_109624 = sext_i32_i64(virt_group_id_127565) * + segmap_group_sizze_110012 + sext_i32_i64(local_tid_127559); + + if (slt64(gtid_109624, m_75136)) { + double sqrt_res_110017 = ((__global + double *) mem_122733)[gtid_109624]; + + for (int64_t i_127566 = 0; i_127566 < (int64_t) 1; i_127566++) { + ((__global double *) mem_122738)[gtid_109624 + (j_109957 + + i_127566) * + m_75136] = ((__global + double *) mem_122743)[(gtid_109624 + + i_127566 - + squot64(gtid_109624 + + i_127566, + per_chunk_115510) * + per_chunk_115510) * + num_threads_115503 + + squot64(gtid_109624 + + i_127566, + per_chunk_115510)]; + } + + bool zeze_res_110019 = sqrt_res_110017 == 0.0; + double lw_val_110020; + + if (zeze_res_110019) { + lw_val_110020 = 1.0; + } else { + lw_val_110020 = sqrt_res_110017; + } + ((__global double *) mem_122738)[gtid_109624 + (m_75136 * + k2p2zq_75151 + + j_109957 * + m_75136)] = + lw_val_110020; + for (int64_t i_127567 = 0; i_127567 < (int64_t) 2; i_127567++) { + for (int64_t i_127568 = 0; i_127568 < k2p2zq_75151; + i_127568++) { + ((__global double *) mem_122748)[i_127567 * (m_75136 * + k2p2zq_75151) + + i_127568 * m_75136 + + gtid_109624] = ((__global + double *) mem_122738)[gtid_109624 + + (i_127567 * + (m_75136 * + k2p2zq_75151) + + i_127568 * + m_75136)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46564; - int32_t local_tid_46565; - int64_t group_sizze_46568; - int32_t wave_sizze_46567; - int32_t group_tid_46566; + error_0: + return; + #undef segmap_group_sizze_110012 +} +__kernel void mainzisegmap_109640(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t j_109957, + __global unsigned char *mem_param_122694, + __global 
unsigned char *mem_122733) +{ + #define segmap_group_sizze_110007 (mainzisegmap_group_sizze_109643) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - global_tid_46564 = get_global_id(0); - local_tid_46565 = get_local_id(0); - group_sizze_46568 = get_local_size(0); - wave_sizze_46567 = LOCKSTEP_WIDTH; - group_tid_46566 = get_group_id(0); + if (*global_failure >= 0) + return; - int32_t phys_tid_41499; + int32_t global_tid_127552; + int32_t local_tid_127553; + int64_t group_sizze_127556; + int32_t wave_sizze_127555; + int32_t group_tid_127554; - phys_tid_41499 = global_tid_46564; + global_tid_127552 = get_global_id(0); + local_tid_127553 = get_local_id(0); + group_sizze_127556 = get_local_size(0); + wave_sizze_127555 = LOCKSTEP_WIDTH; + group_tid_127554 = get_group_id(0); - __local char *red_arr_mem_46569; + int32_t phys_tid_109640; - red_arr_mem_46569 = (__local char *) red_arr_mem_46569_backing_0; + phys_tid_109640 = global_tid_127552; - __local char *sync_arr_mem_46571; + int64_t gtid_109638; - sync_arr_mem_46571 = (__local char *) sync_arr_mem_46571_backing_1; + gtid_109638 = sext_i32_i64(group_tid_127554) * segmap_group_sizze_110007 + + sext_i32_i64(local_tid_127553); - int32_t phys_group_id_46573; + int64_t gtid_109639; - phys_group_id_46573 = get_group_id(0); - for (int32_t i_46574 = 0; i_46574 < - sdiv_up32(sext_i64_i32(virt_num_groups_46557) - phys_group_id_46573, - sext_i64_i32(num_groups_41521)); i_46574++) { - int32_t virt_group_id_46575 = phys_group_id_46573 + i_46574 * - sext_i64_i32(num_groups_41521); - int32_t flat_segment_id_46576 = squot32(virt_group_id_46575, - sext_i64_i32(groups_per_segment_46555)); - int64_t global_tid_46577 = srem64(sext_i32_i64(virt_group_id_46575) * - segred_group_sizze_41520 + - sext_i32_i64(local_tid_46565), - segred_group_sizze_41520 * - groups_per_segment_46555); - int64_t gtid_41490 = sext_i32_i64(flat_segment_id_46576); - int64_t gtid_41498; - float x_acc_46578; - int64_t 
chunk_sizze_46579; - - chunk_sizze_46579 = smin64(elements_per_thread_46556, - sdiv_up64(i32_res_29568 - - sext_i32_i64(sext_i64_i32(global_tid_46577)), - threads_per_segment_46559)); + gtid_109639 = sext_i32_i64(group_tid_127554) * segmap_group_sizze_110007 + + sext_i32_i64(local_tid_127553) - (sext_i32_i64(group_tid_127554) * + segmap_group_sizze_110007 + + sext_i32_i64(local_tid_127553)); + if (slt64(gtid_109638, m_75136) && slt64(gtid_109639, (int64_t) 1)) { + double sqrt_res_110010 = ((__global double *) mem_122733)[gtid_109638]; - float x_41524; - float x_41525; - - // neutral-initialise the accumulators - { - x_acc_46578 = 0.0F; - } - for (int64_t i_46583 = 0; i_46583 < chunk_sizze_46579; i_46583++) { - gtid_41498 = sext_i32_i64(sext_i64_i32(global_tid_46577)) + - threads_per_segment_46559 * i_46583; - // apply map function - { - int32_t x_41529 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_41490]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_41498); - bool cond_41531 = slt32(index_primexp_42390, x_41529); - float defunc_0_f_res_41532; - - if (cond_41531) { - int32_t x_41528 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_41490]; - int32_t x_41533 = add32(x_41528, index_primexp_42390); - int32_t x_41534 = sub32(x_41533, x_41529); - int32_t i_41535 = add32(1, x_41534); - int64_t i_41536 = sext_i32_i64(i_41535); - bool x_41537 = sle64((int64_t) 0, i_41536); - bool y_41538 = slt64(i_41536, N_29165); - bool bounds_check_41539 = x_41537 && y_41538; - bool index_certs_41540; - - if (!bounds_check_41539) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 23) == -1) { - global_failure_args[0] = i_41536; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_41541 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41490 * - N_29165 + - i_41536]; - - defunc_0_f_res_41532 = defunc_0_f_res_t_res_41541; - } else { - defunc_0_f_res_41532 = 0.0F; - } - // save 
map-out results - { } - // load accumulator - { - x_41524 = x_acc_46578; - } - // load new values - { - x_41525 = defunc_0_f_res_41532; - } - // apply reduction operator - { - float defunc_1_op_res_41526 = x_41524 + x_41525; - - // store in accumulator - { - x_acc_46578 = defunc_1_op_res_41526; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_41524 = x_acc_46578; - ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_41524; + if ((sle64((int64_t) 0, gtid_109638) && slt64(gtid_109638, m_75136)) && + (sle64((int64_t) 0, j_109957) && slt64(j_109957, k2p2zq_75151))) { + ((__global double *) mem_param_122694)[gtid_109638 * k2p2zq_75151 + + j_109957] = sqrt_res_110010; } + } + + error_0: + return; + #undef segmap_group_sizze_110007 +} +__kernel void mainzisegmap_109649(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_122730, __global + unsigned char *mem_122733) +{ + #define segmap_group_sizze_110000 (mainzisegmap_group_sizze_109651) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127547; + int32_t local_tid_127548; + int64_t group_sizze_127551; + int32_t wave_sizze_127550; + int32_t group_tid_127549; + + global_tid_127547 = get_global_id(0); + local_tid_127548 = get_local_id(0); + group_sizze_127551 = get_local_size(0); + wave_sizze_127550 = LOCKSTEP_WIDTH; + group_tid_127549 = get_group_id(0); + + int32_t phys_tid_109649; + + phys_tid_109649 = global_tid_127547; + + int64_t gtid_109648; + + gtid_109648 = sext_i32_i64(group_tid_127549) * segmap_group_sizze_110000 + + sext_i32_i64(local_tid_127548); + if (slt64(gtid_109648, m_75136)) { + double defunc_2_reduce_res_110003 = ((__global + double *) mem_122730)[gtid_109648]; + double sqrt_res_110004; - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); + sqrt_res_110004 = 
futrts_sqrt64(defunc_2_reduce_res_110003); + ((__global double *) mem_122733)[gtid_109648] = sqrt_res_110004; + } + + error_0: + return; + #undef segmap_group_sizze_110000 +} +__kernel void mainzisegmap_109798(__global int *global_failure, int64_t m_75136, + int64_t n_75139, int64_t r_75826, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122674, __global + unsigned char *mem_122677, __global + unsigned char *mem_122680, __global + unsigned char *mem_122682) +{ + #define segmap_group_sizze_109909 (mainzisegmap_group_sizze_109800) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127461; + int32_t local_tid_127462; + int64_t group_sizze_127465; + int32_t wave_sizze_127464; + int32_t group_tid_127463; + + global_tid_127461 = get_global_id(0); + local_tid_127462 = get_local_id(0); + group_sizze_127465 = get_local_size(0); + wave_sizze_127464 = LOCKSTEP_WIDTH; + group_tid_127463 = get_group_id(0); + + int32_t phys_tid_109798; + + phys_tid_109798 = global_tid_127461; + + int64_t gtid_109797; + + gtid_109797 = sext_i32_i64(group_tid_127463) * segmap_group_sizze_109909 + + sext_i32_i64(local_tid_127462); + if (slt64(gtid_109797, m_75136)) { + double fr_109913 = ((__global double *) mem_122674)[gtid_109797]; + double x_109914 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_109797 * + n_75139 + + r_75826]; + double defunc_0_f_res_109915 = ((__global + double *) mem_122677)[gtid_109797]; + double resid_109916 = x_109914 - defunc_0_f_res_109915; + double sqrt_res_109917; - int32_t offset_46584; - int32_t skip_waves_46585; + sqrt_res_109917 = futrts_sqrt64(fr_109913); - skip_waves_46585 = 1; + double recresid_r_109918 = resid_109916 / sqrt_res_109917; - float x_46580; - float x_46581; + ((__global double *) mem_122680)[gtid_109797] = resid_109916; + ((__global double *) mem_122682)[gtid_109797] = recresid_r_109918; + } + + error_0: 
+ return; + #undef segmap_group_sizze_109909 +} +__kernel void mainzisegmap_109829(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_122668, __global + unsigned char *mem_122674) +{ + #define segmap_group_sizze_109886 (mainzisegmap_group_sizze_109831) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127396; + int32_t local_tid_127397; + int64_t group_sizze_127400; + int32_t wave_sizze_127399; + int32_t group_tid_127398; + + global_tid_127396 = get_global_id(0); + local_tid_127397 = get_local_id(0); + group_sizze_127400 = get_local_size(0); + wave_sizze_127399 = LOCKSTEP_WIDTH; + group_tid_127398 = get_group_id(0); + + int32_t phys_tid_109829; + + phys_tid_109829 = global_tid_127396; + + int64_t gtid_109828; + + gtid_109828 = sext_i32_i64(group_tid_127398) * segmap_group_sizze_109886 + + sext_i32_i64(local_tid_127397); + if (slt64(gtid_109828, m_75136)) { + double defunc_0_f_res_109891 = ((__global + double *) mem_122668)[gtid_109828]; + double fr_109892 = 1.0 + defunc_0_f_res_109891; - offset_46584 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46565, - sext_i64_i32(segred_group_sizze_41520))) { - x_46580 = ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46584)]; - } - } - offset_46584 = 1; - while (slt32(offset_46584, wave_sizze_46567)) { - if (slt32(local_tid_46565 + offset_46584, - sext_i64_i32(segred_group_sizze_41520)) && - ((local_tid_46565 - squot32(local_tid_46565, wave_sizze_46567) * - wave_sizze_46567) & (2 * offset_46584 - 1)) == 0) { - // read array element - { - x_46581 = ((volatile __local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46584)]; - } - // apply reduction operation - { - float defunc_1_op_res_46582 = x_46580 + x_46581; - - x_46580 = defunc_1_op_res_46582; - } - // write result of operation - { - ((volatile __local - float 
*) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_46580; - } - } - offset_46584 *= 2; - } - while (slt32(skip_waves_46585, - squot32(sext_i64_i32(segred_group_sizze_41520) + - wave_sizze_46567 - 1, wave_sizze_46567))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46584 = skip_waves_46585 * wave_sizze_46567; - if (slt32(local_tid_46565 + offset_46584, - sext_i64_i32(segred_group_sizze_41520)) && - ((local_tid_46565 - squot32(local_tid_46565, wave_sizze_46567) * - wave_sizze_46567) == 0 && (squot32(local_tid_46565, - wave_sizze_46567) & (2 * - skip_waves_46585 - - 1)) == - 0)) { - // read array element - { - x_46581 = ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46584)]; - } - // apply reduction operation - { - float defunc_1_op_res_46582 = x_46580 + x_46581; + ((__global double *) mem_122674)[gtid_109828] = fr_109892; + } + + error_0: + return; + #undef segmap_group_sizze_109886 +} +__kernel void mainzisegmap_110907(__global int *global_failure, int64_t m_75136, + int64_t n_75139, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_110965, + int64_t num_threads_126245, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, __global + unsigned char *mem_123798, __global + unsigned char *mem_123801, __global + unsigned char *mem_123804, __global + unsigned char *mem_123818, __global + unsigned char *mem_123821, __global + unsigned char *mem_123840, __global + unsigned char *mem_123869, __global + unsigned char *mem_123872, __global + unsigned char *mem_123874) +{ + #define segmap_group_sizze_110964 (mainzisegmap_group_sizze_110909) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128014; + int32_t local_tid_128015; + int64_t group_sizze_128018; + int32_t wave_sizze_128017; + int32_t group_tid_128016; + + global_tid_128014 = 
get_global_id(0); + local_tid_128015 = get_local_id(0); + group_sizze_128018 = get_local_size(0); + wave_sizze_128017 = LOCKSTEP_WIDTH; + group_tid_128016 = get_group_id(0); + + int32_t phys_tid_110907; + + phys_tid_110907 = global_tid_128014; + + int32_t phys_group_id_128019; + + phys_group_id_128019 = get_group_id(0); + for (int32_t i_128020 = 0; i_128020 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_110964)) - + phys_group_id_128019, sext_i64_i32(num_groups_110965)); + i_128020++) { + int32_t virt_group_id_128021 = phys_group_id_128019 + i_128020 * + sext_i64_i32(num_groups_110965); + int64_t gtid_110906 = sext_i32_i64(virt_group_id_128021) * + segmap_group_sizze_110964 + sext_i32_i64(local_tid_128015); + + if (slt64(gtid_110906, m_75136)) { + double defunc_11_internal_map_res_transformed_row_110973 = + ((__global + double *) defunc_3_map_res_mem_120231)[gtid_110906 * + n_75139 + + index_primexp_76437]; + double defunc_0_f_res_110974; + double redout_119867 = 0.0; + + for (int64_t i_119869 = 0; i_119869 < k2p2zq_75151; i_119869++) { + double x_110980 = ((__global double *) mem_120246)[i_119869 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110906 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double defunc_0_f_res_110981; + double redout_119871 = 0.0; + + for (int64_t i_119872 = 0; i_119872 < k2p2zq_75151; + i_119872++) { + double x_110985 = ((__global + double *) mem_120246)[i_119872 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110906 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_110986 = ((__global + double *) mem_123798)[i_119869 * + (m_75136 * + k2p2zq_75151) + + i_119872 * + m_75136 + + gtid_110906]; + double defunc_1_f_res_110987 = x_110985 * x_110986; + double defunc_1_op_res_110984 = defunc_1_f_res_110987 + + redout_119871; + double redout_tmp_128024 = defunc_1_op_res_110984; - x_46580 = defunc_1_op_res_46582; - } - // write result of operation - { - ((__local - float *) 
red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_46580; - } - } - skip_waves_46585 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46565) == (int64_t) 0) { - x_acc_46578 = x_46580; - } - } - if (groups_per_segment_46555 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46565 == 0) { - ((__global float *) mem_45278)[gtid_41490] = x_acc_46578; + redout_119871 = redout_tmp_128024; } + defunc_0_f_res_110981 = redout_119871; + + double defunc_1_f_res_110988 = x_110980 * defunc_0_f_res_110981; + double defunc_1_op_res_110978 = defunc_1_f_res_110988 + + redout_119867; + + ((__global double *) mem_123804)[phys_tid_110907 + i_119869 * + num_threads_126245] = + defunc_0_f_res_110981; + + double redout_tmp_128022 = defunc_1_op_res_110978; + + redout_119867 = redout_tmp_128022; } - } else { - int32_t old_counter_46586; + defunc_0_f_res_110974 = redout_119867; - // first thread in group saves group result to global memory - { - if (local_tid_46565 == 0) { - ((__global - float *) group_res_arr_mem_46560)[sext_i32_i64(virt_group_id_46575) * - segred_group_sizze_41520] = - x_acc_46578; - mem_fence_global(); - old_counter_46586 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46562)[sext_i32_i64(srem32(flat_segment_id_46576, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46571)[(int64_t) 0] = - old_counter_46586 == groups_per_segment_46555 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + double fr_110989 = 1.0 + defunc_0_f_res_110974; + double defunc_0_f_res_110990; + double redout_119873 = 0.0; - bool is_last_group_46587; + for (int64_t i_119874 = 0; i_119874 < k2p2zq_75151; i_119874++) { + double x_110994 = ((__global double *) mem_120246)[i_119874 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110906 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_110995 = 
((__global double *) mem_123801)[i_119874 * + m_75136 + + gtid_110906]; + double defunc_1_f_res_110996 = x_110994 * x_110995; + double defunc_1_op_res_110993 = defunc_1_f_res_110996 + + redout_119873; + double redout_tmp_128025 = defunc_1_op_res_110993; + + redout_119873 = redout_tmp_128025; + } + defunc_0_f_res_110990 = redout_119873; - is_last_group_46587 = ((__local - bool *) sync_arr_mem_46571)[(int64_t) 0]; - if (is_last_group_46587) { - if (local_tid_46565 == 0) { - old_counter_46586 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46562)[sext_i32_i64(srem32(flat_segment_id_46576, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46555)); - } - // read in the per-group-results - { - int64_t read_per_thread_46588 = - sdiv_up64(groups_per_segment_46555, - segred_group_sizze_41520); - - x_41524 = 0.0F; - for (int64_t i_46589 = 0; i_46589 < read_per_thread_46588; - i_46589++) { - int64_t group_res_id_46590 = - sext_i32_i64(local_tid_46565) * - read_per_thread_46588 + i_46589; - int64_t index_of_group_res_46591 = - sext_i32_i64(flat_segment_id_46576) * - groups_per_segment_46555 + group_res_id_46590; - - if (slt64(group_res_id_46590, - groups_per_segment_46555)) { - x_41525 = ((__global - float *) group_res_arr_mem_46560)[index_of_group_res_46591 * - segred_group_sizze_41520]; - - float defunc_1_op_res_41526; - - defunc_1_op_res_41526 = x_41524 + x_41525; - x_41524 = defunc_1_op_res_41526; - } - } - } - ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_41524; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46592; - int32_t skip_waves_46593; + double resid_110997 = + defunc_11_internal_map_res_transformed_row_110973 - + defunc_0_f_res_110990; + double sqrt_res_110998; + + sqrt_res_110998 = futrts_sqrt64(fr_110989); + + double recresid_r_110999 = resid_110997 / sqrt_res_110998; + + for (int64_t i_119879 = 0; i_119879 < k2p2zq_75151; i_119879++) { + double x_111002 = 
((__global + double *) mem_123804)[phys_tid_110907 + + i_119879 * + num_threads_126245]; + double x_111004 = ((__global double *) mem_123801)[i_119879 * + m_75136 + + gtid_110906]; + double defunc_0_f_res_111005; + double redout_119883 = 0.0; + + for (int64_t i_119885 = 0; i_119885 < k2p2zq_75151; + i_119885++) { + double x_111010 = ((__global + double *) mem_123804)[phys_tid_110907 + + i_119885 * + num_threads_126245]; + double x_111011 = ((__global + double *) mem_123798)[i_119879 * + (m_75136 * + k2p2zq_75151) + + i_119885 * + m_75136 + + gtid_110906]; + double x_111012 = ((__global + double *) mem_120246)[i_119885 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110906 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111013 = x_111002 * x_111010; + double y_111014 = x_111013 / fr_110989; + double defunc_1_f_res_111015 = x_111011 - y_111014; + double defunc_1_f_res_111016 = x_111012 * + defunc_1_f_res_111015; + double defunc_1_op_res_111009 = defunc_1_f_res_111016 + + redout_119883; - skip_waves_46593 = 1; + ((__global double *) mem_123840)[phys_tid_110907 + + i_119885 * + num_threads_126245] = + defunc_1_f_res_111015; - float x_46580; - float x_46581; + double redout_tmp_128028 = defunc_1_op_res_111009; - offset_46592 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46565, - sext_i64_i32(segred_group_sizze_41520))) { - x_46580 = ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46592)]; - } - } - offset_46592 = 1; - while (slt32(offset_46592, wave_sizze_46567)) { - if (slt32(local_tid_46565 + offset_46592, - sext_i64_i32(segred_group_sizze_41520)) && - ((local_tid_46565 - squot32(local_tid_46565, - wave_sizze_46567) * - wave_sizze_46567) & (2 * offset_46592 - 1)) == - 0) { - // read array element - { - x_46581 = ((volatile __local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46592)]; - } - // apply reduction operation - { - float defunc_1_op_res_46582 
= x_46580 + x_46581; - - x_46580 = defunc_1_op_res_46582; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_46580; - } - } - offset_46592 *= 2; - } - while (slt32(skip_waves_46593, - squot32(sext_i64_i32(segred_group_sizze_41520) + - wave_sizze_46567 - 1, - wave_sizze_46567))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46592 = skip_waves_46593 * wave_sizze_46567; - if (slt32(local_tid_46565 + offset_46592, - sext_i64_i32(segred_group_sizze_41520)) && - ((local_tid_46565 - squot32(local_tid_46565, - wave_sizze_46567) * - wave_sizze_46567) == 0 && - (squot32(local_tid_46565, wave_sizze_46567) & (2 * - skip_waves_46593 - - 1)) == - 0)) { - // read array element - { - x_46581 = ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565 + - offset_46592)]; - } - // apply reduction operation - { - float defunc_1_op_res_46582 = x_46580 + x_46581; - - x_46580 = defunc_1_op_res_46582; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46569)[sext_i32_i64(local_tid_46565)] = - x_46580; - } - } - skip_waves_46593 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46565 == 0) { - ((__global float *) mem_45278)[gtid_41490] = - x_46580; - } - } - } - } + redout_119883 = redout_tmp_128028; + } + defunc_0_f_res_111005 = redout_119883; + + double defunc_0_g_res_111017 = resid_110997 * + defunc_0_f_res_111005; + double defunc_1_f_res_111018 = x_111004 + defunc_0_g_res_111017; + + ((__global double *) mem_123818)[phys_tid_110907 + i_119879 * + num_threads_126245] = + defunc_1_f_res_111018; + for (int64_t i_128030 = 0; i_128030 < k2p2zq_75151; + i_128030++) { + ((__global double *) mem_123821)[phys_tid_110907 + + (i_119879 * + (num_threads_126245 * + k2p2zq_75151) + + i_128030 * + num_threads_126245)] = + ((__global double *) mem_123840)[phys_tid_110907 + + i_128030 * + num_threads_126245]; + } + } + for (int64_t i_128031 = 0; i_128031 < k2p2zq_75151; 
i_128031++) { + for (int64_t i_128032 = 0; i_128032 < k2p2zq_75151; + i_128032++) { + ((__global double *) mem_123869)[i_128031 * (m_75136 * + k2p2zq_75151) + + i_128032 * m_75136 + + gtid_110906] = ((__global + double *) mem_123821)[phys_tid_110907 + + (i_128031 * + (num_threads_126245 * + k2p2zq_75151) + + i_128032 * + num_threads_126245)]; + } + } + for (int64_t i_128033 = 0; i_128033 < k2p2zq_75151; i_128033++) { + ((__global double *) mem_123872)[i_128033 * m_75136 + + gtid_110906] = ((__global + double *) mem_123818)[phys_tid_110907 + + i_128033 * + num_threads_126245]; + } + ((__global double *) mem_123874)[gtid_110906] = recresid_r_110999; } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_41520 + #undef segmap_group_sizze_110964 } -__kernel void mainzisegred_large_42060(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46752_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46750_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46748_backing_aligned_2, - __local volatile - int64_t *red_arr_mem_46746_backing_aligned_3, - int64_t iota32_arg_29597, - int64_t num_groups_42247, - int64_t groups_per_segment_46728, - int64_t elements_per_thread_46729, - int64_t virt_num_groups_46730, __global - unsigned char *mem_45284, __global - unsigned char *mem_45296, __global - unsigned char *mem_45298, __global - unsigned char *mem_45302, __global - unsigned char *mem_45305, __global - unsigned char *mem_45307, __global - unsigned char *mem_45309, __global - unsigned char *group_res_arr_mem_46733, - __global - unsigned char *group_res_arr_mem_46735, - __global - unsigned char *group_res_arr_mem_46737, - __global - unsigned char *mainzicounter_mem_46739) +__kernel void mainzisegmap_111084(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111318, + int64_t 
num_threads_126255, __global + unsigned char *mem_120246, __global + unsigned char *mem_param_123786, __global + unsigned char *mem_123901, __global + unsigned char *mem_123907, __global + unsigned char *mem_123910, __global + unsigned char *mem_123916, __global + unsigned char *mem_123921, __global + unsigned char *mem_123937, __global + unsigned char *mem_123940) { - #define segred_group_sizze_42246 (mainzisegred_group_sizze_42054) + #define segmap_group_sizze_111317 (mainzisegmap_group_sizze_111087) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46752_backing_3 = - (__local volatile - char *) sync_arr_mem_46752_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46750_backing_2 = - (__local volatile - char *) red_arr_mem_46750_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46748_backing_1 = - (__local volatile - char *) red_arr_mem_46748_backing_aligned_2; - __local volatile char *restrict red_arr_mem_46746_backing_0 = - (__local volatile - char *) red_arr_mem_46746_backing_aligned_3; if (*global_failure >= 0) return; - int32_t global_tid_46741; - int32_t local_tid_46742; - int64_t group_sizze_46745; - int32_t wave_sizze_46744; - int32_t group_tid_46743; + int32_t global_tid_128199; + int32_t local_tid_128200; + int64_t group_sizze_128203; + int32_t wave_sizze_128202; + int32_t group_tid_128201; + + global_tid_128199 = get_global_id(0); + local_tid_128200 = get_local_id(0); + group_sizze_128203 = get_local_size(0); + wave_sizze_128202 = LOCKSTEP_WIDTH; + group_tid_128201 = get_group_id(0); + + int32_t phys_tid_111084; + + phys_tid_111084 = global_tid_128199; + + int32_t phys_group_id_128204; + + phys_group_id_128204 = get_group_id(0); + for (int32_t i_128205 = 0; i_128205 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + segmap_group_sizze_111317)) - + phys_group_id_128204, sext_i64_i32(num_groups_111318)); + i_128205++) { + int32_t 
virt_group_id_128206 = phys_group_id_128204 + i_128205 * + sext_i64_i32(num_groups_111318); + int64_t gtid_111082 = squot64(sext_i32_i64(virt_group_id_128206) * + segmap_group_sizze_111317 + + sext_i32_i64(local_tid_128200), + k2p2zq_75151); + int64_t gtid_111083 = sext_i32_i64(virt_group_id_128206) * + segmap_group_sizze_111317 + sext_i32_i64(local_tid_128200) - + squot64(sext_i32_i64(virt_group_id_128206) * + segmap_group_sizze_111317 + + sext_i32_i64(local_tid_128200), k2p2zq_75151) * + k2p2zq_75151; + + if (slt64(gtid_111082, m_75136) && slt64(gtid_111083, k2p2zq_75151)) { + double fr_111329 = ((__global double *) mem_123910)[gtid_111082]; + double resid_111330 = ((__global double *) mem_123916)[gtid_111082]; + double x_111331 = ((__global double *) mem_123907)[gtid_111082 * + k2p2zq_75151 + + gtid_111083]; + double x_111333 = ((__global + double *) mem_param_123786)[gtid_111082 * + k2p2zq_75151 + + gtid_111083]; + double defunc_0_f_res_111334; + double redout_119892 = 0.0; + + for (int64_t i_119894 = 0; i_119894 < k2p2zq_75151; i_119894++) { + double x_111339 = ((__global double *) mem_123907)[gtid_111082 * + k2p2zq_75151 + + i_119894]; + double x_111340 = ((__global double *) mem_123901)[i_119894 * + (k2p2zq_75151 * + m_75136) + + gtid_111082 * + k2p2zq_75151 + + gtid_111083]; + double x_111341 = ((__global double *) mem_120246)[i_119894 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111082 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111342 = x_111331 * x_111339; + double y_111343 = x_111342 / fr_111329; + double defunc_1_f_res_111344 = x_111340 - y_111343; + double defunc_1_f_res_111345 = x_111341 * defunc_1_f_res_111344; + double defunc_1_op_res_111338 = defunc_1_f_res_111345 + + redout_119892; + + ((__global double *) mem_123921)[phys_tid_111084 + i_119894 * + num_threads_126255] = + defunc_1_f_res_111344; + + double redout_tmp_128207 = defunc_1_op_res_111338; + + redout_119892 = redout_tmp_128207; + } + defunc_0_f_res_111334 = 
redout_119892; + + double defunc_0_g_res_111346 = resid_111330 * defunc_0_f_res_111334; + double defunc_1_f_res_111347 = x_111333 + defunc_0_g_res_111346; + + for (int64_t i_128209 = 0; i_128209 < k2p2zq_75151; i_128209++) { + ((__global double *) mem_123937)[i_128209 * (k2p2zq_75151 * + m_75136) + + gtid_111082 * k2p2zq_75151 + + gtid_111083] = ((__global + double *) mem_123921)[phys_tid_111084 + + i_128209 * + num_threads_126255]; + } + ((__global double *) mem_123940)[gtid_111082 * k2p2zq_75151 + + gtid_111083] = + defunc_1_f_res_111347; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_111317 +} +__kernel void mainzisegmap_111122(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *mem_param_123786, __global + unsigned char *mem_123916, __global + unsigned char *mem_123944, __global + unsigned char *mem_123952) +{ + #define segmap_group_sizze_111374 (mainzisegmap_group_sizze_111125) - global_tid_46741 = get_global_id(0); - local_tid_46742 = get_local_id(0); - group_sizze_46745 = get_local_size(0); - wave_sizze_46744 = LOCKSTEP_WIDTH; - group_tid_46743 = get_group_id(0); + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - int32_t phys_tid_42060; + if (*global_failure >= 0) + return; - phys_tid_42060 = global_tid_46741; + int32_t global_tid_128270; + int32_t local_tid_128271; + int64_t group_sizze_128274; + int32_t wave_sizze_128273; + int32_t group_tid_128272; + + global_tid_128270 = get_global_id(0); + local_tid_128271 = get_local_id(0); + group_sizze_128274 = get_local_size(0); + wave_sizze_128273 = LOCKSTEP_WIDTH; + group_tid_128272 = get_group_id(0); + + int32_t phys_tid_111122; + + phys_tid_111122 = global_tid_128270; + + int64_t gtid_111120; + + gtid_111120 = squot64(sext_i32_i64(group_tid_128272) * + segmap_group_sizze_111374 + + sext_i32_i64(local_tid_128271), k2p2zq_75151); + + int64_t gtid_111121; + + gtid_111121 
= sext_i32_i64(group_tid_128272) * segmap_group_sizze_111374 + + sext_i32_i64(local_tid_128271) - + squot64(sext_i32_i64(group_tid_128272) * segmap_group_sizze_111374 + + sext_i32_i64(local_tid_128271), k2p2zq_75151) * k2p2zq_75151; + if (slt64(gtid_111120, m_75136) && slt64(gtid_111121, k2p2zq_75151)) { + double resid_111377 = ((__global double *) mem_123916)[gtid_111120]; + double x_111378 = ((__global double *) mem_param_123786)[gtid_111120 * + k2p2zq_75151 + + gtid_111121]; + double defunc_0_f_res_111379 = ((__global + double *) mem_123944)[gtid_111120 * + k2p2zq_75151 + + gtid_111121]; + double defunc_0_g_res_111380 = resid_111377 * defunc_0_f_res_111379; + double defunc_1_f_res_111381 = x_111378 + defunc_0_g_res_111380; + + ((__global double *) mem_123952)[gtid_111120 * k2p2zq_75151 + + gtid_111121] = defunc_1_f_res_111381; + } - __local char *red_arr_mem_46746; + error_0: + return; + #undef segmap_group_sizze_111374 +} +__kernel void mainzisegmap_111199(__global int *global_failure, int64_t m_75136, + int64_t n_75139, int64_t index_primexp_76437, + __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123910, __global + unsigned char *mem_123913, __global + unsigned char *mem_123916, __global + unsigned char *mem_123918) +{ + #define segmap_group_sizze_111305 (mainzisegmap_group_sizze_111201) - red_arr_mem_46746 = (__local char *) red_arr_mem_46746_backing_0; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - __local char *red_arr_mem_46748; + if (*global_failure >= 0) + return; - red_arr_mem_46748 = (__local char *) red_arr_mem_46748_backing_1; + int32_t global_tid_128194; + int32_t local_tid_128195; + int64_t group_sizze_128198; + int32_t wave_sizze_128197; + int32_t group_tid_128196; + + global_tid_128194 = get_global_id(0); + local_tid_128195 = get_local_id(0); + group_sizze_128198 = get_local_size(0); + wave_sizze_128197 = LOCKSTEP_WIDTH; + group_tid_128196 = get_group_id(0); + + int32_t 
phys_tid_111199; + + phys_tid_111199 = global_tid_128194; + + int64_t gtid_111198; + + gtid_111198 = sext_i32_i64(group_tid_128196) * segmap_group_sizze_111305 + + sext_i32_i64(local_tid_128195); + if (slt64(gtid_111198, m_75136)) { + double defunc_11_internal_map_res_transformed_row_111309 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_111198 * + n_75139 + + index_primexp_76437]; + double fr_111310 = ((__global double *) mem_123910)[gtid_111198]; + double defunc_0_f_res_111311 = ((__global + double *) mem_123913)[gtid_111198]; + double resid_111312 = + defunc_11_internal_map_res_transformed_row_111309 - + defunc_0_f_res_111311; + double sqrt_res_111313; + + sqrt_res_111313 = futrts_sqrt64(fr_111310); + + double recresid_r_111314 = resid_111312 / sqrt_res_111313; + + ((__global double *) mem_123916)[gtid_111198] = resid_111312; + ((__global double *) mem_123918)[gtid_111198] = recresid_r_111314; + } - __local char *red_arr_mem_46750; + error_0: + return; + #undef segmap_group_sizze_111305 +} +__kernel void mainzisegmap_111230(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_123904, __global + unsigned char *mem_123910) +{ + #define segmap_group_sizze_111285 (mainzisegmap_group_sizze_111232) - red_arr_mem_46750 = (__local char *) red_arr_mem_46750_backing_2; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - __local char *sync_arr_mem_46752; + if (*global_failure >= 0) + return; - sync_arr_mem_46752 = (__local char *) sync_arr_mem_46752_backing_3; + int32_t global_tid_128129; + int32_t local_tid_128130; + int64_t group_sizze_128133; + int32_t wave_sizze_128132; + int32_t group_tid_128131; - int32_t phys_group_id_46754; + global_tid_128129 = get_global_id(0); + local_tid_128130 = get_local_id(0); + group_sizze_128133 = get_local_size(0); + wave_sizze_128132 = LOCKSTEP_WIDTH; + group_tid_128131 = get_group_id(0); - phys_group_id_46754 = get_group_id(0); - for (int32_t i_46755 = 0; i_46755 < - 
sdiv_up32(sext_i64_i32(virt_num_groups_46730) - phys_group_id_46754, - sext_i64_i32(num_groups_42247)); i_46755++) { - int32_t virt_group_id_46756 = phys_group_id_46754 + i_46755 * - sext_i64_i32(num_groups_42247); - int32_t flat_segment_id_46757 = squot32(virt_group_id_46756, - sext_i64_i32(groups_per_segment_46728)); - int64_t global_tid_46758 = srem64(sext_i32_i64(virt_group_id_46756) * - segred_group_sizze_42246 + - sext_i32_i64(local_tid_46742), - segred_group_sizze_42246 * - groups_per_segment_46728); - int64_t gtid_42051 = sext_i32_i64(flat_segment_id_46757); - int64_t gtid_42059; - bool x_acc_46759; - int32_t x_acc_46760; - float x_acc_46761; - int64_t chunk_sizze_46762; - int64_t starting_point_46763; - - starting_point_46763 = sext_i32_i64(sext_i64_i32(global_tid_46758)) * - elements_per_thread_46729; - - int64_t remaining_elements_46764; - - remaining_elements_46764 = iota32_arg_29597 - starting_point_46763; - if (sle64(remaining_elements_46764, (int64_t) 0) || - sle64(iota32_arg_29597, starting_point_46763)) { - chunk_sizze_46762 = (int64_t) 0; - } else { - if (slt64(iota32_arg_29597, - (sext_i32_i64(sext_i64_i32(global_tid_46758)) + - (int64_t) 1) * elements_per_thread_46729)) { - chunk_sizze_46762 = iota32_arg_29597 - - sext_i32_i64(sext_i64_i32(global_tid_46758)) * - elements_per_thread_46729; - } else { - chunk_sizze_46762 = elements_per_thread_46729; - } - } + int32_t phys_tid_111230; + + phys_tid_111230 = global_tid_128129; + + int64_t gtid_111229; + + gtid_111229 = sext_i32_i64(group_tid_128131) * segmap_group_sizze_111285 + + sext_i32_i64(local_tid_128130); + if (slt64(gtid_111229, m_75136)) { + double defunc_0_f_res_111288 = ((__global + double *) mem_123904)[gtid_111229]; + double fr_111289 = 1.0 + defunc_0_f_res_111288; - bool x_42252; - int32_t x_42253; - float x_42254; - bool x_42255; - int32_t x_42256; - float x_42257; + ((__global double *) mem_123910)[gtid_111229] = fr_111289; + } + + error_0: + return; + #undef segmap_group_sizze_111285 
+} +__kernel void mainzisegmap_111405(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, int64_t num_groups_111456, + int64_t num_threads_126259, __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_121934, __global + unsigned char *mem_124009, __global + unsigned char *mem_124024, __global + unsigned char *mem_124026) +{ + #define segmap_group_sizze_111455 (mainzisegmap_group_sizze_111407) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; - // neutral-initialise the accumulators - { - x_acc_46759 = 0; - x_acc_46760 = -1; - x_acc_46761 = 0.0F; - } - for (int64_t i_46779 = 0; i_46779 < elements_per_thread_46729; - i_46779++) { - gtid_42059 = sext_i32_i64(local_tid_46742) + - (sext_i32_i64(squot32(sext_i64_i32(global_tid_46758), - sext_i64_i32(segred_group_sizze_42246))) * - elements_per_thread_46729 + i_46779) * - segred_group_sizze_42246; - if (slt64(gtid_42059, iota32_arg_29597)) { - // apply map function - { - int32_t y_42266 = ((__global - int32_t *) mem_45298)[gtid_42051]; - float y_42267 = ((__global float *) mem_45296)[gtid_42051]; - float x_42271 = ((__global float *) mem_45302)[gtid_42051 * - iota32_arg_29597 + - gtid_42059]; - float x_42272 = ((__global float *) mem_45284)[gtid_42059]; - int32_t index_primexp_42404 = sext_i64_i32(gtid_42059); - float defunc_0_f_res_42275 = x_42271 / y_42267; - bool cond_42276 = slt32(index_primexp_42404, y_42266); - bool isnan_res_42277; - - isnan_res_42277 = futrts_isnan32(defunc_0_f_res_42275); - - bool cond_t_res_42278 = !isnan_res_42277; - bool x_42279 = cond_42276 && cond_t_res_42278; - float abs_res_42280 = (float) fabs(defunc_0_f_res_42275); - bool defunc_2_f_res_t_res_42281 = x_42272 < 
abs_res_42280; - bool x_42282 = x_42279 && defunc_2_f_res_t_res_42281; - float defunc_1_f_res_42283; - - if (cond_42276) { - defunc_1_f_res_42283 = defunc_0_f_res_42275; + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128282; + int32_t local_tid_128283; + int64_t group_sizze_128286; + int32_t wave_sizze_128285; + int32_t group_tid_128284; + + global_tid_128282 = get_global_id(0); + local_tid_128283 = get_local_id(0); + group_sizze_128286 = get_local_size(0); + wave_sizze_128285 = LOCKSTEP_WIDTH; + group_tid_128284 = get_group_id(0); + + int32_t phys_tid_111405; + + phys_tid_111405 = global_tid_128282; + + int32_t phys_group_id_128287; + + phys_group_id_128287 = get_group_id(0); + for (int32_t i_128288 = 0; i_128288 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_111455)) - + phys_group_id_128287, sext_i64_i32(num_groups_111456)); + i_128288++) { + int32_t virt_group_id_128289 = phys_group_id_128287 + i_128288 * + sext_i64_i32(num_groups_111456); + int64_t gtid_111404 = sext_i32_i64(virt_group_id_128289) * + segmap_group_sizze_111455 + sext_i32_i64(local_tid_128283); + + if (slt64(gtid_111404, m_75136)) { + int64_t x_111461 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_111404]; + int64_t n_111462 = sub64(x_111461, k2p2zq_75151); + double i64_res_111463 = sitofp_i64_f64(n_111462); + double defunc_2_reduce_res_111464; + double redout_119896 = 0.0; + + for (int64_t i_119897 = 0; i_119897 < num_recresids_padded_75809; + i_119897++) { + double x_111472 = ((__global double *) mem_121934)[i_119897 * + m_75136 + + gtid_111404]; + bool isnan_res_111467; + + isnan_res_111467 = futrts_isnan64(redout_119896); + + double defunc_1_op_res_111468; + + if (isnan_res_111467) { + defunc_1_op_res_111468 = x_111472; + } else { + bool isnan_res_111469; + + isnan_res_111469 = futrts_isnan64(x_111472); + + double defunc_1_op_res_f_res_111470; + + if (isnan_res_111469) { + defunc_1_op_res_f_res_111470 
= redout_119896; } else { - defunc_1_f_res_42283 = 0.0F; - } - // save map-out results - { } - // load accumulator - { - x_42252 = x_acc_46759; - x_42253 = x_acc_46760; - x_42254 = x_acc_46761; - } - // load new values - { - x_42255 = x_42282; - x_42256 = index_primexp_42404; - x_42257 = defunc_1_f_res_42283; - } - // apply reduction operator - { - bool defunc_1_op_res_42258; - int32_t defunc_1_op_res_42259; - - if (x_42252) { - defunc_1_op_res_42258 = x_42252; - defunc_1_op_res_42259 = x_42253; - } else { - bool x_42260 = x_42255 && x_42255; - bool x_42261 = !x_42255; - bool y_42262 = x_42252 && x_42261; - bool defunc_1_op_res_f_res_42263 = x_42260 || - y_42262; - int32_t defunc_1_op_res_f_res_42264; - - if (x_42255) { - defunc_1_op_res_f_res_42264 = x_42256; - } else { - defunc_1_op_res_f_res_42264 = x_42253; - } - defunc_1_op_res_42258 = defunc_1_op_res_f_res_42263; - defunc_1_op_res_42259 = defunc_1_op_res_f_res_42264; - } - - float defunc_1_op_res_42265 = x_42254 + x_42257; + double defunc_1_op_res_f_res_f_res_111471 = x_111472 + + redout_119896; - // store in accumulator - { - x_acc_46759 = defunc_1_op_res_42258; - x_acc_46760 = defunc_1_op_res_42259; - x_acc_46761 = defunc_1_op_res_42265; - } + defunc_1_op_res_f_res_111470 = + defunc_1_op_res_f_res_f_res_111471; } + defunc_1_op_res_111468 = defunc_1_op_res_f_res_111470; } + + double redout_tmp_128290 = defunc_1_op_res_111468; + + redout_119896 = redout_tmp_128290; } - // to reduce current chunk, first store our result in memory - { - x_42252 = x_acc_46759; - x_42253 = x_acc_46760; - x_42254 = x_acc_46761; - ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_42252; - ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_42253; - ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_42254; - } - barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_111464 = redout_119896; - int32_t offset_46780; - int32_t skip_waves_46781; + double 
x_mean_111473 = defunc_2_reduce_res_111464 / i64_res_111463; + double defunc_2_reduce_res_111474; + double redout_119898 = 0.0; - skip_waves_46781 = 1; - - bool x_46765; - int32_t x_46766; - float x_46767; - bool x_46768; - int32_t x_46769; - float x_46770; - - offset_46780 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46742, - sext_i64_i32(segred_group_sizze_42246))) { - x_46765 = ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46766 = ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46767 = ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - } - } - offset_46780 = 1; - while (slt32(offset_46780, wave_sizze_46744)) { - if (slt32(local_tid_46742 + offset_46780, - sext_i64_i32(segred_group_sizze_42246)) && - ((local_tid_46742 - squot32(local_tid_46742, - wave_sizze_46744) * - wave_sizze_46744) & (2 * offset_46780 - 1)) == 0) { - // read array element - { - x_46768 = ((volatile __local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46769 = ((volatile __local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46770 = ((volatile __local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46771; - int32_t defunc_1_op_res_46772; - - if (x_46765) { - defunc_1_op_res_46771 = x_46765; - defunc_1_op_res_46772 = x_46766; - } else { - bool x_46773 = x_46768 && x_46768; - bool x_46774 = !x_46768; - bool y_46775 = x_46765 && x_46774; - bool defunc_1_op_res_f_res_46776 = x_46773 || - y_46775; - int32_t defunc_1_op_res_f_res_46777; - - if (x_46768) { - defunc_1_op_res_f_res_46777 = x_46769; - } else { - defunc_1_op_res_f_res_46777 = x_46766; - } - defunc_1_op_res_46771 = defunc_1_op_res_f_res_46776; - defunc_1_op_res_46772 = defunc_1_op_res_f_res_46777; - } - - 
float defunc_1_op_res_46778 = x_46767 + x_46770; - - x_46765 = defunc_1_op_res_46771; - x_46766 = defunc_1_op_res_46772; - x_46767 = defunc_1_op_res_46778; - } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_46765; - ((volatile __local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_46766; - ((volatile __local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_46767; - } - } - offset_46780 *= 2; - } - while (slt32(skip_waves_46781, - squot32(sext_i64_i32(segred_group_sizze_42246) + - wave_sizze_46744 - 1, wave_sizze_46744))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46780 = skip_waves_46781 * wave_sizze_46744; - if (slt32(local_tid_46742 + offset_46780, - sext_i64_i32(segred_group_sizze_42246)) && - ((local_tid_46742 - squot32(local_tid_46742, - wave_sizze_46744) * - wave_sizze_46744) == 0 && (squot32(local_tid_46742, - wave_sizze_46744) & - (2 * skip_waves_46781 - 1)) == - 0)) { - // read array element - { - x_46768 = ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46769 = ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - x_46770 = ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46780)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46771; - int32_t defunc_1_op_res_46772; - - if (x_46765) { - defunc_1_op_res_46771 = x_46765; - defunc_1_op_res_46772 = x_46766; - } else { - bool x_46773 = x_46768 && x_46768; - bool x_46774 = !x_46768; - bool y_46775 = x_46765 && x_46774; - bool defunc_1_op_res_f_res_46776 = x_46773 || - y_46775; - int32_t defunc_1_op_res_f_res_46777; - - if (x_46768) { - defunc_1_op_res_f_res_46777 = x_46769; - } else { - defunc_1_op_res_f_res_46777 = x_46766; - } - defunc_1_op_res_46771 = defunc_1_op_res_f_res_46776; - defunc_1_op_res_46772 = defunc_1_op_res_f_res_46777; - } - - float defunc_1_op_res_46778 
= x_46767 + x_46770; - - x_46765 = defunc_1_op_res_46771; - x_46766 = defunc_1_op_res_46772; - x_46767 = defunc_1_op_res_46778; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_46765; - ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_46766; - ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_46767; - } - } - skip_waves_46781 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46742) == (int64_t) 0) { - x_acc_46759 = x_46765; - x_acc_46760 = x_46766; - x_acc_46761 = x_46767; - } - } - // first thread keeps accumulator; others reset to neutral element - { - if (!(sext_i32_i64(local_tid_46742) == (int64_t) 0)) { - x_acc_46759 = 0; - x_acc_46760 = -1; - x_acc_46761 = 0.0F; - } - } - } - x_42252 = x_acc_46759; - x_42253 = x_acc_46760; - x_42254 = x_acc_46761; - if (groups_per_segment_46728 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46742 == 0) { - ((__global bool *) mem_45305)[gtid_42051] = x_acc_46759; - ((__global int32_t *) mem_45307)[gtid_42051] = x_acc_46760; - ((__global float *) mem_45309)[gtid_42051] = x_acc_46761; + for (int64_t i_119899 = 0; i_119899 < num_recresids_padded_75809; + i_119899++) { + double x_111478 = ((__global double *) mem_121934)[i_119899 * + m_75136 + + gtid_111404]; + bool isnan_res_111479; + + isnan_res_111479 = futrts_isnan64(x_111478); + + double defunc_0_f_res_111480; + + if (isnan_res_111479) { + defunc_0_f_res_111480 = 0.0; + } else { + double x_111481 = x_111478 - x_mean_111473; + double defunc_0_f_res_f_res_111482 = fpow64(x_111481, 2.0); + + defunc_0_f_res_111480 = defunc_0_f_res_f_res_111482; } + + double defunc_1_op_res_111477 = defunc_0_f_res_111480 + + redout_119898; + double redout_tmp_128291 = defunc_1_op_res_111477; + + redout_119898 = redout_tmp_128291; } - } else { - int32_t 
old_counter_46782; + defunc_2_reduce_res_111474 = redout_119898; - // first thread in group saves group result to global memory - { - if (local_tid_46742 == 0) { - ((__global - bool *) group_res_arr_mem_46733)[sext_i32_i64(virt_group_id_46756) * - segred_group_sizze_42246] = - x_acc_46759; - ((__global - int32_t *) group_res_arr_mem_46735)[sext_i32_i64(virt_group_id_46756) * - segred_group_sizze_42246] = - x_acc_46760; - ((__global - float *) group_res_arr_mem_46737)[sext_i32_i64(virt_group_id_46756) * - segred_group_sizze_42246] = - x_acc_46761; - mem_fence_global(); - old_counter_46782 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46739)[sext_i32_i64(srem32(flat_segment_id_46757, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46752)[(int64_t) 0] = - old_counter_46782 == groups_per_segment_46728 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + double y_111483 = i64_res_111463 - 1.0; + double binop_p_111484 = defunc_2_reduce_res_111474 / y_111483; + double defunc_0_f_res_111485; - bool is_last_group_46783; + defunc_0_f_res_111485 = futrts_sqrt64(binop_p_111484); - is_last_group_46783 = ((__local - bool *) sync_arr_mem_46752)[(int64_t) 0]; - if (is_last_group_46783) { - if (local_tid_46742 == 0) { - old_counter_46782 = - atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46739)[sext_i32_i64(srem32(flat_segment_id_46757, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46728)); - } - // read in the per-group-results - { - int64_t read_per_thread_46784 = - sdiv_up64(groups_per_segment_46728, - segred_group_sizze_42246); - - x_42252 = 0; - x_42253 = -1; - x_42254 = 0.0F; - for (int64_t i_46785 = 0; i_46785 < read_per_thread_46784; - i_46785++) { - int64_t group_res_id_46786 = - sext_i32_i64(local_tid_46742) * - read_per_thread_46784 + i_46785; - int64_t index_of_group_res_46787 = - sext_i32_i64(flat_segment_id_46757) * - groups_per_segment_46728 + group_res_id_46786; - - if 
(slt64(group_res_id_46786, - groups_per_segment_46728)) { - x_42255 = ((__global - bool *) group_res_arr_mem_46733)[index_of_group_res_46787 * - segred_group_sizze_42246]; - x_42256 = ((__global - int32_t *) group_res_arr_mem_46735)[index_of_group_res_46787 * - segred_group_sizze_42246]; - x_42257 = ((__global - float *) group_res_arr_mem_46737)[index_of_group_res_46787 * - segred_group_sizze_42246]; - - bool defunc_1_op_res_42258; - int32_t defunc_1_op_res_42259; - - if (x_42252) { - defunc_1_op_res_42258 = x_42252; - defunc_1_op_res_42259 = x_42253; - } else { - bool x_42260 = x_42255 && x_42255; - bool x_42261 = !x_42255; - bool y_42262 = x_42252 && x_42261; - bool defunc_1_op_res_f_res_42263 = x_42260 || - y_42262; - int32_t defunc_1_op_res_f_res_42264; - - if (x_42255) { - defunc_1_op_res_f_res_42264 = x_42256; - } else { - defunc_1_op_res_f_res_42264 = x_42253; - } - defunc_1_op_res_42258 = - defunc_1_op_res_f_res_42263; - defunc_1_op_res_42259 = - defunc_1_op_res_f_res_42264; + double sqrt_res_111486; + + sqrt_res_111486 = futrts_sqrt64(i64_res_111463); + + double fr_111487 = defunc_0_f_res_111485 * sqrt_res_111486; + double discard_119905; + double scanacc_119901 = 0.0; + + for (int64_t i_119903 = 0; i_119903 < Nmk_76536; i_119903++) { + bool cond_111493 = i_119903 == (int64_t) 0; + double defunc_0_f_res_111494; + + if (cond_111493) { + defunc_0_f_res_111494 = 0.0; + } else { + int64_t i_111495 = sub64(i_119903, (int64_t) 1); + bool x_111496 = sle64((int64_t) 0, i_111495); + bool y_111497 = slt64(i_111495, num_recresids_padded_75809); + bool bounds_check_111498 = x_111496 && y_111497; + bool index_certs_111499; + + if (!bounds_check_111498) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 177) == -1) { + global_failure_args[0] = i_111495; + global_failure_args[1] = + num_recresids_padded_75809; + ; } - - float defunc_1_op_res_42265 = x_42254 + x_42257; - - x_42252 = defunc_1_op_res_42258; - x_42253 = defunc_1_op_res_42259; - x_42254 = 
defunc_1_op_res_42265; + local_failure = true; + goto error_0; } } - } - ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_42252; - ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_42253; - ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_42254; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46788; - int32_t skip_waves_46789; - - skip_waves_46789 = 1; - bool x_46765; - int32_t x_46766; - float x_46767; - bool x_46768; - int32_t x_46769; - float x_46770; + double x_111500 = ((__global + double *) mem_121934)[i_111495 * + m_75136 + + gtid_111404]; + double defunc_0_f_res_f_res_111501 = x_111500 / fr_111487; - offset_46788 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46742, - sext_i64_i32(segred_group_sizze_42246))) { - x_46765 = ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46766 = ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46767 = ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - } - } - offset_46788 = 1; - while (slt32(offset_46788, wave_sizze_46744)) { - if (slt32(local_tid_46742 + offset_46788, - sext_i64_i32(segred_group_sizze_42246)) && - ((local_tid_46742 - squot32(local_tid_46742, - wave_sizze_46744) * - wave_sizze_46744) & (2 * offset_46788 - 1)) == - 0) { - // read array element - { - x_46768 = ((volatile __local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46769 = ((volatile __local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46770 = ((volatile __local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46771; - int32_t defunc_1_op_res_46772; - - if (x_46765) { - defunc_1_op_res_46771 = x_46765; - 
defunc_1_op_res_46772 = x_46766; - } else { - bool x_46773 = x_46768 && x_46768; - bool x_46774 = !x_46768; - bool y_46775 = x_46765 && x_46774; - bool defunc_1_op_res_f_res_46776 = - x_46773 || y_46775; - int32_t defunc_1_op_res_f_res_46777; - - if (x_46768) { - defunc_1_op_res_f_res_46777 = x_46769; - } else { - defunc_1_op_res_f_res_46777 = x_46766; - } - defunc_1_op_res_46771 = - defunc_1_op_res_f_res_46776; - defunc_1_op_res_46772 = - defunc_1_op_res_f_res_46777; - } - - float defunc_1_op_res_46778 = x_46767 + x_46770; - - x_46765 = defunc_1_op_res_46771; - x_46766 = defunc_1_op_res_46772; - x_46767 = defunc_1_op_res_46778; - } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_46765; - ((volatile __local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_46766; - ((volatile __local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_46767; - } - } - offset_46788 *= 2; - } - while (slt32(skip_waves_46789, - squot32(sext_i64_i32(segred_group_sizze_42246) + - wave_sizze_46744 - 1, - wave_sizze_46744))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46788 = skip_waves_46789 * wave_sizze_46744; - if (slt32(local_tid_46742 + offset_46788, - sext_i64_i32(segred_group_sizze_42246)) && - ((local_tid_46742 - squot32(local_tid_46742, - wave_sizze_46744) * - wave_sizze_46744) == 0 && - (squot32(local_tid_46742, wave_sizze_46744) & (2 * - skip_waves_46789 - - 1)) == - 0)) { - // read array element - { - x_46768 = ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46769 = ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - x_46770 = ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742 + - offset_46788)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46771; - int32_t defunc_1_op_res_46772; - - if (x_46765) { - defunc_1_op_res_46771 = x_46765; - 
defunc_1_op_res_46772 = x_46766; - } else { - bool x_46773 = x_46768 && x_46768; - bool x_46774 = !x_46768; - bool y_46775 = x_46765 && x_46774; - bool defunc_1_op_res_f_res_46776 = - x_46773 || y_46775; - int32_t defunc_1_op_res_f_res_46777; - - if (x_46768) { - defunc_1_op_res_f_res_46777 = x_46769; - } else { - defunc_1_op_res_f_res_46777 = x_46766; - } - defunc_1_op_res_46771 = - defunc_1_op_res_f_res_46776; - defunc_1_op_res_46772 = - defunc_1_op_res_f_res_46777; - } - - float defunc_1_op_res_46778 = x_46767 + x_46770; - - x_46765 = defunc_1_op_res_46771; - x_46766 = defunc_1_op_res_46772; - x_46767 = defunc_1_op_res_46778; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46746)[sext_i32_i64(local_tid_46742)] = - x_46765; - ((__local - int32_t *) red_arr_mem_46748)[sext_i32_i64(local_tid_46742)] = - x_46766; - ((__local - float *) red_arr_mem_46750)[sext_i32_i64(local_tid_46742)] = - x_46767; - } - } - skip_waves_46789 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46742 == 0) { - ((__global bool *) mem_45305)[gtid_42051] = x_46765; - ((__global int32_t *) mem_45307)[gtid_42051] = - x_46766; - ((__global float *) mem_45309)[gtid_42051] = - x_46767; - } - } + defunc_0_f_res_111494 = defunc_0_f_res_f_res_111501; } + + double defunc_1_op_res_111491 = defunc_0_f_res_111494 + + scanacc_119901; + + ((__global double *) mem_124009)[phys_tid_111405 + i_119903 * + num_threads_126259] = + defunc_1_op_res_111491; + + double scanacc_tmp_128292 = defunc_1_op_res_111491; + + scanacc_119901 = scanacc_tmp_128292; + } + discard_119905 = scanacc_119901; + for (int64_t i_128294 = 0; i_128294 < Nmk_76536; i_128294++) { + ((__global double *) mem_124024)[i_128294 * m_75136 + + gtid_111404] = ((__global + double *) mem_124009)[phys_tid_111405 + + i_128294 * + num_threads_126259]; } + ((__global int64_t *) mem_124026)[gtid_111404] = n_111462; } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - 
#undef segred_group_sizze_42246 + #undef segmap_group_sizze_111455 } -__kernel void mainzisegred_nonseg_41409(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46505_backing_aligned_0, - __local volatile - int64_t *sync_arr_mem_46503_backing_aligned_1, - int64_t m_29166, - int64_t num_groups_41404, - int64_t num_threads_46497, __global - unsigned char *defunc_3_map_res_mem_45244, - __global unsigned char *mem_45249, - __global - unsigned char *mainzicounter_mem_46493, - __global - unsigned char *group_res_arr_mem_46495) +__kernel void mainzisegmap_111581(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_124040, __global + unsigned char *mem_124054, __global + unsigned char *mem_124057) { - #define segred_group_sizze_41402 (mainzisegred_group_sizze_41401) + #define segmap_group_sizze_111699 (mainzisegmap_group_sizze_111583) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46505_backing_1 = - (__local volatile - char *) red_arr_mem_46505_backing_aligned_0; - __local volatile char *restrict sync_arr_mem_46503_backing_0 = - (__local volatile - char *) sync_arr_mem_46503_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46498; - int32_t local_tid_46499; - int64_t group_sizze_46502; - int32_t wave_sizze_46501; - int32_t group_tid_46500; + int32_t global_tid_128462; + int32_t local_tid_128463; + int64_t group_sizze_128466; + int32_t wave_sizze_128465; + int32_t group_tid_128464; - global_tid_46498 = get_global_id(0); - local_tid_46499 = get_local_id(0); - group_sizze_46502 = get_local_size(0); - wave_sizze_46501 = LOCKSTEP_WIDTH; - group_tid_46500 = get_group_id(0); + global_tid_128462 = get_global_id(0); + local_tid_128463 = get_local_id(0); + group_sizze_128466 = get_local_size(0); + wave_sizze_128465 = LOCKSTEP_WIDTH; + group_tid_128464 = get_group_id(0); - int32_t phys_tid_41409; + int32_t phys_tid_111581; - phys_tid_41409 
= global_tid_46498; + phys_tid_111581 = global_tid_128462; - __local char *sync_arr_mem_46503; + int64_t gtid_111580; - sync_arr_mem_46503 = (__local char *) sync_arr_mem_46503_backing_0; - - __local char *red_arr_mem_46505; + gtid_111580 = sext_i32_i64(group_tid_128464) * segmap_group_sizze_111699 + + sext_i32_i64(local_tid_128463); + if (slt64(gtid_111580, m_75136)) { + double i64_res_111702 = ((__global double *) mem_124040)[gtid_111580]; + double defunc_2_reduce_res_111703 = ((__global + double *) mem_124054)[gtid_111580]; + double y_111704 = i64_res_111702 - 1.0; + double binop_p_111705 = defunc_2_reduce_res_111703 / y_111704; + double defunc_0_f_res_111706; + + defunc_0_f_res_111706 = futrts_sqrt64(binop_p_111705); + + double sqrt_res_111707; + + sqrt_res_111707 = futrts_sqrt64(i64_res_111702); + + double fr_111708 = defunc_0_f_res_111706 * sqrt_res_111707; + + ((__global double *) mem_124057)[gtid_111580] = fr_111708; + } - red_arr_mem_46505 = (__local char *) red_arr_mem_46505_backing_1; + error_0: + return; + #undef segmap_group_sizze_111699 +} +__kernel void mainzisegmap_111616(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_124040, __global + unsigned char *mem_124048, __global + unsigned char *mem_124051) +{ + #define segmap_group_sizze_111676 (mainzisegmap_group_sizze_111618) - int64_t dummy_41407; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - dummy_41407 = (int64_t) 0; + if (*global_failure >= 0) + return; - int64_t gtid_41408; + int32_t global_tid_128397; + int32_t local_tid_128398; + int64_t group_sizze_128401; + int32_t wave_sizze_128400; + int32_t group_tid_128399; - gtid_41408 = (int64_t) 0; + global_tid_128397 = get_global_id(0); + local_tid_128398 = get_local_id(0); + group_sizze_128401 = get_local_size(0); + wave_sizze_128400 = LOCKSTEP_WIDTH; + group_tid_128399 = get_group_id(0); - int32_t x_acc_46507; - int64_t chunk_sizze_46508; + int32_t phys_tid_111616; - 
chunk_sizze_46508 = smin64(sdiv_up64(m_29166, - sext_i32_i64(sext_i64_i32(segred_group_sizze_41402 * - num_groups_41404))), - sdiv_up64(m_29166 - sext_i32_i64(phys_tid_41409), - num_threads_46497)); + phys_tid_111616 = global_tid_128397; - int32_t x_29564; - int32_t x_29565; + int64_t gtid_111615; - // neutral-initialise the accumulators - { - x_acc_46507 = 0; + gtid_111615 = sext_i32_i64(group_tid_128399) * segmap_group_sizze_111676 + + sext_i32_i64(local_tid_128398); + if (slt64(gtid_111615, m_75136)) { + double i64_res_111679 = ((__global double *) mem_124040)[gtid_111615]; + double defunc_2_reduce_res_111680 = ((__global + double *) mem_124048)[gtid_111615]; + double x_mean_111681 = defunc_2_reduce_res_111680 / i64_res_111679; + + ((__global double *) mem_124051)[gtid_111615] = x_mean_111681; } - for (int64_t i_46512 = 0; i_46512 < chunk_sizze_46508; i_46512++) { - gtid_41408 = sext_i32_i64(phys_tid_41409) + num_threads_46497 * i_46512; - // apply map function - { - int32_t x_29567 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_41408]; + + error_0: + return; + #undef segmap_group_sizze_111676 +} +__kernel void mainzisegmap_111644(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_124040, __global + unsigned char *mem_124042) +{ + #define segmap_group_sizze_111653 (mainzisegmap_group_sizze_111646) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128322; + int32_t local_tid_128323; + int64_t group_sizze_128326; + int32_t wave_sizze_128325; + int32_t group_tid_128324; + + global_tid_128322 = get_global_id(0); + local_tid_128323 = get_local_id(0); + group_sizze_128326 = get_local_size(0); + wave_sizze_128325 = LOCKSTEP_WIDTH; + group_tid_128324 = get_group_id(0); + + int32_t phys_tid_111644; + + phys_tid_111644 = global_tid_128322; + + int64_t 
gtid_111643; + + gtid_111643 = sext_i32_i64(group_tid_128324) * segmap_group_sizze_111653 + + sext_i32_i64(local_tid_128323); + if (slt64(gtid_111643, m_75136)) { + int64_t x_111657 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_111643]; + int64_t n_111658 = sub64(x_111657, k2p2zq_75151); + double i64_res_111659 = sitofp_i64_f64(n_111658); + + ((__global double *) mem_124040)[gtid_111643] = i64_res_111659; + ((__global int64_t *) mem_124042)[gtid_111643] = n_111658; + } + + error_0: + return; + #undef segmap_group_sizze_111653 +} +__kernel void mainzisegmap_111746(__global int *global_failure, int64_t m_75136, + double conf_75145, int64_t Nmk_76536, __global + unsigned char *mem_124072, __global + unsigned char *mem_124074, __global + unsigned char *mem_124078) +{ + #define segmap_group_sizze_111802 (mainzisegmap_group_sizze_111749) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128530; + int32_t local_tid_128531; + int64_t group_sizze_128534; + int32_t wave_sizze_128533; + int32_t group_tid_128532; + + global_tid_128530 = get_global_id(0); + local_tid_128531 = get_local_id(0); + group_sizze_128534 = get_local_size(0); + wave_sizze_128533 = LOCKSTEP_WIDTH; + group_tid_128532 = get_group_id(0); + + int32_t phys_tid_111746; + + phys_tid_111746 = global_tid_128530; + + int64_t gtid_111744; + + gtid_111744 = squot64(sext_i32_i64(group_tid_128532) * + segmap_group_sizze_111802 + + sext_i32_i64(local_tid_128531), Nmk_76536); + + int64_t gtid_111745; + + gtid_111745 = sext_i32_i64(group_tid_128532) * segmap_group_sizze_111802 + + sext_i32_i64(local_tid_128531) - + squot64(sext_i32_i64(group_tid_128532) * segmap_group_sizze_111802 + + sext_i32_i64(local_tid_128531), Nmk_76536) * Nmk_76536; + if (slt64(gtid_111744, m_75136) && slt64(gtid_111745, Nmk_76536)) { + int64_t n_111805 = ((__global int64_t *) mem_124072)[gtid_111744]; + bool cond_111808 = 
slt64(gtid_111745, n_111805); + double defunc_0_f_res_111809; + + if (cond_111808) { + double div_111806 = ((__global double *) mem_124074)[gtid_111744]; + double x_111810 = 2.0 * conf_75145; + double i64_res_111811 = sitofp_i64_f64(gtid_111745); + double x_111812 = x_111810 * i64_res_111811; + double y_111813 = x_111812 / div_111806; + double defunc_0_f_res_t_res_111814 = conf_75145 + y_111813; - // save map-out results - { } - // load accumulator - { - x_29564 = x_acc_46507; - } - // load new values - { - x_29565 = x_29567; - } - // apply reduction operator - { - int32_t defunc_1_op_res_29566 = smax32(x_29564, x_29565); - - // store in accumulator - { - x_acc_46507 = defunc_1_op_res_29566; - } - } + defunc_0_f_res_111809 = defunc_0_f_res_t_res_111814; + } else { + defunc_0_f_res_111809 = NAN; } + ((__global double *) mem_124078)[gtid_111744 * Nmk_76536 + + gtid_111745] = defunc_0_f_res_111809; } - // to reduce current chunk, first store our result in memory - { - x_29564 = x_acc_46507; - ((__local int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_29564; + + error_0: + return; + #undef segmap_group_sizze_111802 +} +__kernel void mainzisegmap_111778(__global int *global_failure, int64_t m_75136, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124072, __global + unsigned char *mem_124074) +{ + #define segmap_group_sizze_111788 (mainzisegmap_group_sizze_111780) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128525; + int32_t local_tid_128526; + int64_t group_sizze_128529; + int32_t wave_sizze_128528; + int32_t group_tid_128527; + + global_tid_128525 = get_global_id(0); + local_tid_128526 = get_local_id(0); + group_sizze_128529 = get_local_size(0); + wave_sizze_128528 = LOCKSTEP_WIDTH; + group_tid_128527 = get_group_id(0); + + int32_t phys_tid_111778; + + phys_tid_111778 = global_tid_128525; + + int64_t 
gtid_111777; + + gtid_111777 = sext_i32_i64(group_tid_128527) * segmap_group_sizze_111788 + + sext_i32_i64(local_tid_128526); + if (slt64(gtid_111777, m_75136)) { + int64_t x_111792 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_111777]; + int64_t n_111793 = add64((int64_t) 1, x_111792); + double i64_res_111794 = sitofp_i64_f64(n_111793); + double div_111795 = i64_res_111794 - 1.0; + + ((__global int64_t *) mem_124072)[gtid_111777] = n_111793; + ((__global double *) mem_124074)[gtid_111777] = div_111795; } - barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46513; - int32_t skip_waves_46514; + error_0: + return; + #undef segmap_group_sizze_111788 +} +__kernel void mainzisegmap_112242(__global int *global_failure, int64_t m_75136, + double level_75142, __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124127, __global + unsigned char *mem_124130, __global + unsigned char *mem_124133) +{ + #define segmap_group_sizze_112560 (mainzisegmap_group_sizze_112244) - skip_waves_46514 = 1; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; - int32_t x_46509; - int32_t x_46510; + if (*global_failure >= 0) + return; - offset_46513 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46499, sext_i64_i32(segred_group_sizze_41402))) { - x_46509 = ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46513)]; - } - } - offset_46513 = 1; - while (slt32(offset_46513, wave_sizze_46501)) { - if (slt32(local_tid_46499 + offset_46513, - sext_i64_i32(segred_group_sizze_41402)) && ((local_tid_46499 - - squot32(local_tid_46499, - wave_sizze_46501) * - wave_sizze_46501) & - (2 * - offset_46513 - - 1)) == 0) { - // read array element - { - x_46510 = ((volatile __local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46513)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46511 = smax32(x_46509, x_46510); - - x_46509 = 
defunc_1_op_res_46511; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_46509; - } - } - offset_46513 *= 2; - } - while (slt32(skip_waves_46514, - squot32(sext_i64_i32(segred_group_sizze_41402) + - wave_sizze_46501 - 1, wave_sizze_46501))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46513 = skip_waves_46514 * wave_sizze_46501; - if (slt32(local_tid_46499 + offset_46513, - sext_i64_i32(segred_group_sizze_41402)) && ((local_tid_46499 - - squot32(local_tid_46499, - wave_sizze_46501) * - wave_sizze_46501) == - 0 && - (squot32(local_tid_46499, - wave_sizze_46501) & - (2 * - skip_waves_46514 - - 1)) == 0)) { - // read array element - { - x_46510 = ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46513)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46511 = smax32(x_46509, x_46510); - - x_46509 = defunc_1_op_res_46511; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_46509; - } + int32_t global_tid_128692; + int32_t local_tid_128693; + int64_t group_sizze_128696; + int32_t wave_sizze_128695; + int32_t group_tid_128694; + + global_tid_128692 = get_global_id(0); + local_tid_128693 = get_local_id(0); + group_sizze_128696 = get_local_size(0); + wave_sizze_128695 = LOCKSTEP_WIDTH; + group_tid_128694 = get_group_id(0); + + int32_t phys_tid_112242; + + phys_tid_112242 = global_tid_128692; + + int64_t gtid_112241; + + gtid_112241 = sext_i32_i64(group_tid_128694) * segmap_group_sizze_112560 + + sext_i32_i64(local_tid_128693); + if (slt64(gtid_112241, m_75136)) { + double pval_brownian_motion_max_res_112564 = ((__global + double *) mem_124127)[gtid_112241]; + int64_t defunc_0_f_res_112565 = ((__global + int64_t *) mem_124130)[gtid_112241]; + bool isnan_res_112566; + + isnan_res_112566 = futrts_isnan64(pval_brownian_motion_max_res_112564); + + bool cond_112567 = !isnan_res_112566; + 
bool cond_t_res_112568 = pval_brownian_motion_max_res_112564 < + level_75142; + bool x_112569 = cond_112567 && cond_t_res_112568; + bool chk_t_res_112570 = defunc_0_f_res_112565 == + (int64_t) 9223372036854775807; + bool chk_t_res_112571 = !chk_t_res_112570; + bool x_112572 = x_112569 && chk_t_res_112571; + int64_t y_start_112573; + + if (x_112572) { + int64_t x_112563 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_112241]; + int64_t y_start_t_res_112574 = sub64(x_112563, + defunc_0_f_res_112565); + + y_start_112573 = y_start_t_res_112574; + } else { + y_start_112573 = (int64_t) 0; } - skip_waves_46514 *= 2; + ((__global int64_t *) mem_124133)[gtid_112241] = y_start_112573; } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46499) == (int64_t) 0) { - x_acc_46507 = x_46509; + + error_0: + return; + #undef segmap_group_sizze_112560 +} +__kernel void mainzisegmap_112282(__global int *global_failure, int64_t m_75136, + __global unsigned char *mem_124124, __global + unsigned char *mem_124127) +{ + #define segmap_group_sizze_112443 (mainzisegmap_group_sizze_112284) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128627; + int32_t local_tid_128628; + int64_t group_sizze_128631; + int32_t wave_sizze_128630; + int32_t group_tid_128629; + + global_tid_128627 = get_global_id(0); + local_tid_128628 = get_local_id(0); + group_sizze_128631 = get_local_size(0); + wave_sizze_128630 = LOCKSTEP_WIDTH; + group_tid_128629 = get_group_id(0); + + int32_t phys_tid_112282; + + phys_tid_112282 = global_tid_128627; + + int64_t gtid_112281; + + gtid_112281 = sext_i32_i64(group_tid_128629) * segmap_group_sizze_112443 + + sext_i32_i64(local_tid_128628); + if (slt64(gtid_112281, m_75136)) { + double defunc_2_reduce_res_112446 = ((__global + double *) mem_124124)[gtid_112281]; + double defunc_0_Q_arg_112447 = 3.0 * 
defunc_2_reduce_res_112446; + double zs_res_112448 = defunc_0_Q_arg_112447 / 1.4142135623730951; + double abs_res_112449 = fabs(zs_res_112448); + double zs_res_112450 = abs_res_112449 / 2.0; + double zp_res_112451 = 1.0 + zs_res_112450; + double zs_res_112452 = 1.0 / zp_res_112451; + double zt_res_112453 = zs_res_112452 * zs_res_112452; + double zt_res_112454 = zs_res_112452 * zt_res_112453; + double zt_res_112455 = zt_res_112453 * zt_res_112453; + double zt_res_112456 = zt_res_112453 * zt_res_112454; + double zt_res_112457 = zt_res_112454 * zt_res_112454; + double zt_res_112458 = zt_res_112454 * zt_res_112455; + double zt_res_112459 = zt_res_112455 * zt_res_112455; + double zt_res_112460 = zt_res_112455 * zt_res_112456; + double zt_res_112461 = 0.17087277 * zt_res_112460; + double zt_res_112462 = 0.82215223 * zt_res_112459; + double zt_res_112463 = 1.48851587 * zt_res_112458; + double zt_res_112464 = 1.13520398 * zt_res_112457; + double zt_res_112465 = 0.27886807 * zt_res_112456; + double zt_res_112466 = 0.18628806 * zt_res_112455; + double zt_res_112467 = 9.678418e-2 * zt_res_112454; + double zt_res_112468 = 0.37409196 * zt_res_112453; + double zt_res_112469 = 1.00002368 * zs_res_112452; + double zt_res_112470 = zs_res_112448 * zs_res_112448; + double zm_res_112471 = 0.0 - zt_res_112470; + double zm_res_112472 = zm_res_112471 - 1.26551223; + double zp_res_112473 = zt_res_112469 + zm_res_112472; + double zp_res_112474 = zt_res_112468 + zp_res_112473; + double zp_res_112475 = zt_res_112467 + zp_res_112474; + double zm_res_112476 = zp_res_112475 - zt_res_112466; + double zp_res_112477 = zt_res_112465 + zm_res_112476; + double zm_res_112478 = zp_res_112477 - zt_res_112464; + double zp_res_112479 = zt_res_112463 + zm_res_112478; + double zm_res_112480 = zp_res_112479 - zt_res_112462; + double zp_res_112481 = zt_res_112461 + zm_res_112480; + double exp_res_112482; + + exp_res_112482 = futrts_exp64(zp_res_112481); + + double zt_res_112483 = zs_res_112452 * 
exp_res_112482; + bool zgze_res_112484 = 0.0 <= zs_res_112448; + double erf_res_112485; + + if (zgze_res_112484) { + double zm_res_112486 = 1.0 - zt_res_112483; + + erf_res_112485 = zm_res_112486; + } else { + double zm_res_112487 = zt_res_112483 - 1.0; + + erf_res_112485 = zm_res_112487; + } + + double zp_res_112488 = 1.0 + erf_res_112485; + double zs_res_112489 = zp_res_112488 / 2.0; + double defunc_0_Q_res_112490 = 1.0 - zs_res_112489; + double y_112491 = fpow64(defunc_2_reduce_res_112446, 2.0); + double negate_arg_112492 = 4.0 * y_112491; + double defunc_0_exp_arg_112493 = 0.0 - negate_arg_112492; + double defunc_0_exp_res_112494 = fpow64(2.718281828459045, + defunc_0_exp_arg_112493); + double x_112495 = defunc_0_Q_res_112490 + defunc_0_exp_res_112494; + double zs_res_112496 = defunc_2_reduce_res_112446 / 1.4142135623730951; + double abs_res_112497 = fabs(zs_res_112496); + double zs_res_112498 = abs_res_112497 / 2.0; + double zp_res_112499 = 1.0 + zs_res_112498; + double zs_res_112500 = 1.0 / zp_res_112499; + double zt_res_112501 = zs_res_112500 * zs_res_112500; + double zt_res_112502 = zs_res_112500 * zt_res_112501; + double zt_res_112503 = zt_res_112501 * zt_res_112501; + double zt_res_112504 = zt_res_112501 * zt_res_112502; + double zt_res_112505 = zt_res_112502 * zt_res_112502; + double zt_res_112506 = zt_res_112502 * zt_res_112503; + double zt_res_112507 = zt_res_112503 * zt_res_112503; + double zt_res_112508 = zt_res_112503 * zt_res_112504; + double zt_res_112509 = 0.17087277 * zt_res_112508; + double zt_res_112510 = 0.82215223 * zt_res_112507; + double zt_res_112511 = 1.48851587 * zt_res_112506; + double zt_res_112512 = 1.13520398 * zt_res_112505; + double zt_res_112513 = 0.27886807 * zt_res_112504; + double zt_res_112514 = 0.18628806 * zt_res_112503; + double zt_res_112515 = 9.678418e-2 * zt_res_112502; + double zt_res_112516 = 0.37409196 * zt_res_112501; + double zt_res_112517 = 1.00002368 * zs_res_112500; + double zt_res_112518 = zs_res_112496 * 
zs_res_112496; + double zm_res_112519 = 0.0 - zt_res_112518; + double zm_res_112520 = zm_res_112519 - 1.26551223; + double zp_res_112521 = zt_res_112517 + zm_res_112520; + double zp_res_112522 = zt_res_112516 + zp_res_112521; + double zp_res_112523 = zt_res_112515 + zp_res_112522; + double zm_res_112524 = zp_res_112523 - zt_res_112514; + double zp_res_112525 = zt_res_112513 + zm_res_112524; + double zm_res_112526 = zp_res_112525 - zt_res_112512; + double zp_res_112527 = zt_res_112511 + zm_res_112526; + double zm_res_112528 = zp_res_112527 - zt_res_112510; + double zp_res_112529 = zt_res_112509 + zm_res_112528; + double exp_res_112530; + + exp_res_112530 = futrts_exp64(zp_res_112529); + + double zt_res_112531 = zs_res_112500 * exp_res_112530; + bool zgze_res_112532 = 0.0 <= zs_res_112496; + double erf_res_112533; + + if (zgze_res_112532) { + double zm_res_112534 = 1.0 - zt_res_112531; + + erf_res_112533 = zm_res_112534; + } else { + double zm_res_112535 = zt_res_112531 - 1.0; + + erf_res_112533 = zm_res_112535; } + + double zp_res_112536 = 1.0 + erf_res_112533; + double zs_res_112537 = zp_res_112536 / 2.0; + double defunc_0_Q_res_112538 = 1.0 - zs_res_112537; + double y_112539 = defunc_0_exp_res_112494 * defunc_0_Q_res_112538; + double y_112540 = x_112495 - y_112539; + double pval_brownian_motion_max_res_112541 = 2.0 * y_112540; + + ((__global double *) mem_124127)[gtid_112281] = + pval_brownian_motion_max_res_112541; } - int32_t old_counter_46515; + error_0: + return; + #undef segmap_group_sizze_112443 +} +__kernel void mainzisegmap_112409(__global int *global_failure, int64_t m_75136, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124121) +{ + #define segmap_group_sizze_112417 (mainzisegmap_group_sizze_112411) - // first thread in group saves group result to global memory - { - if (local_tid_46499 == 0) { - ((__global - int32_t *) group_res_arr_mem_46495)[sext_i32_i64(group_tid_46500) * - segred_group_sizze_41402] = - 
x_acc_46507; - mem_fence_global(); - old_counter_46515 = atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46493)[(int64_t) 0], - (int) 1); - ((__local bool *) sync_arr_mem_46503)[(int64_t) 0] = - old_counter_46515 == num_groups_41404 - (int64_t) 1; - } + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128562; + int32_t local_tid_128563; + int64_t group_sizze_128566; + int32_t wave_sizze_128565; + int32_t group_tid_128564; + + global_tid_128562 = get_global_id(0); + local_tid_128563 = get_local_id(0); + group_sizze_128566 = get_local_size(0); + wave_sizze_128565 = LOCKSTEP_WIDTH; + group_tid_128564 = get_group_id(0); + + int32_t phys_tid_112409; + + phys_tid_112409 = global_tid_128562; + + int64_t gtid_112408; + + gtid_112408 = sext_i32_i64(group_tid_128564) * segmap_group_sizze_112417 + + sext_i32_i64(local_tid_128563); + if (slt64(gtid_112408, m_75136)) { + int64_t x_112420 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_112408]; + double i64_res_112421 = sitofp_i64_f64(x_112420); + + ((__global double *) mem_124121)[gtid_112408] = i64_res_112421; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46516; + error_0: + return; + #undef segmap_group_sizze_112417 +} +__kernel void mainzisegmap_112587(__global int *global_failure, int64_t N_75135, + int64_t m_75136, __global + unsigned char *images_mem_120108, __global + unsigned char *hist_inds_mem_124138, __global + unsigned char *mem_124142) +{ + #define segmap_group_sizze_112614 (mainzisegmap_group_sizze_112590) - is_last_group_46516 = ((__local bool *) sync_arr_mem_46503)[(int64_t) 0]; - if (is_last_group_46516) { - if (local_tid_46499 == 0) { - old_counter_46515 = atomic_add_i32_global(&((volatile __global - int *) mainzicounter_mem_46493)[(int64_t) 0], - (int) ((int64_t) 0 - - num_groups_41404)); - } - // read in the per-group-results - { - int64_t 
read_per_thread_46517 = sdiv_up64(num_groups_41404, - segred_group_sizze_41402); + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128699; + int32_t local_tid_128700; + int64_t group_sizze_128703; + int32_t wave_sizze_128702; + int32_t group_tid_128701; + + global_tid_128699 = get_global_id(0); + local_tid_128700 = get_local_id(0); + group_sizze_128703 = get_local_size(0); + wave_sizze_128702 = LOCKSTEP_WIDTH; + group_tid_128701 = get_group_id(0); + + int32_t phys_tid_112587; + + phys_tid_112587 = global_tid_128699; + + int64_t gtid_112585; + + gtid_112585 = squot64(sext_i32_i64(group_tid_128701) * + segmap_group_sizze_112614 + + sext_i32_i64(local_tid_128700), N_75135); + + int64_t gtid_112586; + + gtid_112586 = sext_i32_i64(group_tid_128701) * segmap_group_sizze_112614 + + sext_i32_i64(local_tid_128700) - + squot64(sext_i32_i64(group_tid_128701) * segmap_group_sizze_112614 + + sext_i32_i64(local_tid_128700), N_75135) * N_75135; + if (slt64(gtid_112585, m_75136) && slt64(gtid_112586, N_75135)) { + int64_t x_112617 = ((__global + int64_t *) hist_inds_mem_124138)[gtid_112585]; + bool cond_112620 = slt64(gtid_112586, x_112617); + double defunc_1_f_res_112621; + + if (cond_112620) { + defunc_1_f_res_112621 = NAN; + } else { + double x_112619 = ((__global + double *) images_mem_120108)[gtid_112585 * + N_75135 + + gtid_112586]; - x_29564 = 0; - for (int64_t i_46518 = 0; i_46518 < read_per_thread_46517; - i_46518++) { - int64_t group_res_id_46519 = sext_i32_i64(local_tid_46499) * - read_per_thread_46517 + i_46518; - int64_t index_of_group_res_46520 = group_res_id_46519; - - if (slt64(group_res_id_46519, num_groups_41404)) { - x_29565 = ((__global - int32_t *) group_res_arr_mem_46495)[index_of_group_res_46520 * - segred_group_sizze_41402]; - - int32_t defunc_1_op_res_29566; - - defunc_1_op_res_29566 = smax32(x_29564, x_29565); - x_29564 = defunc_1_op_res_29566; - } - } + 
defunc_1_f_res_112621 = x_112619; } - ((__local int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_29564; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46521; - int32_t skip_waves_46522; - - skip_waves_46522 = 1; - - int32_t x_46509; - int32_t x_46510; - - offset_46521 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46499, - sext_i64_i32(segred_group_sizze_41402))) { - x_46509 = ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46521)]; - } - } - offset_46521 = 1; - while (slt32(offset_46521, wave_sizze_46501)) { - if (slt32(local_tid_46499 + offset_46521, - sext_i64_i32(segred_group_sizze_41402)) && - ((local_tid_46499 - squot32(local_tid_46499, - wave_sizze_46501) * - wave_sizze_46501) & (2 * offset_46521 - 1)) == 0) { - // read array element - { - x_46510 = ((volatile __local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46521)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46511 = smax32(x_46509, - x_46510); + ((__global double *) mem_124142)[gtid_112585 * N_75135 + gtid_112586] = + defunc_1_f_res_112621; + } + + error_0: + return; + #undef segmap_group_sizze_112614 +} +__kernel void mainzisegmap_112626(__global int *global_failure, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t num_groups_112651, + int64_t num_threads_126278, __global + unsigned char *binop_p_mem_120117, __global + unsigned char *mem_120124, __global + unsigned char *mem_124145, __global + unsigned char *mem_124149, __global + unsigned char *mem_124191) +{ + #define segmap_group_sizze_112650 (mainzisegmap_group_sizze_112628) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128707; + int32_t local_tid_128708; + int64_t group_sizze_128711; + int32_t wave_sizze_128710; + int32_t 
group_tid_128709; + + global_tid_128707 = get_global_id(0); + local_tid_128708 = get_local_id(0); + group_sizze_128711 = get_local_size(0); + wave_sizze_128710 = LOCKSTEP_WIDTH; + group_tid_128709 = get_group_id(0); + + int32_t phys_tid_112626; + + phys_tid_112626 = global_tid_128707; + + int32_t phys_group_id_128712; + + phys_group_id_128712 = get_group_id(0); + for (int32_t i_128713 = 0; i_128713 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_112650)) - + phys_group_id_128712, sext_i64_i32(num_groups_112651)); + i_128713++) { + int32_t virt_group_id_128714 = phys_group_id_128712 + i_128713 * + sext_i64_i32(num_groups_112651); + int64_t gtid_112625 = sext_i32_i64(virt_group_id_128714) * + segmap_group_sizze_112650 + sext_i32_i64(local_tid_128708); + + if (slt64(gtid_112625, m_75136)) { + for (int64_t i_119914 = 0; i_119914 < k2p2zq_75151; i_119914++) { + for (int64_t i_119918 = 0; i_119918 < k2p2zq_75151; + i_119918++) { + double defunc_2_reduce_res_112659; + double redout_119920 = 0.0; + + for (int64_t i_119921 = 0; i_119921 < n_75139; i_119921++) { + double x_112663 = ((__global + double *) mem_124145)[i_119921 * + m_75136 + + gtid_112625]; + double x_112664 = ((__global + double *) binop_p_mem_120117)[i_119914 * + N_75135 + + i_119921]; + double x_112665 = ((__global + double *) mem_120124)[i_119921 * + k2p2zq_75151 + + i_119918]; + double x_112666 = x_112664 * x_112665; + bool isnan_res_112667; - x_46509 = defunc_1_op_res_46511; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_46509; - } + isnan_res_112667 = futrts_isnan64(x_112663); + + double y_112668; + + if (isnan_res_112667) { + y_112668 = 0.0; + } else { + y_112668 = 1.0; + } + + double defunc_2_f_res_112669 = x_112666 * y_112668; + double defunc_1_op_res_112662 = defunc_2_f_res_112669 + + redout_119920; + double redout_tmp_128717 = defunc_1_op_res_112662; + + redout_119920 = redout_tmp_128717; + } + 
defunc_2_reduce_res_112659 = redout_119920; + ((__global double *) mem_124149)[phys_tid_112626 + + (i_119914 * + (num_threads_126278 * + k2p2zq_75151) + + i_119918 * + num_threads_126278)] = + defunc_2_reduce_res_112659; + } + } + for (int64_t i_128718 = 0; i_128718 < k2p2zq_75151; i_128718++) { + for (int64_t i_128719 = 0; i_128719 < k2p2zq_75151; + i_128719++) { + ((__global double *) mem_124191)[i_128718 * (m_75136 * + k2p2zq_75151) + + i_128719 * m_75136 + + gtid_112625] = ((__global + double *) mem_124149)[phys_tid_112626 + + (i_128718 * + (num_threads_126278 * + k2p2zq_75151) + + i_128719 * + num_threads_126278)]; } - offset_46521 *= 2; } - while (slt32(skip_waves_46522, - squot32(sext_i64_i32(segred_group_sizze_41402) + - wave_sizze_46501 - 1, wave_sizze_46501))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46521 = skip_waves_46522 * wave_sizze_46501; - if (slt32(local_tid_46499 + offset_46521, - sext_i64_i32(segred_group_sizze_41402)) && - ((local_tid_46499 - squot32(local_tid_46499, - wave_sizze_46501) * - wave_sizze_46501) == 0 && (squot32(local_tid_46499, - wave_sizze_46501) & - (2 * skip_waves_46522 - 1)) == - 0)) { - // read array element - { - x_46510 = ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499 + - offset_46521)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46511 = smax32(x_46509, - x_46510); - - x_46509 = defunc_1_op_res_46511; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46505)[sext_i32_i64(local_tid_46499)] = - x_46509; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_112650 +} +__kernel void mainzisegmap_112672(__global int *global_failure, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t num_groups_112828, + int64_t num_threads_126280, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124, __global + unsigned char *mem_124142, __global + unsigned 
char *mem_124194, __global + unsigned char *mem_124210) +{ + #define segmap_group_sizze_112827 (mainzisegmap_group_sizze_112675) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128720; + int32_t local_tid_128721; + int64_t group_sizze_128724; + int32_t wave_sizze_128723; + int32_t group_tid_128722; + + global_tid_128720 = get_global_id(0); + local_tid_128721 = get_local_id(0); + group_sizze_128724 = get_local_size(0); + wave_sizze_128723 = LOCKSTEP_WIDTH; + group_tid_128722 = get_group_id(0); + + int32_t phys_tid_112672; + + phys_tid_112672 = global_tid_128720; + + int32_t phys_group_id_128725; + + phys_group_id_128725 = get_group_id(0); + for (int32_t i_128726 = 0; i_128726 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + segmap_group_sizze_112827)) - + phys_group_id_128725, sext_i64_i32(num_groups_112828)); + i_128726++) { + int32_t virt_group_id_128727 = phys_group_id_128725 + i_128726 * + sext_i64_i32(num_groups_112828); + int64_t gtid_112670 = squot64(sext_i32_i64(virt_group_id_128727) * + segmap_group_sizze_112827 + + sext_i32_i64(local_tid_128721), + k2p2zq_75151); + int64_t gtid_112671 = sext_i32_i64(virt_group_id_128727) * + segmap_group_sizze_112827 + sext_i32_i64(local_tid_128721) - + squot64(sext_i32_i64(virt_group_id_128727) * + segmap_group_sizze_112827 + + sext_i32_i64(local_tid_128721), k2p2zq_75151) * + k2p2zq_75151; + + if (slt64(gtid_112670, m_75136) && slt64(gtid_112671, k2p2zq_75151)) { + for (int64_t i_119924 = 0; i_119924 < k2p2zq_75151; i_119924++) { + double defunc_2_reduce_res_112839; + double redout_119926 = 0.0; + + for (int64_t i_119927 = 0; i_119927 < n_75139; i_119927++) { + double x_112843 = ((__global + double *) mem_124142)[gtid_112670 * + N_75135 + + i_119927]; + double x_112844 = ((__global + double *) mem_120120)[i_119927 * + k2p2zq_75151 + + gtid_112671]; + double x_112845 = ((__global + double *) 
mem_120124)[i_119927 * + k2p2zq_75151 + + i_119924]; + double x_112846 = x_112844 * x_112845; + bool isnan_res_112847; + + isnan_res_112847 = futrts_isnan64(x_112843); + + double y_112848; + + if (isnan_res_112847) { + y_112848 = 0.0; + } else { + y_112848 = 1.0; } + + double defunc_2_f_res_112849 = x_112846 * y_112848; + double defunc_1_op_res_112842 = defunc_2_f_res_112849 + + redout_119926; + double redout_tmp_128729 = defunc_1_op_res_112842; + + redout_119926 = redout_tmp_128729; } - skip_waves_46522 *= 2; + defunc_2_reduce_res_112839 = redout_119926; + ((__global double *) mem_124194)[phys_tid_112672 + i_119924 * + num_threads_126280] = + defunc_2_reduce_res_112839; } - // and back to memory with the final result - { - if (local_tid_46499 == 0) { - ((__global int32_t *) mem_45249)[(int64_t) 0] = x_46509; - } + for (int64_t i_128730 = 0; i_128730 < k2p2zq_75151; i_128730++) { + ((__global double *) mem_124210)[i_128730 * (k2p2zq_75151 * + m_75136) + + gtid_112670 * k2p2zq_75151 + + gtid_112671] = ((__global + double *) mem_124194)[phys_tid_112672 + + i_128730 * + num_threads_126280]; } } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_41402 + #undef segmap_group_sizze_112827 } -__kernel void mainzisegred_small_39115(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45763_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29175, - int64_t i32_res_29181, - int64_t num_groups_39254, - int64_t segment_sizze_nonzzero_45756, - __global unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44531, - __global unsigned char *mem_44536) -{ - #define segred_group_sizze_39253 (mainzisegred_group_sizze_39109) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45763_backing_0 = - (__local volatile - char *) 
red_arr_mem_45763_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45758; - int32_t local_tid_45759; - int64_t group_sizze_45762; - int32_t wave_sizze_45761; - int32_t group_tid_45760; - - global_tid_45758 = get_global_id(0); - local_tid_45759 = get_local_id(0); - group_sizze_45762 = get_local_size(0); - wave_sizze_45761 = LOCKSTEP_WIDTH; - group_tid_45760 = get_group_id(0); - - int32_t phys_tid_39115; - - phys_tid_39115 = global_tid_45758; - - __local char *red_arr_mem_45763; - - red_arr_mem_45763 = (__local char *) red_arr_mem_45763_backing_0; - - int32_t phys_group_id_45765; - - phys_group_id_45765 = get_group_id(0); - for (int32_t i_45766 = 0; i_45766 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181 * - i32_res_29181, - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756))) - - phys_group_id_45765, sext_i64_i32(num_groups_39254)); - i_45766++) { - int32_t virt_group_id_45767 = phys_group_id_45765 + i_45766 * - sext_i64_i32(num_groups_39254); - int64_t gtid_39102 = squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756), - i32_res_29181 * i32_res_29181); - int64_t gtid_39103 = squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) - - squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181), - i32_res_29181); - int64_t gtid_39104 = squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) - - 
squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756), i32_res_29181 * - i32_res_29181) * (i32_res_29181 * i32_res_29181) - - squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) - - squot64(squot64(sext_i32_i64(local_tid_45759), - segment_sizze_nonzzero_45756) + - sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181), i32_res_29181) * - i32_res_29181; - int64_t gtid_39114 = srem64(sext_i32_i64(local_tid_45759), - i32_res_29175); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29175) && (((slt64(gtid_39102, - m_29166) && - slt64(gtid_39103, - i32_res_29181)) && - slt64(gtid_39104, - i32_res_29181)) && - slt64(sext_i32_i64(local_tid_45759), - i32_res_29175 * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756)))) { - float x_39263 = ((__global - float *) images_mem_44381)[gtid_39102 * - N_29165 + - gtid_39114]; - float x_39264 = ((__global - float *) binop_p_mem_44390)[gtid_39103 * - N_29165 + - gtid_39114]; - float x_39265 = ((__global float *) mem_44531)[gtid_39104 * - N_29165 + - gtid_39114]; - float x_39266 = x_39264 * x_39265; - bool isnan_res_39267; - - isnan_res_39267 = futrts_isnan32(x_39263); - - float y_39268; - - if (isnan_res_39267) { - y_39268 = 0.0F; - } else { - y_39268 = 1.0F; - } - - float defunc_2_f_res_39269 = x_39266 * y_39268; - - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - defunc_2_f_res_39269; - } - } else { - ((__local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - 0.0F; - } +__kernel void 
mainzisegmap_113042(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_76774, + int64_t nm_76775, + int64_t gauss_jordan_res_r_ixfn_124361, + int64_t gauss_jordan_res_r_ixfn_124362, + int64_t gauss_jordan_res_r_ixfn_124364, + __global + unsigned char *gauss_jordan_res_r_mem_124366, + __global unsigned char *mem_124371) +{ + #define segmap_group_sizze_113545 (mainzisegmap_group_sizze_113046) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128880; + int32_t local_tid_128881; + int64_t group_sizze_128884; + int32_t wave_sizze_128883; + int32_t group_tid_128882; + + global_tid_128880 = get_global_id(0); + local_tid_128881 = get_local_id(0); + group_sizze_128884 = get_local_size(0); + wave_sizze_128883 = LOCKSTEP_WIDTH; + group_tid_128882 = get_group_id(0); + + int32_t phys_tid_113042; + + phys_tid_113042 = global_tid_128880; + + int64_t gtid_113039; + + gtid_113039 = squot64(sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + + sext_i32_i64(local_tid_128881), k2p2zq_75151 * + k2p2zq_75151); + + int64_t gtid_slice_113037; + + gtid_slice_113037 = squot64(sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + + sext_i32_i64(local_tid_128881) - + squot64(sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + + sext_i32_i64(local_tid_128881), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), k2p2zq_75151); + + int64_t gtid_slice_113038; + + gtid_slice_113038 = sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + sext_i32_i64(local_tid_128881) - + squot64(sext_i32_i64(group_tid_128882) * segmap_group_sizze_113545 + + sext_i32_i64(local_tid_128881), k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - squot64(sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + + sext_i32_i64(local_tid_128881) - + squot64(sext_i32_i64(group_tid_128882) * + segmap_group_sizze_113545 + + 
sext_i32_i64(local_tid_128881), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + if ((slt64(gtid_113039, m_75136) && slt64(gtid_slice_113037, + k2p2zq_75151)) && + slt64(gtid_slice_113038, k2p2zq_75151)) { + int64_t slice_113549 = k2p2zq_75151 + gtid_slice_113038; + int64_t binop_x_115354 = nm_76775 * gtid_113039; + int64_t binop_y_115355 = m_76774 * gtid_slice_113037; + int64_t binop_x_115356 = binop_x_115354 + binop_y_115355; + int64_t binop_x_115357 = slice_113549 + binop_x_115356; + int64_t new_index_115358 = squot64(binop_x_115357, nm_76775); + int64_t binop_y_115370 = nm_76775 * new_index_115358; + int64_t new_index_115371 = binop_x_115357 - binop_y_115370; + double v_113550 = ((__global + double *) gauss_jordan_res_r_mem_124366)[gauss_jordan_res_r_ixfn_124361 + + (new_index_115358 * + gauss_jordan_res_r_ixfn_124362 + + new_index_115371 * + gauss_jordan_res_r_ixfn_124364)]; + + ((__global double *) mem_124371)[gtid_113039 * (k2p2zq_75151 * + k2p2zq_75151) + + gtid_slice_113037 * k2p2zq_75151 + + gtid_slice_113038] = v_113550; + } + + error_0: + return; + #undef segmap_group_sizze_113545 +} +__kernel void mainzisegmap_113153(__global int *global_failure, int64_t m_75136, + int64_t nm_76775, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, __global + unsigned char *mem_124349) +{ + #define segmap_group_sizze_113535 (mainzisegmap_group_sizze_113156) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128874; + int32_t local_tid_128875; + int64_t group_sizze_128878; + int32_t wave_sizze_128877; + int32_t group_tid_128876; + + global_tid_128874 = get_global_id(0); + local_tid_128875 = get_local_id(0); + group_sizze_128878 = get_local_size(0); + wave_sizze_128877 = LOCKSTEP_WIDTH; + group_tid_128876 = get_group_id(0); + + 
int32_t phys_tid_113153; + + phys_tid_113153 = global_tid_128874; + + int64_t gtid_113151; + + gtid_113151 = squot64(sext_i32_i64(group_tid_128876) * + segmap_group_sizze_113535 + + sext_i32_i64(local_tid_128875), nm_76775); + + int64_t gtid_113152; + + gtid_113152 = sext_i32_i64(group_tid_128876) * segmap_group_sizze_113535 + + sext_i32_i64(local_tid_128875) - + squot64(sext_i32_i64(group_tid_128876) * segmap_group_sizze_113535 + + sext_i32_i64(local_tid_128875), nm_76775) * nm_76775; + if (slt64(gtid_113151, m_75136) && slt64(gtid_113152, nm_76775)) { + double write_value_113539 = ((__global + double *) mem_124349)[gtid_113151 * + nm_76775 + + gtid_113152]; + + if ((sle64((int64_t) 0, gtid_113151) && slt64(gtid_113151, m_75136)) && + (sle64((int64_t) 0, gtid_113152) && slt64(gtid_113152, nm_76775))) { + ((__global double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113151 * + ctx_param_ext_124326 + + gtid_113152 * + ctx_param_ext_124328)] = + write_value_113539; } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29175)) { - // perform segmented scan to imitate reduction - { - float x_39257; - float x_39258; - float x_45768; - float x_45769; - bool ltid_in_bounds_45771; - - ltid_in_bounds_45771 = slt64(sext_i32_i64(local_tid_45759), - i32_res_29175 * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756)); - - int32_t skip_threads_45772; - - // read input for in-block scan - { - if (ltid_in_bounds_45771) { - x_39258 = ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)]; - if ((local_tid_45759 - squot32(local_tid_45759, 32) * - 32) == 0) { - x_39257 = x_39258; - } - } - } - // in-block scan (hopefully no barriers needed) + } + + error_0: + return; + #undef segmap_group_sizze_113535 +} +__kernel void mainzisegmap_113173(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_76774, + int64_t nm_76775, int64_t 
i_113419, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, __global + unsigned char *mem_124345, __global + unsigned char *mem_124349) +{ + #define segmap_group_sizze_113490 (mainzisegmap_group_sizze_113176) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128869; + int32_t local_tid_128870; + int64_t group_sizze_128873; + int32_t wave_sizze_128872; + int32_t group_tid_128871; + + global_tid_128869 = get_global_id(0); + local_tid_128870 = get_local_id(0); + group_sizze_128873 = get_local_size(0); + wave_sizze_128872 = LOCKSTEP_WIDTH; + group_tid_128871 = get_group_id(0); + + int32_t phys_tid_113173; + + phys_tid_113173 = global_tid_128869; + + int64_t gtid_113171; + + gtid_113171 = squot64(sext_i32_i64(group_tid_128871) * + segmap_group_sizze_113490 + + sext_i32_i64(local_tid_128870), nm_76775); + + int64_t gtid_113172; + + gtid_113172 = sext_i32_i64(group_tid_128871) * segmap_group_sizze_113490 + + sext_i32_i64(local_tid_128870) - + squot64(sext_i32_i64(group_tid_128871) * segmap_group_sizze_113490 + + sext_i32_i64(local_tid_128870), nm_76775) * nm_76775; + if (slt64(gtid_113171, m_75136) && slt64(gtid_113172, nm_76775)) { + bool cond_113495 = ((__global bool *) mem_124345)[gtid_113171]; + int64_t defunc_0_f_res_113497 = sdiv64(gtid_113172, m_76774); + int64_t defunc_0_f_res_113498 = smod64(gtid_113172, m_76774); + double defunc_0_f_res_113499; + + if (cond_113495) { + int64_t x_113500 = mul64(m_76774, defunc_0_f_res_113497); + int64_t i_113501 = add64(defunc_0_f_res_113498, x_113500); + bool x_113502 = sle64((int64_t) 0, i_113501); + bool y_113503 = slt64(i_113501, nm_76775); + bool bounds_check_113504 = x_113502 && y_113503; + bool index_certs_113505; + + if (!bounds_check_113504) { { - skip_threads_45772 = 1; - while (slt32(skip_threads_45772, 32)) { - if 
(sle32(skip_threads_45772, local_tid_45759 - - squot32(local_tid_45759, 32) * 32) && - ltid_in_bounds_45771) { - // read operands - { - x_39257 = ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759) - - sext_i32_i64(skip_threads_45772)]; - } - // perform operation - { - bool inactive_45773 = - slt64(srem64(sext_i32_i64(local_tid_45759), - i32_res_29175), - sext_i32_i64(local_tid_45759) - - sext_i32_i64(local_tid_45759 - - skip_threads_45772)); - - if (inactive_45773) { - x_39257 = x_39258; - } - if (!inactive_45773) { - float defunc_1_op_res_39259 = x_39257 + - x_39258; - - x_39257 = defunc_1_op_res_39259; - } - } - } - if (sle32(wave_sizze_45761, skip_threads_45772)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45772, local_tid_45759 - - squot32(local_tid_45759, 32) * 32) && - ltid_in_bounds_45771) { - // write result - { - ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - x_39257; - x_39258 = x_39257; - } - } - if (sle32(wave_sizze_45761, skip_threads_45772)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45772 *= 2; + if (atomic_cmpxchg_i32_global(global_failure, -1, 191) == + -1) { + global_failure_args[0] = i_113501; + global_failure_args[1] = nm_76775; + ; } + return; } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + } + + double defunc_0_f_res_t_res_113506 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113171 * + ctx_param_ext_124326 + + i_113501 * + ctx_param_ext_124328)]; + + defunc_0_f_res_113499 = defunc_0_f_res_t_res_113506; + } else { + double v1_113494 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113171 * + ctx_param_ext_124326 + + i_113419 * + ctx_param_ext_124328)]; + bool x_113507 = sle64((int64_t) 0, defunc_0_f_res_113498); + bool y_113508 = slt64(defunc_0_f_res_113498, nm_76775); + bool bounds_check_113509 = x_113507 && y_113508; + bool index_certs_113510; + + 
if (!bounds_check_113509) { { - if ((local_tid_45759 - squot32(local_tid_45759, 32) * 32) == - 31 && ltid_in_bounds_45771) { - ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(squot32(local_tid_45759, - 32))] = - x_39257; + if (atomic_cmpxchg_i32_global(global_failure, -1, 192) == + -1) { + global_failure_args[0] = defunc_0_f_res_113498; + global_failure_args[1] = nm_76775; + ; } + return; } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_45774; - - // read input for in-block scan - { - if (squot32(local_tid_45759, 32) == 0 && - ltid_in_bounds_45771) { - x_45769 = ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)]; - if ((local_tid_45759 - squot32(local_tid_45759, - 32) * 32) == 0) { - x_45768 = x_45769; - } - } - } - // in-block scan (hopefully no barriers needed) + } + + double x_113511 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113171 * + ctx_param_ext_124326 + + defunc_0_f_res_113498 * + ctx_param_ext_124328)]; + double x_113512 = x_113511 / v1_113494; + int64_t y_113513 = sub64(k2p2zq_75151, (int64_t) 1); + bool cond_113514 = slt64(defunc_0_f_res_113497, y_113513); + double defunc_0_f_res_f_res_113515; + + if (cond_113514) { + int64_t x_113516 = add64((int64_t) 1, defunc_0_f_res_113497); + int64_t x_113517 = mul64(m_76774, x_113516); + int64_t i_113518 = add64(defunc_0_f_res_113498, x_113517); + bool x_113519 = sle64((int64_t) 0, i_113518); + bool y_113520 = slt64(i_113518, nm_76775); + bool bounds_check_113521 = x_113519 && y_113520; + bool index_certs_113522; + + if (!bounds_check_113521) { { - skip_threads_45774 = 1; - while (slt32(skip_threads_45774, 32)) { - if (sle32(skip_threads_45774, local_tid_45759 - - squot32(local_tid_45759, 32) * 32) && - (squot32(local_tid_45759, 32) == 0 && - ltid_in_bounds_45771)) { - // read operands - { - x_45768 = ((volatile __local - float *) 
red_arr_mem_45763)[sext_i32_i64(local_tid_45759) - - sext_i32_i64(skip_threads_45774)]; - } - // perform operation - { - bool inactive_45775 = - slt64(srem64(sext_i32_i64(local_tid_45759 * - 32 + 32 - 1), - i32_res_29175), - sext_i32_i64(local_tid_45759 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_45759 - - skip_threads_45774) * - 32 + 32 - 1)); - - if (inactive_45775) { - x_45768 = x_45769; - } - if (!inactive_45775) { - float defunc_1_op_res_45770 = x_45768 + - x_45769; - - x_45768 = defunc_1_op_res_45770; - } - } - } - if (sle32(wave_sizze_45761, skip_threads_45774)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45774, local_tid_45759 - - squot32(local_tid_45759, 32) * 32) && - (squot32(local_tid_45759, 32) == 0 && - ltid_in_bounds_45771)) { - // write result - { - ((volatile __local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - x_45768; - x_45769 = x_45768; - } - } - if (sle32(wave_sizze_45761, skip_threads_45774)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45774 *= 2; + if (atomic_cmpxchg_i32_global(global_failure, -1, + 193) == -1) { + global_failure_args[0] = i_113518; + global_failure_args[1] = nm_76775; + ; } + return; } } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_45759, 32) == 0 || - !ltid_in_bounds_45771)) { - // read operands - { - x_39258 = x_39257; - x_39257 = ((__local - float *) red_arr_mem_45763)[sext_i32_i64(squot32(local_tid_45759, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_45776 = - slt64(srem64(sext_i32_i64(local_tid_45759), - i32_res_29175), - sext_i32_i64(local_tid_45759) - - sext_i32_i64(squot32(local_tid_45759, - 32) * 32 - 1)); - - if (inactive_45776) { - x_39257 = x_39258; - } - if (!inactive_45776) { - float defunc_1_op_res_39259 = x_39257 + x_39258; - - x_39257 = defunc_1_op_res_39259; - } - } - // write final result - { - ((__local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - 
x_39257; + + double x_113523 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113171 * + ctx_param_ext_124326 + + i_113518 * + ctx_param_ext_124328)]; + int64_t i_113524 = add64(i_113419, x_113517); + bool x_113525 = sle64((int64_t) 0, i_113524); + bool y_113526 = slt64(i_113524, nm_76775); + bool bounds_check_113527 = x_113525 && y_113526; + bool index_certs_113528; + + if (!bounds_check_113527) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 194) == -1) { + global_failure_args[0] = i_113524; + global_failure_args[1] = nm_76775; + ; } + return; } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_45759, 32) == 0) { - ((__local - float *) red_arr_mem_45763)[sext_i32_i64(local_tid_45759)] = - x_39258; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759), m_29166 * i32_res_29181 * - i32_res_29181) && slt64(sext_i32_i64(local_tid_45759), - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756))) { - ((__global - float *) mem_44536)[squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181) + - squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759) - - squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759), - i32_res_29181 * - i32_res_29181) * - (i32_res_29181 * i32_res_29181), - i32_res_29181) * i32_res_29181 + - (sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - 
sext_i32_i64(local_tid_45759) - - squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759), - i32_res_29181 * i32_res_29181) * - (i32_res_29181 * i32_res_29181) - - squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759) - - squot64(sext_i32_i64(virt_group_id_45767) * - squot64(segred_group_sizze_39253, - segment_sizze_nonzzero_45756) + - sext_i32_i64(local_tid_45759), - i32_res_29181 * - i32_res_29181) * - (i32_res_29181 * i32_res_29181), - i32_res_29181) * - i32_res_29181)] = ((__local - float *) red_arr_mem_45763)[(sext_i32_i64(local_tid_45759) + - (int64_t) 1) * - segment_sizze_nonzzero_45756 - - (int64_t) 1]; + + double x_113529 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113171 * + ctx_param_ext_124326 + + i_113524 * + ctx_param_ext_124328)]; + double y_113530 = x_113512 * x_113529; + double defunc_0_f_res_f_res_t_res_113531 = x_113523 - y_113530; + + defunc_0_f_res_f_res_113515 = defunc_0_f_res_f_res_t_res_113531; + } else { + defunc_0_f_res_f_res_113515 = x_113512; } + defunc_0_f_res_113499 = defunc_0_f_res_f_res_113515; } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + ((__global double *) mem_124349)[gtid_113171 * nm_76775 + gtid_113172] = + defunc_0_f_res_113499; } - error_1: + error_0: return; - #undef segred_group_sizze_39253 + #undef segmap_group_sizze_113490 } -__kernel void mainzisegred_small_40466(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45960_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29175, - int64_t i32_res_29181, - int64_t num_groups_40519, - int64_t segment_sizze_nonzzero_45953, - __global unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44844) +__kernel void 
mainzisegmap_113263(__global int *global_failure, int64_t m_75136, + int64_t i_113419, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, __global + unsigned char *mem_124345) { - #define segred_group_sizze_40518 (mainzisegred_group_sizze_40460) + #define segmap_group_sizze_113477 (mainzisegmap_group_sizze_113265) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45960_backing_0 = - (__local volatile - char *) red_arr_mem_45960_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45955; - int32_t local_tid_45956; - int64_t group_sizze_45959; - int32_t wave_sizze_45958; - int32_t group_tid_45957; + int32_t global_tid_128864; + int32_t local_tid_128865; + int64_t group_sizze_128868; + int32_t wave_sizze_128867; + int32_t group_tid_128866; - global_tid_45955 = get_global_id(0); - local_tid_45956 = get_local_id(0); - group_sizze_45959 = get_local_size(0); - wave_sizze_45958 = LOCKSTEP_WIDTH; - group_tid_45957 = get_group_id(0); + global_tid_128864 = get_global_id(0); + local_tid_128865 = get_local_id(0); + group_sizze_128868 = get_local_size(0); + wave_sizze_128867 = LOCKSTEP_WIDTH; + group_tid_128866 = get_group_id(0); - int32_t phys_tid_40466; + int32_t phys_tid_113263; - phys_tid_40466 = global_tid_45955; + phys_tid_113263 = global_tid_128864; - __local char *red_arr_mem_45960; + int64_t gtid_113262; - red_arr_mem_45960 = (__local char *) red_arr_mem_45960_backing_0; + gtid_113262 = sext_i32_i64(group_tid_128866) * segmap_group_sizze_113477 + + sext_i32_i64(local_tid_128865); + if (slt64(gtid_113262, m_75136)) { + double v1_113482 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113262 * + ctx_param_ext_124326 + + i_113419 * + ctx_param_ext_124328)]; + bool cond_113483 = v1_113482 == 0.0; + + ((__global bool *) mem_124345)[gtid_113262] = cond_113483; + } - 
int32_t phys_group_id_45962; + error_0: + return; + #undef segmap_group_sizze_113477 +} +__kernel void mainzisegmap_113346(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t m_75136, + int64_t k2p2zq_75151, int64_t m_76774, + int64_t nm_76775, __global + unsigned char *defunc_3_map_res_mem_124294, + __global unsigned char *mem_124322) +{ + #define segmap_group_sizze_113397 (mainzisegmap_group_sizze_113349) - phys_group_id_45962 = get_group_id(0); - for (int32_t i_45963 = 0; i_45963 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181, - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953))) - - phys_group_id_45962, sext_i64_i32(num_groups_40519)); - i_45963++) { - int32_t virt_group_id_45964 = phys_group_id_45962 + i_45963 * - sext_i64_i32(num_groups_40519); - int64_t gtid_40455 = squot64(squot64(sext_i32_i64(local_tid_45956), - segment_sizze_nonzzero_45953) + - sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953), - i32_res_29181); - int64_t gtid_40456 = squot64(sext_i32_i64(local_tid_45956), - segment_sizze_nonzzero_45953) + - sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953) - - squot64(squot64(sext_i32_i64(local_tid_45956), - segment_sizze_nonzzero_45953) + - sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953), i32_res_29181) * - i32_res_29181; - int64_t gtid_40465 = srem64(sext_i32_i64(local_tid_45956), - i32_res_29175); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29175) && ((slt64(gtid_40455, - m_29166) && - slt64(gtid_40456, - i32_res_29181)) && - slt64(sext_i32_i64(local_tid_45956), - i32_res_29175 * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953)))) { - float x_40528 = ((__global - float *) images_mem_44381)[gtid_40455 * - N_29165 + - gtid_40465]; - bool isnan_res_40529; - - 
isnan_res_40529 = futrts_isnan32(x_40528); - - float defunc_1_f_res_40530; - - if (isnan_res_40529) { - defunc_1_f_res_40530 = 0.0F; - } else { - float x_40527 = ((__global - float *) binop_p_mem_44390)[gtid_40456 * - N_29165 + - gtid_40465]; - float defunc_1_f_res_f_res_40531 = x_40527 * x_40528; - - defunc_1_f_res_40530 = defunc_1_f_res_f_res_40531; - } - // save map-out results - { } - // save results to be reduced + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128837; + int32_t local_tid_128838; + int64_t group_sizze_128841; + int32_t wave_sizze_128840; + int32_t group_tid_128839; + + global_tid_128837 = get_global_id(0); + local_tid_128838 = get_local_id(0); + group_sizze_128841 = get_local_size(0); + wave_sizze_128840 = LOCKSTEP_WIDTH; + group_tid_128839 = get_group_id(0); + + int32_t phys_tid_113346; + + phys_tid_113346 = global_tid_128837; + + int64_t gtid_113344; + + gtid_113344 = squot64(sext_i32_i64(group_tid_128839) * + segmap_group_sizze_113397 + + sext_i32_i64(local_tid_128838), nm_76775); + + int64_t gtid_113345; + + gtid_113345 = sext_i32_i64(group_tid_128839) * segmap_group_sizze_113397 + + sext_i32_i64(local_tid_128838) - + squot64(sext_i32_i64(group_tid_128839) * segmap_group_sizze_113397 + + sext_i32_i64(local_tid_128838), nm_76775) * nm_76775; + if (slt64(gtid_113344, m_75136) && slt64(gtid_113345, nm_76775)) { + int64_t defunc_0_f_res_113402 = sdiv64(gtid_113345, m_76774); + int64_t defunc_0_f_res_113403 = smod64(gtid_113345, m_76774); + bool cond_113404 = slt64(defunc_0_f_res_113403, k2p2zq_75151); + double defunc_0_f_res_113405; + + if (cond_113404) { + bool x_113406 = sle64((int64_t) 0, defunc_0_f_res_113402); + bool y_113407 = slt64(defunc_0_f_res_113402, k2p2zq_75151); + bool bounds_check_113408 = x_113406 && y_113407; + bool x_113409 = sle64((int64_t) 0, defunc_0_f_res_113403); + bool bounds_check_113410 = cond_113404 && x_113409; + bool 
index_ok_113411 = bounds_check_113408 && bounds_check_113410; + bool index_certs_113412; + + if (!index_ok_113411) { { - ((__local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - defunc_1_f_res_40530; + if (atomic_cmpxchg_i32_global(global_failure, -1, 186) == + -1) { + global_failure_args[0] = defunc_0_f_res_113402; + global_failure_args[1] = defunc_0_f_res_113403; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + return; } + } + + double defunc_0_f_res_t_res_113413 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_113344 * + (k2p2zq_75151 * + k2p2zq_75151) + + defunc_0_f_res_113402 * + k2p2zq_75151 + + defunc_0_f_res_113403]; + + defunc_0_f_res_113405 = defunc_0_f_res_t_res_113413; + } else { + int64_t y_113414 = add64(k2p2zq_75151, defunc_0_f_res_113402); + bool cond_113415 = defunc_0_f_res_113403 == y_113414; + double defunc_0_f_res_f_res_113416; + + if (cond_113415) { + defunc_0_f_res_f_res_113416 = 1.0; } else { - ((__local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - 0.0F; + defunc_0_f_res_f_res_113416 = 0.0; } + defunc_0_f_res_113405 = defunc_0_f_res_f_res_113416; } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29175)) { - // perform segmented scan to imitate reduction - { - float x_40522; - float x_40523; - float x_45965; - float x_45966; - bool ltid_in_bounds_45968; - - ltid_in_bounds_45968 = slt64(sext_i32_i64(local_tid_45956), - i32_res_29175 * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953)); - - int32_t skip_threads_45969; - - // read input for in-block scan - { - if (ltid_in_bounds_45968) { - x_40523 = ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)]; - if ((local_tid_45956 - squot32(local_tid_45956, 32) * - 32) == 0) { - x_40522 = x_40523; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_45969 = 1; - while (slt32(skip_threads_45969, 32)) { - if 
(sle32(skip_threads_45969, local_tid_45956 - - squot32(local_tid_45956, 32) * 32) && - ltid_in_bounds_45968) { - // read operands - { - x_40522 = ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956) - - sext_i32_i64(skip_threads_45969)]; - } - // perform operation - { - bool inactive_45970 = - slt64(srem64(sext_i32_i64(local_tid_45956), - i32_res_29175), - sext_i32_i64(local_tid_45956) - - sext_i32_i64(local_tid_45956 - - skip_threads_45969)); - - if (inactive_45970) { - x_40522 = x_40523; - } - if (!inactive_45970) { - float defunc_1_op_res_40524 = x_40522 + - x_40523; - - x_40522 = defunc_1_op_res_40524; - } - } - } - if (sle32(wave_sizze_45958, skip_threads_45969)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45969, local_tid_45956 - - squot32(local_tid_45956, 32) * 32) && - ltid_in_bounds_45968) { - // write result - { - ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - x_40522; - x_40523 = x_40522; - } - } - if (sle32(wave_sizze_45958, skip_threads_45969)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45969 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_45956 - squot32(local_tid_45956, 32) * 32) == - 31 && ltid_in_bounds_45968) { - ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(squot32(local_tid_45956, - 32))] = - x_40522; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_45971; + ((__global double *) mem_124322)[gtid_113344 * nm_76775 + gtid_113345] = + defunc_0_f_res_113405; + } + + error_0: + return; + #undef segmap_group_sizze_113397 +} +__kernel void mainzisegmap_113555(__global int *global_failure, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t num_groups_113576, + int64_t num_threads_126291, __global + unsigned char *binop_p_mem_120117, 
__global + unsigned char *mem_124375, __global + unsigned char *mem_124378, __global + unsigned char *mem_124393) +{ + #define segmap_group_sizze_113575 (mainzisegmap_group_sizze_113557) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128888; + int32_t local_tid_128889; + int64_t group_sizze_128892; + int32_t wave_sizze_128891; + int32_t group_tid_128890; + + global_tid_128888 = get_global_id(0); + local_tid_128889 = get_local_id(0); + group_sizze_128892 = get_local_size(0); + wave_sizze_128891 = LOCKSTEP_WIDTH; + group_tid_128890 = get_group_id(0); + + int32_t phys_tid_113555; + + phys_tid_113555 = global_tid_128888; + + int32_t phys_group_id_128893; + + phys_group_id_128893 = get_group_id(0); + for (int32_t i_128894 = 0; i_128894 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_113575)) - + phys_group_id_128893, sext_i64_i32(num_groups_113576)); + i_128894++) { + int32_t virt_group_id_128895 = phys_group_id_128893 + i_128894 * + sext_i64_i32(num_groups_113576); + int64_t gtid_113554 = sext_i32_i64(virt_group_id_128895) * + segmap_group_sizze_113575 + sext_i32_i64(local_tid_128889); + + if (slt64(gtid_113554, m_75136)) { + for (int64_t i_119930 = 0; i_119930 < k2p2zq_75151; i_119930++) { + double defunc_2_reduce_res_113582; + double redout_119932 = 0.0; + + for (int64_t i_119933 = 0; i_119933 < n_75139; i_119933++) { + double x_113587 = ((__global + double *) mem_124375)[i_119933 * + m_75136 + + gtid_113554]; + bool isnan_res_113588; - // read input for in-block scan - { - if (squot32(local_tid_45956, 32) == 0 && - ltid_in_bounds_45968) { - x_45966 = ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)]; - if ((local_tid_45956 - squot32(local_tid_45956, - 32) * 32) == 0) { - x_45965 = x_45966; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_45971 = 1; - while (slt32(skip_threads_45971, 32)) 
{ - if (sle32(skip_threads_45971, local_tid_45956 - - squot32(local_tid_45956, 32) * 32) && - (squot32(local_tid_45956, 32) == 0 && - ltid_in_bounds_45968)) { - // read operands - { - x_45965 = ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956) - - sext_i32_i64(skip_threads_45971)]; - } - // perform operation - { - bool inactive_45972 = - slt64(srem64(sext_i32_i64(local_tid_45956 * - 32 + 32 - 1), - i32_res_29175), - sext_i32_i64(local_tid_45956 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_45956 - - skip_threads_45971) * - 32 + 32 - 1)); - - if (inactive_45972) { - x_45965 = x_45966; - } - if (!inactive_45972) { - float defunc_1_op_res_45967 = x_45965 + - x_45966; - - x_45965 = defunc_1_op_res_45967; - } - } - } - if (sle32(wave_sizze_45958, skip_threads_45971)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45971, local_tid_45956 - - squot32(local_tid_45956, 32) * 32) && - (squot32(local_tid_45956, 32) == 0 && - ltid_in_bounds_45968)) { - // write result - { - ((volatile __local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - x_45965; - x_45966 = x_45965; - } - } - if (sle32(wave_sizze_45958, skip_threads_45971)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45971 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_45956, 32) == 0 || - !ltid_in_bounds_45968)) { - // read operands - { - x_40523 = x_40522; - x_40522 = ((__local - float *) red_arr_mem_45960)[sext_i32_i64(squot32(local_tid_45956, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_45973 = - slt64(srem64(sext_i32_i64(local_tid_45956), - i32_res_29175), - sext_i32_i64(local_tid_45956) - - sext_i32_i64(squot32(local_tid_45956, - 32) * 32 - 1)); - - if (inactive_45973) { - x_40522 = x_40523; - } - if (!inactive_45973) { - float defunc_1_op_res_40524 = x_40522 + x_40523; - - x_40522 = defunc_1_op_res_40524; - } - } - // write final result - { - 
((__local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - x_40522; - } + isnan_res_113588 = futrts_isnan64(x_113587); + + double defunc_1_f_res_113589; + + if (isnan_res_113588) { + defunc_1_f_res_113589 = 0.0; + } else { + double x_113586 = ((__global + double *) binop_p_mem_120117)[i_119930 * + N_75135 + + i_119933]; + double defunc_1_f_res_f_res_113590 = x_113586 * + x_113587; + + defunc_1_f_res_113589 = defunc_1_f_res_f_res_113590; } + + double defunc_1_op_res_113585 = defunc_1_f_res_113589 + + redout_119932; + double redout_tmp_128897 = defunc_1_op_res_113585; + + redout_119932 = redout_tmp_128897; } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_45956, 32) == 0) { - ((__local - float *) red_arr_mem_45960)[sext_i32_i64(local_tid_45956)] = - x_40523; - } + defunc_2_reduce_res_113582 = redout_119932; + ((__global double *) mem_124378)[phys_tid_113555 + i_119930 * + num_threads_126291] = + defunc_2_reduce_res_113582; + } + for (int64_t i_128898 = 0; i_128898 < k2p2zq_75151; i_128898++) { + ((__global double *) mem_124393)[i_128898 * m_75136 + + gtid_113554] = ((__global + double *) mem_124378)[phys_tid_113555 + + i_128898 * + num_threads_126291]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_113575 +} +__kernel void mainzisegmap_113696(__global int *global_failure, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_113716, + int64_t num_threads_126295, __global + unsigned char *mem_124597, __global + unsigned char *mem_124600, __global + unsigned char *mem_124603, __global + unsigned char *mem_124618) +{ + #define segmap_group_sizze_113715 (mainzisegmap_group_sizze_113698) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129020; + int32_t local_tid_129021; + int64_t group_sizze_129024; + int32_t 
wave_sizze_129023; + int32_t group_tid_129022; + + global_tid_129020 = get_global_id(0); + local_tid_129021 = get_local_id(0); + group_sizze_129024 = get_local_size(0); + wave_sizze_129023 = LOCKSTEP_WIDTH; + group_tid_129022 = get_group_id(0); + + int32_t phys_tid_113696; + + phys_tid_113696 = global_tid_129020; + + int32_t phys_group_id_129025; + + phys_group_id_129025 = get_group_id(0); + for (int32_t i_129026 = 0; i_129026 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_113715)) - + phys_group_id_129025, sext_i64_i32(num_groups_113716)); + i_129026++) { + int32_t virt_group_id_129027 = phys_group_id_129025 + i_129026 * + sext_i64_i32(num_groups_113716); + int64_t gtid_113695 = sext_i32_i64(virt_group_id_129027) * + segmap_group_sizze_113715 + sext_i32_i64(local_tid_129021); + + if (slt64(gtid_113695, m_75136)) { + for (int64_t i_119936 = 0; i_119936 < k2p2zq_75151; i_119936++) { + double defunc_0_f_res_113723; + double redout_119938 = 0.0; + + for (int64_t i_119939 = 0; i_119939 < k2p2zq_75151; + i_119939++) { + double x_113727 = ((__global + double *) mem_124600)[i_119939 * + m_75136 + + gtid_113695]; + double x_113728 = ((__global + double *) mem_124597)[i_119936 * + (m_75136 * + k2p2zq_75151) + + i_119939 * + m_75136 + + gtid_113695]; + double defunc_1_f_res_113729 = x_113727 * x_113728; + double defunc_1_op_res_113726 = defunc_1_f_res_113729 + + redout_119938; + double redout_tmp_129029 = defunc_1_op_res_113726; + + redout_119938 = redout_tmp_129029; } - barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_113723 = redout_119938; + ((__global double *) mem_124603)[phys_tid_113696 + i_119936 * + num_threads_126295] = + defunc_0_f_res_113723; + } + for (int64_t i_129030 = 0; i_129030 < k2p2zq_75151; i_129030++) { + ((__global double *) mem_124618)[i_129030 * m_75136 + + gtid_113695] = ((__global + double *) mem_124603)[phys_tid_113696 + + i_129030 * + num_threads_126295]; } } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - 
if (slt64(sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953) + - sext_i32_i64(local_tid_45956), m_29166 * i32_res_29181) && - slt64(sext_i32_i64(local_tid_45956), - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953))) { - ((__global - float *) mem_44844)[squot64(sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953) + - sext_i32_i64(local_tid_45956), - i32_res_29181) * i32_res_29181 + - (sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953) + - sext_i32_i64(local_tid_45956) - - squot64(sext_i32_i64(virt_group_id_45964) * - squot64(segred_group_sizze_40518, - segment_sizze_nonzzero_45953) + - sext_i32_i64(local_tid_45956), - i32_res_29181) * - i32_res_29181)] = ((__local - float *) red_arr_mem_45960)[(sext_i32_i64(local_tid_45956) + - (int64_t) 1) * - segment_sizze_nonzzero_45953 - - (int64_t) 1]; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_113715 +} +__kernel void mainzisegmap_113828(__global int *global_failure, int64_t N_75135, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_113847, + int64_t num_threads_126299, __global + unsigned char *mem_120124, __global + unsigned char *mem_124662, __global + unsigned char *mem_124665, __global + unsigned char *mem_124680) +{ + #define segmap_group_sizze_113846 (mainzisegmap_group_sizze_113830) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129108; + int32_t local_tid_129109; + int64_t group_sizze_129112; + int32_t wave_sizze_129111; + int32_t group_tid_129110; + + global_tid_129108 = get_global_id(0); + local_tid_129109 = get_local_id(0); + group_sizze_129112 = get_local_size(0); + wave_sizze_129111 = LOCKSTEP_WIDTH; + group_tid_129110 = get_group_id(0); + + int32_t 
phys_tid_113828; + + phys_tid_113828 = global_tid_129108; + + int32_t phys_group_id_129113; + + phys_group_id_129113 = get_group_id(0); + for (int32_t i_129114 = 0; i_129114 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_113846)) - + phys_group_id_129113, sext_i64_i32(num_groups_113847)); + i_129114++) { + int32_t virt_group_id_129115 = phys_group_id_129113 + i_129114 * + sext_i64_i32(num_groups_113847); + int64_t gtid_113827 = sext_i32_i64(virt_group_id_129115) * + segmap_group_sizze_113846 + sext_i32_i64(local_tid_129109); + + if (slt64(gtid_113827, m_75136)) { + for (int64_t i_119946 = 0; i_119946 < N_75135; i_119946++) { + double defunc_0_f_res_113853; + double redout_119948 = 0.0; + + for (int64_t i_119949 = 0; i_119949 < k2p2zq_75151; + i_119949++) { + double x_113857 = ((__global + double *) mem_124662)[i_119949 * + m_75136 + + gtid_113827]; + double x_113858 = ((__global + double *) mem_120124)[i_119946 * + k2p2zq_75151 + + i_119949]; + double defunc_1_f_res_113859 = x_113857 * x_113858; + double defunc_1_op_res_113856 = defunc_1_f_res_113859 + + redout_119948; + double redout_tmp_129117 = defunc_1_op_res_113856; + + redout_119948 = redout_tmp_129117; + } + defunc_0_f_res_113853 = redout_119948; + ((__global double *) mem_124665)[phys_tid_113828 + i_119946 * + num_threads_126299] = + defunc_0_f_res_113853; + } + for (int64_t i_129118 = 0; i_129118 < N_75135; i_129118++) { + ((__global double *) mem_124680)[i_129118 * m_75136 + + gtid_113827] = ((__global + double *) mem_124665)[phys_tid_113828 + + i_129118 * + num_threads_126299]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_40518 + #undef segmap_group_sizze_113846 } -__kernel void mainzisegred_small_40603(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46048_backing_aligned_0, - int64_t m_29166, int64_t i32_res_29181, - int64_t num_groups_40652, - int64_t 
segment_sizze_nonzzero_46041, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global unsigned char *mem_44910) +__kernel void mainzisegmap_114006(__global int *global_failure, int64_t N_75135, + int64_t m_75136, __global + unsigned char *mem_124906, __global + unsigned char *mem_124909, __global + unsigned char *mem_124914, __global + unsigned char *mem_124917) { - #define segred_group_sizze_40651 (mainzisegred_group_sizze_40597) + #define segmap_group_sizze_114150 (mainzisegmap_group_sizze_114009) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46048_backing_0 = - (__local volatile - char *) red_arr_mem_46048_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46043; - int32_t local_tid_46044; - int64_t group_sizze_46047; - int32_t wave_sizze_46046; - int32_t group_tid_46045; + int32_t global_tid_129317; + int32_t local_tid_129318; + int64_t group_sizze_129321; + int32_t wave_sizze_129320; + int32_t group_tid_129319; - global_tid_46043 = get_global_id(0); - local_tid_46044 = get_local_id(0); - group_sizze_46047 = get_local_size(0); - wave_sizze_46046 = LOCKSTEP_WIDTH; - group_tid_46045 = get_group_id(0); + global_tid_129317 = get_global_id(0); + local_tid_129318 = get_local_id(0); + group_sizze_129321 = get_local_size(0); + wave_sizze_129320 = LOCKSTEP_WIDTH; + group_tid_129319 = get_group_id(0); - int32_t phys_tid_40603; + int32_t phys_tid_114006; - phys_tid_40603 = global_tid_46043; + phys_tid_114006 = global_tid_129317; - __local char *red_arr_mem_46048; + int64_t gtid_114004; - red_arr_mem_46048 = (__local char *) red_arr_mem_46048_backing_0; + gtid_114004 = squot64(sext_i32_i64(group_tid_129319) * + segmap_group_sizze_114150 + + sext_i32_i64(local_tid_129318), N_75135); - int32_t phys_group_id_46050; + int64_t gtid_114005; - phys_group_id_46050 = get_group_id(0); - for (int32_t i_46051 = 0; 
i_46051 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * i32_res_29181, - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041))) - - phys_group_id_46050, sext_i64_i32(num_groups_40652)); - i_46051++) { - int32_t virt_group_id_46052 = phys_group_id_46050 + i_46051 * - sext_i64_i32(num_groups_40652); - int64_t gtid_40592 = squot64(squot64(sext_i32_i64(local_tid_46044), - segment_sizze_nonzzero_46041) + - sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041), - i32_res_29181); - int64_t gtid_40593 = squot64(sext_i32_i64(local_tid_46044), - segment_sizze_nonzzero_46041) + - sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041) - - squot64(squot64(sext_i32_i64(local_tid_46044), - segment_sizze_nonzzero_46041) + - sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041), i32_res_29181) * - i32_res_29181; - int64_t gtid_40602 = srem64(sext_i32_i64(local_tid_46044), - i32_res_29181); + gtid_114005 = sext_i32_i64(group_tid_129319) * segmap_group_sizze_114150 + + sext_i32_i64(local_tid_129318) - + squot64(sext_i32_i64(group_tid_129319) * segmap_group_sizze_114150 + + sext_i32_i64(local_tid_129318), N_75135) * N_75135; + if (slt64(gtid_114004, m_75136) && slt64(gtid_114005, N_75135)) { + double x_114154 = ((__global double *) mem_124909)[gtid_114004 * + N_75135 + + gtid_114005]; + bool isnan_res_114157; - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29181) && ((slt64(gtid_40592, - m_29166) && - slt64(gtid_40593, - i32_res_29181)) && - slt64(sext_i32_i64(local_tid_46044), - i32_res_29181 * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041)))) { - float x_40661 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_40592 * - i32_res_29181 + - gtid_40602]; - float x_40662 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_40592 * - (i32_res_29181 * - i32_res_29181) + - 
gtid_40593 * - i32_res_29181 + - gtid_40602]; - float defunc_1_f_res_40663 = x_40661 * x_40662; - - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - defunc_1_f_res_40663; - } - } else { - ((__local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - 0.0F; - } + isnan_res_114157 = futrts_isnan64(x_114154); + + bool defunc_0_p_res_114158 = !isnan_res_114157; + int64_t defunc_1_f_res_114159; + + if (defunc_0_p_res_114158) { + int64_t x_114155 = ((__global int64_t *) mem_124906)[gtid_114004 * + N_75135 + + gtid_114005]; + int64_t defunc_1_f_res_t_res_114160 = sub64(x_114155, (int64_t) 1); + + defunc_1_f_res_114159 = defunc_1_f_res_t_res_114160; + } else { + defunc_1_f_res_114159 = (int64_t) -1; } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29181)) { - // perform segmented scan to imitate reduction - { - float x_40655; - float x_40656; - float x_46053; - float x_46054; - bool ltid_in_bounds_46056; - - ltid_in_bounds_46056 = slt64(sext_i32_i64(local_tid_46044), - i32_res_29181 * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041)); - - int32_t skip_threads_46057; - - // read input for in-block scan - { - if (ltid_in_bounds_46056) { - x_40656 = ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)]; - if ((local_tid_46044 - squot32(local_tid_46044, 32) * - 32) == 0) { - x_40655 = x_40656; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46057 = 1; - while (slt32(skip_threads_46057, 32)) { - if (sle32(skip_threads_46057, local_tid_46044 - - squot32(local_tid_46044, 32) * 32) && - ltid_in_bounds_46056) { - // read operands - { - x_40655 = ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044) - - sext_i32_i64(skip_threads_46057)]; - } - // perform operation - { - bool inactive_46058 = - slt64(srem64(sext_i32_i64(local_tid_46044), - i32_res_29181), - 
sext_i32_i64(local_tid_46044) - - sext_i32_i64(local_tid_46044 - - skip_threads_46057)); - - if (inactive_46058) { - x_40655 = x_40656; - } - if (!inactive_46058) { - float defunc_1_op_res_40657 = x_40655 + - x_40656; - - x_40655 = defunc_1_op_res_40657; - } - } - } - if (sle32(wave_sizze_46046, skip_threads_46057)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46057, local_tid_46044 - - squot32(local_tid_46044, 32) * 32) && - ltid_in_bounds_46056) { - // write result - { - ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - x_40655; - x_40656 = x_40655; - } - } - if (sle32(wave_sizze_46046, skip_threads_46057)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46057 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46044 - squot32(local_tid_46044, 32) * 32) == - 31 && ltid_in_bounds_46056) { - ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(squot32(local_tid_46044, - 32))] = - x_40655; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46059; - - // read input for in-block scan - { - if (squot32(local_tid_46044, 32) == 0 && - ltid_in_bounds_46056) { - x_46054 = ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)]; - if ((local_tid_46044 - squot32(local_tid_46044, - 32) * 32) == 0) { - x_46053 = x_46054; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46059 = 1; - while (slt32(skip_threads_46059, 32)) { - if (sle32(skip_threads_46059, local_tid_46044 - - squot32(local_tid_46044, 32) * 32) && - (squot32(local_tid_46044, 32) == 0 && - ltid_in_bounds_46056)) { - // read operands - { - x_46053 = ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044) - - sext_i32_i64(skip_threads_46059)]; - } - // perform operation - { - bool inactive_46060 = - 
slt64(srem64(sext_i32_i64(local_tid_46044 * - 32 + 32 - 1), - i32_res_29181), - sext_i32_i64(local_tid_46044 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46044 - - skip_threads_46059) * - 32 + 32 - 1)); - - if (inactive_46060) { - x_46053 = x_46054; - } - if (!inactive_46060) { - float defunc_1_op_res_46055 = x_46053 + - x_46054; - - x_46053 = defunc_1_op_res_46055; - } - } - } - if (sle32(wave_sizze_46046, skip_threads_46059)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46059, local_tid_46044 - - squot32(local_tid_46044, 32) * 32) && - (squot32(local_tid_46044, 32) == 0 && - ltid_in_bounds_46056)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - x_46053; - x_46054 = x_46053; - } - } - if (sle32(wave_sizze_46046, skip_threads_46059)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46059 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46044, 32) == 0 || - !ltid_in_bounds_46056)) { - // read operands - { - x_40656 = x_40655; - x_40655 = ((__local - float *) red_arr_mem_46048)[sext_i32_i64(squot32(local_tid_46044, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46061 = - slt64(srem64(sext_i32_i64(local_tid_46044), - i32_res_29181), - sext_i32_i64(local_tid_46044) - - sext_i32_i64(squot32(local_tid_46044, - 32) * 32 - 1)); - - if (inactive_46061) { - x_40655 = x_40656; - } - if (!inactive_46061) { - float defunc_1_op_res_40657 = x_40655 + x_40656; - - x_40655 = defunc_1_op_res_40657; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - x_40655; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46044, 32) == 0) { - ((__local - float *) red_arr_mem_46048)[sext_i32_i64(local_tid_46044)] = - x_40656; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } + if 
((sle64((int64_t) 0, gtid_114004) && slt64(gtid_114004, m_75136)) && + (sle64((int64_t) 0, defunc_1_f_res_114159) && + slt64(defunc_1_f_res_114159, N_75135))) { + ((__global int64_t *) mem_124917)[gtid_114004 * N_75135 + + defunc_1_f_res_114159] = + gtid_114005; } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041) + - sext_i32_i64(local_tid_46044), m_29166 * i32_res_29181) && - slt64(sext_i32_i64(local_tid_46044), - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041))) { - ((__global - float *) mem_44910)[squot64(sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041) + - sext_i32_i64(local_tid_46044), - i32_res_29181) * i32_res_29181 + - (sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041) + - sext_i32_i64(local_tid_46044) - - squot64(sext_i32_i64(virt_group_id_46052) * - squot64(segred_group_sizze_40651, - segment_sizze_nonzzero_46041) + - sext_i32_i64(local_tid_46044), - i32_res_29181) * - i32_res_29181)] = ((__local - float *) red_arr_mem_46048)[(sext_i32_i64(local_tid_46044) + - (int64_t) 1) * - segment_sizze_nonzzero_46041 - - (int64_t) 1]; - } + if ((sle64((int64_t) 0, gtid_114004) && slt64(gtid_114004, m_75136)) && + (sle64((int64_t) 0, defunc_1_f_res_114159) && + slt64(defunc_1_f_res_114159, N_75135))) { + ((__global double *) mem_124914)[gtid_114004 * N_75135 + + defunc_1_f_res_114159] = x_114154; } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_1: + error_0: return; - #undef segred_group_sizze_40651 + #undef segmap_group_sizze_114150 } -__kernel void mainzisegred_small_40733(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46180_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29181, - int64_t num_groups_40780, - 
int64_t segment_sizze_nonzzero_46173, - __global unsigned char *mem_44397, - __global - unsigned char *defunc_4_map_res_mem_44916, - __global unsigned char *mem_45134) +__kernel void mainzisegmap_114177(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t N_75135, + int64_t m_75136, int64_t n_75139, + double hfrac_75141, int64_t k2p2_75149, + __global unsigned char *mem_124924, __global + unsigned char *mem_124927, __global + unsigned char *mem_124930, __global + unsigned char *mem_124932, __global + unsigned char *mem_124934) { - #define segred_group_sizze_40779 (mainzisegred_group_sizze_40727) + #define segmap_group_sizze_114213 (mainzisegmap_group_sizze_114179) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46180_backing_0 = - (__local volatile - char *) red_arr_mem_46180_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46175; - int32_t local_tid_46176; - int64_t group_sizze_46179; - int32_t wave_sizze_46178; - int32_t group_tid_46177; - - global_tid_46175 = get_global_id(0); - local_tid_46176 = get_local_id(0); - group_sizze_46179 = get_local_size(0); - wave_sizze_46178 = LOCKSTEP_WIDTH; - group_tid_46177 = get_group_id(0); - - int32_t phys_tid_40733; + int32_t global_tid_129325; + int32_t local_tid_129326; + int64_t group_sizze_129329; + int32_t wave_sizze_129328; + int32_t group_tid_129327; - phys_tid_40733 = global_tid_46175; + global_tid_129325 = get_global_id(0); + local_tid_129326 = get_local_id(0); + group_sizze_129329 = get_local_size(0); + wave_sizze_129328 = LOCKSTEP_WIDTH; + group_tid_129327 = get_group_id(0); - __local char *red_arr_mem_46180; + int32_t phys_tid_114177; - red_arr_mem_46180 = (__local char *) red_arr_mem_46180_backing_0; + phys_tid_114177 = global_tid_129325; - int32_t phys_group_id_46182; + int64_t gtid_114176; - phys_group_id_46182 = get_group_id(0); - for (int32_t i_46183 = 0; 
i_46183 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166 * N_29165, - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173))) - - phys_group_id_46182, sext_i64_i32(num_groups_40780)); - i_46183++) { - int32_t virt_group_id_46184 = phys_group_id_46182 + i_46183 * - sext_i64_i32(num_groups_40780); - int64_t gtid_40722 = squot64(squot64(sext_i32_i64(local_tid_46176), - segment_sizze_nonzzero_46173) + - sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173), - N_29165); - int64_t gtid_40723 = squot64(sext_i32_i64(local_tid_46176), - segment_sizze_nonzzero_46173) + - sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173) - - squot64(squot64(sext_i32_i64(local_tid_46176), - segment_sizze_nonzzero_46173) + - sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173), N_29165) * - N_29165; - int64_t gtid_40732 = srem64(sext_i32_i64(local_tid_46176), - i32_res_29181); + gtid_114176 = sext_i32_i64(group_tid_129327) * segmap_group_sizze_114213 + + sext_i32_i64(local_tid_129326); + if (slt64(gtid_114176, m_75136)) { + int64_t defunc_0_f_res_114220; + int64_t redout_119950 = (int64_t) 0; - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29181) && ((slt64(gtid_40722, - m_29166) && - slt64(gtid_40723, - N_29165)) && - slt64(sext_i32_i64(local_tid_46176), - i32_res_29181 * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173)))) { - float x_40788 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_40722 * - i32_res_29181 + - gtid_40732]; - float x_40789 = ((__global float *) mem_44397)[gtid_40723 * - i32_res_29181 + - gtid_40732]; - float defunc_1_f_res_40790 = x_40788 * x_40789; - - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - defunc_1_f_res_40790; - } - } else { - ((__local - float *) 
red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - 0.0F; - } + for (int64_t i_119951 = 0; i_119951 < n_75139; i_119951++) { + double x_114224 = ((__global double *) mem_124924)[i_119951 * + m_75136 + + gtid_114176]; + bool isnan_res_114225; + + isnan_res_114225 = futrts_isnan64(x_114224); + + bool cond_114226 = !isnan_res_114225; + int64_t defunc_0_f_res_114227 = btoi_bool_i64(cond_114226); + int64_t defunc_1_op_res_114223 = add64(defunc_0_f_res_114227, + redout_119950); + int64_t redout_tmp_129330 = defunc_1_op_res_114223; + + redout_119950 = redout_tmp_129330; } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29181)) { - // perform segmented scan to imitate reduction - { - float x_40783; - float x_40784; - float x_46185; - float x_46186; - bool ltid_in_bounds_46188; - - ltid_in_bounds_46188 = slt64(sext_i32_i64(local_tid_46176), - i32_res_29181 * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173)); - - int32_t skip_threads_46189; + defunc_0_f_res_114220 = redout_119950; + + double defunc_0_f_res_114228; + double redout_115314 = 0.0; + + for (int64_t i_115315 = 0; i_115315 < n_75139; i_115315++) { + bool cond_114233 = slt64(i_115315, defunc_0_f_res_114220); + double defunc_0_f_res_114234; + + if (cond_114233) { + bool y_114236 = slt64(i_115315, N_75135); + bool index_certs_114238; - // read input for in-block scan - { - if (ltid_in_bounds_46188) { - x_40784 = ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)]; - if ((local_tid_46176 - squot32(local_tid_46176, 32) * - 32) == 0) { - x_40783 = x_40784; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46189 = 1; - while (slt32(skip_threads_46189, 32)) { - if (sle32(skip_threads_46189, local_tid_46176 - - squot32(local_tid_46176, 32) * 32) && - ltid_in_bounds_46188) { - // read operands - { - x_40783 = ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176) - - sext_i32_i64(skip_threads_46189)]; - } 
- // perform operation - { - bool inactive_46190 = - slt64(srem64(sext_i32_i64(local_tid_46176), - i32_res_29181), - sext_i32_i64(local_tid_46176) - - sext_i32_i64(local_tid_46176 - - skip_threads_46189)); - - if (inactive_46190) { - x_40783 = x_40784; - } - if (!inactive_46190) { - float defunc_1_op_res_40785 = x_40783 + - x_40784; - - x_40783 = defunc_1_op_res_40785; - } - } - } - if (sle32(wave_sizze_46178, skip_threads_46189)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46189, local_tid_46176 - - squot32(local_tid_46176, 32) * 32) && - ltid_in_bounds_46188) { - // write result - { - ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - x_40783; - x_40784 = x_40783; - } - } - if (sle32(wave_sizze_46178, skip_threads_46189)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46189 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46176 - squot32(local_tid_46176, 32) * 32) == - 31 && ltid_in_bounds_46188) { - ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(squot32(local_tid_46176, - 32))] = - x_40783; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46191; - - // read input for in-block scan - { - if (squot32(local_tid_46176, 32) == 0 && - ltid_in_bounds_46188) { - x_46186 = ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)]; - if ((local_tid_46176 - squot32(local_tid_46176, - 32) * 32) == 0) { - x_46185 = x_46186; - } - } - } - // in-block scan (hopefully no barriers needed) + if (!y_114236) { { - skip_threads_46191 = 1; - while (slt32(skip_threads_46191, 32)) { - if (sle32(skip_threads_46191, local_tid_46176 - - squot32(local_tid_46176, 32) * 32) && - (squot32(local_tid_46176, 32) == 0 && - ltid_in_bounds_46188)) { - // read operands - { - x_46185 = ((volatile __local - float *) 
red_arr_mem_46180)[sext_i32_i64(local_tid_46176) - - sext_i32_i64(skip_threads_46191)]; - } - // perform operation - { - bool inactive_46192 = - slt64(srem64(sext_i32_i64(local_tid_46176 * - 32 + 32 - 1), - i32_res_29181), - sext_i32_i64(local_tid_46176 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46176 - - skip_threads_46191) * - 32 + 32 - 1)); - - if (inactive_46192) { - x_46185 = x_46186; - } - if (!inactive_46192) { - float defunc_1_op_res_46187 = x_46185 + - x_46186; - - x_46185 = defunc_1_op_res_46187; - } - } - } - if (sle32(wave_sizze_46178, skip_threads_46191)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46191, local_tid_46176 - - squot32(local_tid_46176, 32) * 32) && - (squot32(local_tid_46176, 32) == 0 && - ltid_in_bounds_46188)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - x_46185; - x_46186 = x_46185; - } - } - if (sle32(wave_sizze_46178, skip_threads_46191)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46191 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46176, 32) == 0 || - !ltid_in_bounds_46188)) { - // read operands - { - x_40784 = x_40783; - x_40783 = ((__local - float *) red_arr_mem_46180)[sext_i32_i64(squot32(local_tid_46176, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46193 = - slt64(srem64(sext_i32_i64(local_tid_46176), - i32_res_29181), - sext_i32_i64(local_tid_46176) - - sext_i32_i64(squot32(local_tid_46176, - 32) * 32 - 1)); - - if (inactive_46193) { - x_40783 = x_40784; - } - if (!inactive_46193) { - float defunc_1_op_res_40785 = x_40783 + x_40784; - - x_40783 = defunc_1_op_res_40785; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - x_40783; + if (atomic_cmpxchg_i32_global(global_failure, -1, + 195) == -1) { + global_failure_args[0] = i_115315; + global_failure_args[1] = N_75135; 
+ ; } + return; } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46176, 32) == 0) { - ((__local - float *) red_arr_mem_46180)[sext_i32_i64(local_tid_46176)] = - x_40784; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173) + - sext_i32_i64(local_tid_46176), m_29166 * N_29165) && - slt64(sext_i32_i64(local_tid_46176), - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173))) { - ((__global - float *) mem_45134)[squot64(sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173) + - sext_i32_i64(local_tid_46176), - N_29165) * N_29165 + - (sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173) + - sext_i32_i64(local_tid_46176) - - squot64(sext_i32_i64(virt_group_id_46184) * - squot64(segred_group_sizze_40779, - segment_sizze_nonzzero_46173) + - sext_i32_i64(local_tid_46176), - N_29165) * N_29165)] = ((__local - float *) red_arr_mem_46180)[(sext_i32_i64(local_tid_46176) + - (int64_t) 1) * - segment_sizze_nonzzero_46173 - - (int64_t) 1]; + + double defunc_0_f_res_t_res_114239 = ((__global + double *) mem_124927)[i_115315 * + m_75136 + + gtid_114176]; + + defunc_0_f_res_114234 = defunc_0_f_res_t_res_114239; + } else { + defunc_0_f_res_114234 = 0.0; } + + double defunc_0_f_res_114240 = defunc_0_f_res_114234 * + defunc_0_f_res_114234; + double defunc_1_op_res_114231 = defunc_0_f_res_114240 + + redout_115314; + double redout_tmp_129331 = defunc_1_op_res_114231; + + redout_115314 = redout_tmp_129331; } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_114228 = redout_115314; + + int64_t i64_arg_114241 = sub64(defunc_0_f_res_114220, k2p2_75149); + double i64_res_114242 = 
sitofp_i64_f64(i64_arg_114241); + double sqrt_arg_114243 = defunc_0_f_res_114228 / i64_res_114242; + double sqrt_res_114244; + + sqrt_res_114244 = futrts_sqrt64(sqrt_arg_114243); + + double i64_res_114245 = sitofp_i64_f64(defunc_0_f_res_114220); + double f64_arg_114246 = hfrac_75141 * i64_res_114245; + int64_t f64_res_114247 = fptosi_f64_i64(f64_arg_114246); + + ((__global int64_t *) mem_124930)[gtid_114176] = f64_res_114247; + ((__global int64_t *) mem_124932)[gtid_114176] = defunc_0_f_res_114220; + ((__global double *) mem_124934)[gtid_114176] = sqrt_res_114244; } - error_1: + error_0: return; - #undef segred_group_sizze_40779 + #undef segmap_group_sizze_114213 } -__kernel void mainzisegred_small_41311(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46432_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29175, - int64_t num_groups_41362, - int64_t segment_sizze_nonzzero_46425, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global unsigned char *mem_45232, - __global unsigned char *mem_45235) +__kernel void mainzisegmap_114290(__global int *global_failure, int64_t m_75136, + double hfrac_75141, int64_t k2p2_75149, + __global unsigned char *mem_124946, __global + unsigned char *mem_124949, __global + unsigned char *mem_124952, __global + unsigned char *mem_124954) { - #define segred_group_sizze_41361 (mainzisegred_group_sizze_41305) + #define segmap_group_sizze_114381 (mainzisegmap_group_sizze_114292) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46432_backing_0 = - (__local volatile - char *) red_arr_mem_46432_backing_aligned_0; - volatile __local bool local_failure; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - 
int32_t global_tid_46427; - int32_t local_tid_46428; - int64_t group_sizze_46431; - int32_t wave_sizze_46430; - int32_t group_tid_46429; + int32_t global_tid_129466; + int32_t local_tid_129467; + int64_t group_sizze_129470; + int32_t wave_sizze_129469; + int32_t group_tid_129468; - global_tid_46427 = get_global_id(0); - local_tid_46428 = get_local_id(0); - group_sizze_46431 = get_local_size(0); - wave_sizze_46430 = LOCKSTEP_WIDTH; - group_tid_46429 = get_group_id(0); + global_tid_129466 = get_global_id(0); + local_tid_129467 = get_local_id(0); + group_sizze_129470 = get_local_size(0); + wave_sizze_129469 = LOCKSTEP_WIDTH; + group_tid_129468 = get_group_id(0); - int32_t phys_tid_41311; + int32_t phys_tid_114290; - phys_tid_41311 = global_tid_46427; + phys_tid_114290 = global_tid_129466; - __local char *red_arr_mem_46432; + int64_t gtid_114289; - red_arr_mem_46432 = (__local char *) red_arr_mem_46432_backing_0; + gtid_114289 = sext_i32_i64(group_tid_129468) * segmap_group_sizze_114381 + + sext_i32_i64(local_tid_129467); + if (slt64(gtid_114289, m_75136)) { + int64_t defunc_0_f_res_114385 = ((__global + int64_t *) mem_124946)[gtid_114289]; + double defunc_0_f_res_114386 = ((__global + double *) mem_124949)[gtid_114289]; + int64_t i64_arg_114387 = sub64(defunc_0_f_res_114385, k2p2_75149); + double i64_res_114388 = sitofp_i64_f64(i64_arg_114387); + double sqrt_arg_114389 = defunc_0_f_res_114386 / i64_res_114388; + double sqrt_res_114390; + + sqrt_res_114390 = futrts_sqrt64(sqrt_arg_114389); + + double i64_res_114391 = sitofp_i64_f64(defunc_0_f_res_114385); + double f64_arg_114392 = hfrac_75141 * i64_res_114391; + int64_t f64_res_114393 = fptosi_f64_i64(f64_arg_114392); + + ((__global int64_t *) mem_124952)[gtid_114289] = f64_res_114393; + ((__global double *) mem_124954)[gtid_114289] = sqrt_res_114390; + } - int32_t phys_group_id_46434; + error_0: + return; + #undef segmap_group_sizze_114381 +} +__kernel void mainzisegmap_114413(__global int *global_failure, + int 
failure_is_an_option, __global + int64_t *global_failure_args, int64_t N_75135, + int64_t m_75136, + int64_t defunc_2_reduce_comm_res_76995, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124966) +{ + #define segmap_group_sizze_114437 (mainzisegmap_group_sizze_114415) - phys_group_id_46434 = get_group_id(0); - for (int32_t i_46435 = 0; i_46435 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, - squot64(segred_group_sizze_41361, - segment_sizze_nonzzero_46425))) - - phys_group_id_46434, sext_i64_i32(num_groups_41362)); - i_46435++) { - int32_t virt_group_id_46436 = phys_group_id_46434 + i_46435 * - sext_i64_i32(num_groups_41362); - int64_t gtid_41302 = squot64(sext_i32_i64(local_tid_46428), - segment_sizze_nonzzero_46425) + - sext_i32_i64(virt_group_id_46436) * - squot64(segred_group_sizze_41361, segment_sizze_nonzzero_46425); - int64_t gtid_41310 = srem64(sext_i32_i64(local_tid_46428), - i32_res_29175); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29175) && (slt64(gtid_41302, - m_29166) && - slt64(sext_i32_i64(local_tid_46428), - i32_res_29175 * - squot64(segred_group_sizze_41361, - segment_sizze_nonzzero_46425)))) { - int32_t defunc_0_f_res_41369 = ((__global - int32_t *) mem_45232)[gtid_41302]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_41310); - bool cond_41371 = slt32(index_primexp_42385, - defunc_0_f_res_41369); - float defunc_0_f_res_41372; - - if (cond_41371) { - int64_t i_41373 = sext_i32_i64(index_primexp_42385); - bool x_41374 = sle64((int64_t) 0, i_41373); - bool y_41375 = slt64(i_41373, N_29165); - bool bounds_check_41376 = x_41374 && y_41375; - bool index_certs_41377; - - if (!bounds_check_41376) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 18) == -1) { - global_failure_args[0] = i_41373; - global_failure_args[1] = N_29165; - ; - } - local_failure = 
true; - goto error_0; + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129505; + int32_t local_tid_129506; + int64_t group_sizze_129509; + int32_t wave_sizze_129508; + int32_t group_tid_129507; + + global_tid_129505 = get_global_id(0); + local_tid_129506 = get_local_id(0); + group_sizze_129509 = get_local_size(0); + wave_sizze_129508 = LOCKSTEP_WIDTH; + group_tid_129507 = get_group_id(0); + + int32_t phys_tid_114413; + + phys_tid_114413 = global_tid_129505; + + int64_t gtid_114412; + + gtid_114412 = sext_i32_i64(group_tid_129507) * segmap_group_sizze_114437 + + sext_i32_i64(local_tid_129506); + if (slt64(gtid_114412, m_75136)) { + int64_t x_114441 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114412]; + int64_t x_114442 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114412]; + double defunc_0_f_res_114443; + double redout_115320 = 0.0; + + for (int64_t i_115321 = 0; i_115321 < defunc_2_reduce_comm_res_76995; + i_115321++) { + bool cond_114448 = slt64(i_115321, x_114442); + double defunc_0_f_res_114449; + + if (cond_114448) { + int64_t x_114450 = add64(x_114441, i_115321); + int64_t x_114451 = sub64(x_114450, x_114442); + int64_t i_114452 = add64((int64_t) 1, x_114451); + bool x_114453 = sle64((int64_t) 0, i_114452); + bool y_114454 = slt64(i_114452, N_75135); + bool bounds_check_114455 = x_114453 && y_114454; + bool index_certs_114456; + + if (!bounds_check_114455) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 199) == -1) { + global_failure_args[0] = i_114452; + global_failure_args[1] = N_75135; + ; } + return; } - - float defunc_0_f_res_t_res_41378 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41302 * - N_29165 + - i_41373]; - - defunc_0_f_res_41372 = defunc_0_f_res_t_res_41378; - } else { - defunc_0_f_res_41372 = 0.0F; } - float defunc_0_f_res_41379 = defunc_0_f_res_41372 * - defunc_0_f_res_41372; + double 
defunc_0_f_res_t_res_114457 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114412 * + N_75135 + + i_114452]; - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - defunc_0_f_res_41379; - } + defunc_0_f_res_114449 = defunc_0_f_res_t_res_114457; } else { - ((__local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - 0.0F; + defunc_0_f_res_114449 = 0.0; } + + double defunc_1_op_res_114446 = defunc_0_f_res_114449 + + redout_115320; + double redout_tmp_129510 = defunc_1_op_res_114446; + + redout_115320 = redout_tmp_129510; } + defunc_0_f_res_114443 = redout_115320; + ((__global double *) mem_124966)[gtid_114412] = defunc_0_f_res_114443; + } + + error_0: + return; + #undef segmap_group_sizze_114437 +} +__kernel void mainzisegmap_114510(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t N_75135, + int64_t n_75139, double lam_75143, + int64_t iota_arg_77024, double i64_res_77031, + __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_124973) +{ + #define segmap_group_sizze_114530 (mainzisegmap_group_sizze_114512) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129572; + int32_t local_tid_129573; + int64_t group_sizze_129576; + int32_t wave_sizze_129575; + int32_t group_tid_129574; + + global_tid_129572 = get_global_id(0); + local_tid_129573 = get_local_id(0); + group_sizze_129576 = get_local_size(0); + wave_sizze_129575 = LOCKSTEP_WIDTH; + group_tid_129574 = get_group_id(0); + + int32_t phys_tid_114510; + + phys_tid_114510 = global_tid_129572; + + int64_t gtid_114509; + + gtid_114509 = sext_i32_i64(group_tid_129574) * segmap_group_sizze_114530 + + sext_i32_i64(local_tid_129573); + if (slt64(gtid_114509, iota_arg_77024)) { + int64_t i_114534 = add64(n_75139, gtid_114509); + 
bool x_114535 = sle64((int64_t) 0, i_114534); + bool y_114536 = slt64(i_114534, N_75135); + bool bounds_check_114537 = x_114535 && y_114536; + bool index_certs_114538; - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29175)) { - // perform segmented scan to imitate reduction + if (!bounds_check_114537) { { - float x_41365; - float x_41366; - float x_46437; - float x_46438; - bool ltid_in_bounds_46440; - - ltid_in_bounds_46440 = slt64(sext_i32_i64(local_tid_46428), - i32_res_29175 * - squot64(segred_group_sizze_41361, - segment_sizze_nonzzero_46425)); - - int32_t skip_threads_46441; - - // read input for in-block scan - { - if (ltid_in_bounds_46440) { - x_41366 = ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)]; - if ((local_tid_46428 - squot32(local_tid_46428, 32) * - 32) == 0) { - x_41365 = x_41366; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46441 = 1; - while (slt32(skip_threads_46441, 32)) { - if (sle32(skip_threads_46441, local_tid_46428 - - squot32(local_tid_46428, 32) * 32) && - ltid_in_bounds_46440) { - // read operands - { - x_41365 = ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428) - - sext_i32_i64(skip_threads_46441)]; - } - // perform operation - { - bool inactive_46442 = - slt64(srem64(sext_i32_i64(local_tid_46428), - i32_res_29175), - sext_i32_i64(local_tid_46428) - - sext_i32_i64(local_tid_46428 - - skip_threads_46441)); - - if (inactive_46442) { - x_41365 = x_41366; - } - if (!inactive_46442) { - float defunc_1_op_res_41367 = x_41365 + - x_41366; - - x_41365 = defunc_1_op_res_41367; - } - } - } - if (sle32(wave_sizze_46430, skip_threads_46441)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46441, local_tid_46428 - - squot32(local_tid_46428, 32) * 32) && - ltid_in_bounds_46440) { - // write result - { - ((volatile __local - float *) 
red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - x_41365; - x_41366 = x_41365; - } - } - if (sle32(wave_sizze_46430, skip_threads_46441)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46441 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46428 - squot32(local_tid_46428, 32) * 32) == - 31 && ltid_in_bounds_46440) { - ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(squot32(local_tid_46428, - 32))] = - x_41365; - } + if (atomic_cmpxchg_i32_global(global_failure, -1, 202) == -1) { + global_failure_args[0] = i_114534; + global_failure_args[1] = N_75135; + ; } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46443; - - // read input for in-block scan - { - if (squot32(local_tid_46428, 32) == 0 && - ltid_in_bounds_46440) { - x_46438 = ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)]; - if ((local_tid_46428 - squot32(local_tid_46428, - 32) * 32) == 0) { - x_46437 = x_46438; - } - } - } - // in-block scan (hopefully no barriers needed) + return; + } + } + + int64_t time_114539 = ((__global + int64_t *) mappingindices_mem_120107)[i_114534]; + double i64_res_114540 = sitofp_i64_f64(time_114539); + double logplus_arg_114541 = i64_res_114540 / i64_res_77031; + bool cond_114542 = 2.718281828459045 < logplus_arg_114541; + double logplus_res_114543; + + if (cond_114542) { + double log_res_114544; + + log_res_114544 = futrts_log64(logplus_arg_114541); + logplus_res_114543 = log_res_114544; + } else { + logplus_res_114543 = 1.0; + } + + double sqrt_res_114545; + + sqrt_res_114545 = futrts_sqrt64(logplus_res_114543); + + double defunc_0_f_res_114546 = lam_75143 * sqrt_res_114545; + + ((__global double *) mem_124973)[gtid_114509] = defunc_0_f_res_114546; + } + + error_0: + return; + #undef segmap_group_sizze_114530 +} +__kernel void 
mainzisegmap_114681(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, int64_t N_75135, + int64_t m_75136, int64_t n_75139, __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124987, __global + unsigned char *mem_124994, __global + unsigned char *mem_124996, __global + unsigned char *mem_124998, __global + unsigned char *mem_125001, __global + unsigned char *mem_125003) +{ + #define segmap_group_sizze_114960 (mainzisegmap_group_sizze_114683) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129767; + int32_t local_tid_129768; + int64_t group_sizze_129771; + int32_t wave_sizze_129770; + int32_t group_tid_129769; + + global_tid_129767 = get_global_id(0); + local_tid_129768 = get_local_id(0); + group_sizze_129771 = get_local_size(0); + wave_sizze_129770 = LOCKSTEP_WIDTH; + group_tid_129769 = get_group_id(0); + + int32_t phys_tid_114681; + + phys_tid_114681 = global_tid_129767; + + int64_t gtid_114680; + + gtid_114680 = sext_i32_i64(group_tid_129769) * segmap_group_sizze_114960 + + sext_i32_i64(local_tid_129768); + if (slt64(gtid_114680, m_75136)) { + int64_t x_114964 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114680]; + int64_t y_114966 = ((__global int64_t *) mem_124987)[gtid_114680]; + bool acc0_114968 = ((__global bool *) mem_124994)[gtid_114680]; + bool x_114973 = acc0_114968 && acc0_114968; + int64_t defunc_1_op_res_f_res_114977; + + if (acc0_114968) { + int64_t acc0_114969 = ((__global + int64_t *) mem_124996)[gtid_114680]; + + defunc_1_op_res_f_res_114977 = acc0_114969; + } else { + defunc_1_op_res_f_res_114977 = (int64_t) -1; + } + + bool cond_114983 = y_114966 == (int64_t) 0; + double defunc_0_f_res_114984; + + if (cond_114983) { + defunc_0_f_res_114984 = 0.0; + } else { + double acc0_114970 = ((__global 
double *) mem_124998)[gtid_114680]; + double i64_res_114985 = sitofp_i64_f64(y_114966); + double defunc_0_f_res_f_res_114986 = acc0_114970 / i64_res_114985; + + defunc_0_f_res_114984 = defunc_0_f_res_f_res_114986; + } + + bool cond_114987 = !x_114973; + int64_t fst_breakzq_114988; + + if (cond_114987) { + fst_breakzq_114988 = (int64_t) -1; + } else { + bool cond_114989 = slt64(defunc_1_op_res_f_res_114977, y_114966); + int64_t adjustValInds_res_114990; + + if (cond_114989) { + int64_t i_114991 = add64(x_114964, + defunc_1_op_res_f_res_114977); + bool x_114992 = sle64((int64_t) 0, i_114991); + bool y_114993 = slt64(i_114991, N_75135); + bool bounds_check_114994 = x_114992 && y_114993; + bool index_certs_114995; + + if (!bounds_check_114994) { { - skip_threads_46443 = 1; - while (slt32(skip_threads_46443, 32)) { - if (sle32(skip_threads_46443, local_tid_46428 - - squot32(local_tid_46428, 32) * 32) && - (squot32(local_tid_46428, 32) == 0 && - ltid_in_bounds_46440)) { - // read operands - { - x_46437 = ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428) - - sext_i32_i64(skip_threads_46443)]; - } - // perform operation - { - bool inactive_46444 = - slt64(srem64(sext_i32_i64(local_tid_46428 * - 32 + 32 - 1), - i32_res_29175), - sext_i32_i64(local_tid_46428 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46428 - - skip_threads_46443) * - 32 + 32 - 1)); - - if (inactive_46444) { - x_46437 = x_46438; - } - if (!inactive_46444) { - float defunc_1_op_res_46439 = x_46437 + - x_46438; - - x_46437 = defunc_1_op_res_46439; - } - } - } - if (sle32(wave_sizze_46430, skip_threads_46443)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46443, local_tid_46428 - - squot32(local_tid_46428, 32) * 32) && - (squot32(local_tid_46428, 32) == 0 && - ltid_in_bounds_46440)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - x_46437; - x_46438 = x_46437; - } - } - if (sle32(wave_sizze_46430, 
skip_threads_46443)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46443 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46428, 32) == 0 || - !ltid_in_bounds_46440)) { - // read operands - { - x_41366 = x_41365; - x_41365 = ((__local - float *) red_arr_mem_46432)[sext_i32_i64(squot32(local_tid_46428, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46445 = - slt64(srem64(sext_i32_i64(local_tid_46428), - i32_res_29175), - sext_i32_i64(local_tid_46428) - - sext_i32_i64(squot32(local_tid_46428, - 32) * 32 - 1)); - - if (inactive_46445) { - x_41365 = x_41366; - } - if (!inactive_46445) { - float defunc_1_op_res_41367 = x_41365 + x_41366; - - x_41365 = defunc_1_op_res_41367; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - x_41365; + if (atomic_cmpxchg_i32_global(global_failure, -1, + 208) == -1) { + global_failure_args[0] = i_114991; + global_failure_args[1] = N_75135; + ; } + return; } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46428, 32) == 0) { - ((__local - float *) red_arr_mem_46432)[sext_i32_i64(local_tid_46428)] = - x_41366; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_114996 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_114680 * + N_75135 + + i_114991]; + int64_t adjustValInds_res_t_res_114997 = sub64(x_114996, + n_75139); + + adjustValInds_res_114990 = adjustValInds_res_t_res_114997; + } else { + adjustValInds_res_114990 = (int64_t) -1; } + fst_breakzq_114988 = adjustValInds_res_114990; } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46436) * - squot64(segred_group_sizze_41361, - segment_sizze_nonzzero_46425) + - sext_i32_i64(local_tid_46428), m_29166) && - slt64(sext_i32_i64(local_tid_46428), - squot64(segred_group_sizze_41361, - 
segment_sizze_nonzzero_46425))) { - ((__global - float *) mem_45235)[sext_i32_i64(virt_group_id_46436) * - squot64(segred_group_sizze_41361, - segment_sizze_nonzzero_46425) + - sext_i32_i64(local_tid_46428)] = ((__local - float *) red_arr_mem_46432)[(sext_i32_i64(local_tid_46428) + - (int64_t) 1) * - segment_sizze_nonzzero_46425 - - (int64_t) 1]; - } + + bool cond_114998 = sle64(x_114964, (int64_t) 5); + bool cond_f_res_114999 = sle64(y_114966, (int64_t) 5); + bool x_115000 = !cond_114998; + bool y_115001 = cond_f_res_114999 && x_115000; + bool cond_115002 = cond_114998 || y_115001; + int64_t fst_breakzq_115003; + + if (cond_115002) { + fst_breakzq_115003 = (int64_t) -2; + } else { + fst_breakzq_115003 = fst_breakzq_114988; } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_125001)[gtid_114680] = fst_breakzq_115003; + ((__global double *) mem_125003)[gtid_114680] = defunc_0_f_res_114984; } - error_1: + error_0: return; - #undef segred_group_sizze_41361 + #undef segmap_group_sizze_114960 } -__kernel void mainzisegred_small_41336(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46372_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29175, - int64_t num_groups_41348, - int64_t segment_sizze_nonzzero_46365, - __global unsigned char *images_mem_44381, - __global unsigned char *mem_45232) +__kernel void mainzisegmap_114832(__global int *global_failure, int64_t m_75136, + int64_t num_groups_114855, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_3_map_res_mem_124960, + __global unsigned char *mem_124985, __global + unsigned char *mem_124987) { - #define segred_group_sizze_41347 (mainzisegred_group_sizze_41330) + #define segmap_group_sizze_114854 (mainzisegmap_group_sizze_114834) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - 
__local volatile char *restrict red_arr_mem_46372_backing_0 = - (__local volatile - char *) red_arr_mem_46372_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46367; - int32_t local_tid_46368; - int64_t group_sizze_46371; - int32_t wave_sizze_46370; - int32_t group_tid_46369; + int32_t global_tid_129605; + int32_t local_tid_129606; + int64_t group_sizze_129609; + int32_t wave_sizze_129608; + int32_t group_tid_129607; + + global_tid_129605 = get_global_id(0); + local_tid_129606 = get_local_id(0); + group_sizze_129609 = get_local_size(0); + wave_sizze_129608 = LOCKSTEP_WIDTH; + group_tid_129607 = get_group_id(0); + + int32_t phys_tid_114832; + + phys_tid_114832 = global_tid_129605; + + int32_t phys_group_id_129610; + + phys_group_id_129610 = get_group_id(0); + for (int32_t i_129611 = 0; i_129611 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, segmap_group_sizze_114854)) - + phys_group_id_129610, sext_i64_i32(num_groups_114855)); + i_129611++) { + int32_t virt_group_id_129612 = phys_group_id_129610 + i_129611 * + sext_i64_i32(num_groups_114855); + int64_t gtid_114831 = sext_i32_i64(virt_group_id_129612) * + segmap_group_sizze_114854 + sext_i32_i64(local_tid_129606); + + if (slt64(gtid_114831, m_75136)) { + int64_t x_114861 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_114831]; + int64_t x_114862 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114831]; + double x_114863 = ((__global + double *) defunc_3_map_res_mem_124960)[gtid_114831]; + int64_t y_114864 = sub64(x_114861, x_114862); + double i64_res_114865 = sitofp_i64_f64(x_114862); + double sqrt_res_114866; + + sqrt_res_114866 = futrts_sqrt64(i64_res_114865); + + double y_114867 = x_114863 * sqrt_res_114866; + + ((__global double *) mem_124985)[gtid_114831] = y_114867; + ((__global int64_t *) mem_124987)[gtid_114831] = y_114864; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } - global_tid_46367 = get_global_id(0); - local_tid_46368 = get_local_id(0); - 
group_sizze_46371 = get_local_size(0); - wave_sizze_46370 = LOCKSTEP_WIDTH; - group_tid_46369 = get_group_id(0); + error_0: + return; + #undef segmap_group_sizze_114854 +} +__kernel void mainzisegmap_intragroup_102969(__global int *global_failure, + __local volatile + int64_t *mem_120188_backing_aligned_0, + __local volatile + int64_t *mem_120186_backing_aligned_1, + __local volatile + int64_t *mem_120184_backing_aligned_2, + int64_t N_75135, int64_t n_75139, + int64_t m_75231, __global + unsigned char *images_mem_120108, + __global unsigned char *mem_120191, + __global unsigned char *mem_120194, + __global unsigned char *mem_120197) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_120188_backing_2 = (__local volatile + char *) mem_120188_backing_aligned_0; + __local volatile char *restrict mem_120186_backing_1 = (__local volatile + char *) mem_120186_backing_aligned_1; + __local volatile char *restrict mem_120184_backing_0 = (__local volatile + char *) mem_120184_backing_aligned_2; - int32_t phys_tid_41336; + if (*global_failure >= 0) + return; - phys_tid_41336 = global_tid_46367; + int32_t global_tid_126393; + int32_t local_tid_126394; + int64_t group_sizze_126397; + int32_t wave_sizze_126396; + int32_t group_tid_126395; - __local char *red_arr_mem_46372; + global_tid_126393 = get_global_id(0); + local_tid_126394 = get_local_id(0); + group_sizze_126397 = get_local_size(0); + wave_sizze_126396 = LOCKSTEP_WIDTH; + group_tid_126395 = get_group_id(0); - red_arr_mem_46372 = (__local char *) red_arr_mem_46372_backing_0; + int32_t phys_tid_102969; - int32_t phys_group_id_46374; + phys_tid_102969 = group_tid_126395; - phys_group_id_46374 = get_group_id(0); - for (int32_t i_46375 = 0; i_46375 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365))) - - phys_group_id_46374, sext_i64_i32(num_groups_41348)); - i_46375++) { - int32_t 
virt_group_id_46376 = phys_group_id_46374 + i_46375 * - sext_i64_i32(num_groups_41348); - int64_t gtid_41327 = squot64(sext_i32_i64(local_tid_46368), - segment_sizze_nonzzero_46365) + - sext_i32_i64(virt_group_id_46376) * - squot64(segred_group_sizze_41347, segment_sizze_nonzzero_46365); - int64_t gtid_41335 = srem64(sext_i32_i64(local_tid_46368), - i32_res_29175); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29175) && (slt64(gtid_41327, - m_29166) && - slt64(sext_i32_i64(local_tid_46368), - i32_res_29175 * - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365)))) { - float x_41355 = ((__global - float *) images_mem_44381)[gtid_41327 * - N_29165 + - gtid_41335]; - bool isnan_res_41356; - - isnan_res_41356 = futrts_isnan32(x_41355); - - bool cond_41357 = !isnan_res_41356; - int32_t defunc_0_f_res_41358 = btoi_bool_i32(cond_41357); - - // save map-out results - { } - // save results to be reduced - { - ((__local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - defunc_0_f_res_41358; - } - } else { - ((__local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - 0; + int32_t ltid_pre_126398; + + ltid_pre_126398 = local_tid_126394; + + int64_t gtid_102961; + + gtid_102961 = sext_i32_i64(group_tid_126395); + + __local char *mem_120184; + + mem_120184 = (__local char *) mem_120184_backing_0; + + int64_t gtid_102964 = sext_i32_i64(ltid_pre_126398); + int32_t phys_tid_102965 = local_tid_126394; + int64_t binop_y_115020 = (int64_t) -1 * gtid_102964; + int64_t slice_115021 = m_75231 + binop_y_115020; + double x_103179 = ((__global double *) images_mem_120108)[gtid_102961 * + N_75135 + + slice_115021]; + bool defunc_0_f_res_103180; + + defunc_0_f_res_103180 = futrts_isnan64(x_103179); + + bool defunc_0_g_res_103181 = !defunc_0_f_res_103180; + int64_t defunc_0_f_res_103182 = btoi_bool_i64(defunc_0_g_res_103181); + + ((__local int64_t *) mem_120184)[gtid_102964] = defunc_0_f_res_103182; + 
barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_126399; + + dims_flat_126399 = n_75139; + + int64_t x_103176; + int64_t x_103177; + int64_t x_126401; + int64_t x_126402; + bool ltid_in_bounds_126404; + + ltid_in_bounds_126404 = slt64(sext_i32_i64(local_tid_126394), n_75139); + + int32_t skip_threads_126405; + + // read input for in-block scan + { + if (ltid_in_bounds_126404) { + x_103177 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)]; + if ((local_tid_126394 - squot32(local_tid_126394, 32) * 32) == 0) { + x_103176 = x_103177; } } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29175)) { - // perform segmented scan to imitate reduction - { - int32_t x_41351; - int32_t x_41352; - int32_t x_46377; - int32_t x_46378; - bool ltid_in_bounds_46380; - - ltid_in_bounds_46380 = slt64(sext_i32_i64(local_tid_46368), - i32_res_29175 * - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365)); - - int32_t skip_threads_46381; - - // read input for in-block scan + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126405 = 1; + while (slt32(skip_threads_126405, 32)) { + if (sle32(skip_threads_126405, local_tid_126394 - + squot32(local_tid_126394, 32) * 32) && + ltid_in_bounds_126404) { + // read operands { - if (ltid_in_bounds_46380) { - x_41352 = ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)]; - if ((local_tid_46368 - squot32(local_tid_46368, 32) * - 32) == 0) { - x_41351 = x_41352; - } - } + x_103176 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394) - + sext_i32_i64(skip_threads_126405)]; } - // in-block scan (hopefully no barriers needed) + // perform operation { - skip_threads_46381 = 1; - while (slt32(skip_threads_46381, 32)) { - if (sle32(skip_threads_46381, local_tid_46368 - - squot32(local_tid_46368, 32) * 32) && - ltid_in_bounds_46380) { - // read operands - { - x_41351 = ((volatile __local - int32_t *) 
red_arr_mem_46372)[sext_i32_i64(local_tid_46368) - - sext_i32_i64(skip_threads_46381)]; - } - // perform operation - { - bool inactive_46382 = - slt64(srem64(sext_i32_i64(local_tid_46368), - i32_res_29175), - sext_i32_i64(local_tid_46368) - - sext_i32_i64(local_tid_46368 - - skip_threads_46381)); - - if (inactive_46382) { - x_41351 = x_41352; - } - if (!inactive_46382) { - int32_t defunc_1_op_res_41353 = - add32(x_41351, x_41352); - - x_41351 = defunc_1_op_res_41353; - } - } - } - if (sle32(wave_sizze_46370, skip_threads_46381)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46381, local_tid_46368 - - squot32(local_tid_46368, 32) * 32) && - ltid_in_bounds_46380) { - // write result - { - ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - x_41351; - x_41352 = x_41351; - } - } - if (sle32(wave_sizze_46370, skip_threads_46381)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46381 *= 2; + bool inactive_126406 = + slt64(srem64(sext_i32_i64(local_tid_126394), n_75139), + sext_i32_i64(local_tid_126394) - + sext_i32_i64(local_tid_126394 - + skip_threads_126405)); + + if (inactive_126406) { + x_103176 = x_103177; + } + if (!inactive_126406) { + int64_t defunc_1_op_res_103178 = add64(x_103176, + x_103177); + + x_103176 = defunc_1_op_res_103178; } } + } + if (sle32(wave_sizze_126396, skip_threads_126405)) { barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + } + if (sle32(skip_threads_126405, local_tid_126394 - + squot32(local_tid_126394, 32) * 32) && + ltid_in_bounds_126404) { + // write result { - if ((local_tid_46368 - squot32(local_tid_46368, 32) * 32) == - 31 && ltid_in_bounds_46380) { - ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(squot32(local_tid_46368, - 32))] = - x_41351; - } + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)] = + x_103176; + x_103177 = x_103176; } + } + if (sle32(wave_sizze_126396, skip_threads_126405)) { 
barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46383; - - // read input for in-block scan + } + skip_threads_126405 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126394 - squot32(local_tid_126394, 32) * 32) == 31 && + ltid_in_bounds_126404) { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(squot32(local_tid_126394, + 32))] = x_103176; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126407; + + // read input for in-block scan + { + if (squot32(local_tid_126394, 32) == 0 && ltid_in_bounds_126404) { + x_126402 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)]; + if ((local_tid_126394 - squot32(local_tid_126394, 32) * 32) == + 0) { + x_126401 = x_126402; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126407 = 1; + while (slt32(skip_threads_126407, 32)) { + if (sle32(skip_threads_126407, local_tid_126394 - + squot32(local_tid_126394, 32) * 32) && + (squot32(local_tid_126394, 32) == 0 && + ltid_in_bounds_126404)) { + // read operands { - if (squot32(local_tid_46368, 32) == 0 && - ltid_in_bounds_46380) { - x_46378 = ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)]; - if ((local_tid_46368 - squot32(local_tid_46368, - 32) * 32) == 0) { - x_46377 = x_46378; - } - } + x_126401 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394) - + sext_i32_i64(skip_threads_126407)]; } - // in-block scan (hopefully no barriers needed) + // perform operation { - skip_threads_46383 = 1; - while (slt32(skip_threads_46383, 32)) { - if (sle32(skip_threads_46383, local_tid_46368 - - squot32(local_tid_46368, 32) * 32) && - (squot32(local_tid_46368, 32) == 0 && - ltid_in_bounds_46380)) { - // read 
operands - { - x_46377 = ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368) - - sext_i32_i64(skip_threads_46383)]; - } - // perform operation - { - bool inactive_46384 = - slt64(srem64(sext_i32_i64(local_tid_46368 * - 32 + 32 - 1), - i32_res_29175), - sext_i32_i64(local_tid_46368 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46368 - - skip_threads_46383) * - 32 + 32 - 1)); - - if (inactive_46384) { - x_46377 = x_46378; - } - if (!inactive_46384) { - int32_t defunc_1_op_res_46379 = - add32(x_46377, x_46378); - - x_46377 = defunc_1_op_res_46379; - } - } - } - if (sle32(wave_sizze_46370, skip_threads_46383)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46383, local_tid_46368 - - squot32(local_tid_46368, 32) * 32) && - (squot32(local_tid_46368, 32) == 0 && - ltid_in_bounds_46380)) { - // write result - { - ((volatile __local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - x_46377; - x_46378 = x_46377; - } - } - if (sle32(wave_sizze_46370, skip_threads_46383)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46383 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46368, 32) == 0 || - !ltid_in_bounds_46380)) { - // read operands - { - x_41352 = x_41351; - x_41351 = ((__local - int32_t *) red_arr_mem_46372)[sext_i32_i64(squot32(local_tid_46368, - 32)) - - (int64_t) 1]; + bool inactive_126408 = + slt64(srem64(sext_i32_i64(local_tid_126394 * 32 + + 32 - 1), n_75139), + sext_i32_i64(local_tid_126394 * 32 + 32 - + 1) - sext_i32_i64((local_tid_126394 - + skip_threads_126407) * + 32 + 32 - 1)); + + if (inactive_126408) { + x_126401 = x_126402; } - // perform operation - { - bool inactive_46385 = - slt64(srem64(sext_i32_i64(local_tid_46368), - i32_res_29175), - sext_i32_i64(local_tid_46368) - - sext_i32_i64(squot32(local_tid_46368, - 32) * 32 - 1)); + if (!inactive_126408) { + int64_t defunc_1_op_res_126403 = add64(x_126401, + 
x_126402); - if (inactive_46385) { - x_41351 = x_41352; - } - if (!inactive_46385) { - int32_t defunc_1_op_res_41353 = add32(x_41351, - x_41352); - - x_41351 = defunc_1_op_res_41353; - } - } - // write final result - { - ((__local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - x_41351; + x_126401 = defunc_1_op_res_126403; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46368, 32) == 0) { - ((__local - int32_t *) red_arr_mem_46372)[sext_i32_i64(local_tid_46368)] = - x_41352; + if (sle32(wave_sizze_126396, skip_threads_126407)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126407, local_tid_126394 - + squot32(local_tid_126394, 32) * 32) && + (squot32(local_tid_126394, 32) == 0 && + ltid_in_bounds_126404)) { + // write result + { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)] = + x_126401; + x_126402 = x_126401; } } - barrier(CLK_LOCAL_MEM_FENCE); + if (sle32(wave_sizze_126396, skip_threads_126407)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126407 *= 2; } } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46376) * - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365) + - sext_i32_i64(local_tid_46368), m_29166) && - slt64(sext_i32_i64(local_tid_46368), - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365))) { - ((__global - int32_t *) mem_45232)[sext_i32_i64(virt_group_id_46376) * - squot64(segred_group_sizze_41347, - segment_sizze_nonzzero_46365) + - sext_i32_i64(local_tid_46368)] = - ((__local - int32_t *) red_arr_mem_46372)[(sext_i32_i64(local_tid_46368) + - (int64_t) 1) * - segment_sizze_nonzzero_46365 - + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126394, 32) == 0 || !ltid_in_bounds_126404)) { + // read operands + { + x_103177 = x_103176; + x_103176 = ((__local + int64_t 
*) mem_120184)[sext_i32_i64(squot32(local_tid_126394, + 32)) - (int64_t) 1]; } + // perform operation + { + bool inactive_126409 = + slt64(srem64(sext_i32_i64(local_tid_126394), n_75139), + sext_i32_i64(local_tid_126394) - + sext_i32_i64(squot32(local_tid_126394, 32) * 32 - + 1)); + + if (inactive_126409) { + x_103176 = x_103177; + } + if (!inactive_126409) { + int64_t defunc_1_op_res_103178 = add64(x_103176, x_103177); + + x_103176 = defunc_1_op_res_103178; + } + } + // write final result + { + ((__local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)] = + x_103176; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126394, 32) == 0) { + ((__local int64_t *) mem_120184)[sext_i32_i64(local_tid_126394)] = + x_103177; } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } + barrier(CLK_LOCAL_MEM_FENCE); - error_1: + int64_t last_res_103183 = ((__local int64_t *) mem_120184)[m_75231]; + __local char *mem_120186; + + mem_120186 = (__local char *) mem_120186_backing_1; + ((__local double *) mem_120186)[sext_i32_i64(local_tid_126394)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_120188; + + mem_120188 = (__local char *) mem_120188_backing_2; + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126394)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_102966 = sext_i32_i64(ltid_pre_126398); + int32_t phys_tid_102967 = local_tid_126394; + int64_t binop_y_115024 = (int64_t) -1 * write_i_102966; + int64_t slice_115025 = m_75231 + binop_y_115024; + double x_103188 = ((__global double *) images_mem_120108)[gtid_102961 * + N_75135 + + slice_115025]; + bool defunc_0_f_res_103191; + + defunc_0_f_res_103191 = futrts_isnan64(x_103188); + + bool defunc_0_g_res_103192 = !defunc_0_f_res_103191; + int64_t defunc_1_f_res_103193; + + if (defunc_0_g_res_103192) { + int64_t x_103189 = ((__local int64_t *) mem_120184)[write_i_102966]; + 
int64_t defunc_1_f_res_t_res_103194 = sub64(x_103189, (int64_t) 1); + + defunc_1_f_res_103193 = defunc_1_f_res_t_res_103194; + } else { + defunc_1_f_res_103193 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_103193) && + slt64(defunc_1_f_res_103193, n_75139)) { + ((__local int64_t *) mem_120188)[defunc_1_f_res_103193] = + write_i_102966; + } + if (sle64((int64_t) 0, defunc_1_f_res_103193) && + slt64(defunc_1_f_res_103193, n_75139)) { + ((__local double *) mem_120186)[defunc_1_f_res_103193] = x_103188; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_126394 == 0) { + ((__global int64_t *) mem_120191)[gtid_102961] = last_res_103183; + } + ((__global double *) mem_120194)[gtid_102961 * n_75139 + + sext_i32_i64(local_tid_126394)] = ((__local + double *) mem_120186)[sext_i32_i64(local_tid_126394)]; + barrier(CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_120197)[gtid_102961 * n_75139 + + sext_i32_i64(local_tid_126394)] = + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126394)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: return; - #undef segred_group_sizze_41347 } -__kernel void mainzisegred_small_41499(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46541_backing_aligned_0, - int64_t N_29165, int64_t m_29166, - int64_t i32_res_29568, - int64_t num_groups_41521, - int64_t segment_sizze_nonzzero_46534, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45278) +__kernel void mainzisegmap_intragroup_110903(__global int *global_failure, + __local volatile + int64_t *mem_123888_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128051_backing_aligned_1, + __local volatile + int64_t *mem_123885_backing_aligned_2, + __local volatile + int64_t *mem_123882_backing_aligned_3, + __local volatile + int64_t 
*red_arr_mem_128047_backing_aligned_4, + __local volatile + int64_t *red_arr_mem_128042_backing_aligned_5, + __local volatile + int64_t *mem_123878_backing_aligned_6, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t computed_group_sizze_110870, + int64_t binop_x_120251, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123892, + __global unsigned char *mem_123895, + __global unsigned char *mem_123897) { - #define segred_group_sizze_41520 (mainzisegred_group_sizze_41493) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46541_backing_0 = + __local volatile char *restrict mem_123888_backing_6 = (__local volatile + char *) mem_123888_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128051_backing_5 = (__local volatile - char *) red_arr_mem_46541_backing_aligned_0; - volatile __local bool local_failure; + char *) red_arr_mem_128051_backing_aligned_1; + __local volatile char *restrict mem_123885_backing_4 = (__local volatile + char *) mem_123885_backing_aligned_2; + __local volatile char *restrict mem_123882_backing_3 = (__local volatile + char *) mem_123882_backing_aligned_3; + __local volatile char *restrict red_arr_mem_128047_backing_2 = + (__local volatile + char *) red_arr_mem_128047_backing_aligned_4; + __local volatile char *restrict red_arr_mem_128042_backing_1 = + (__local volatile + char *) red_arr_mem_128042_backing_aligned_5; + __local volatile char *restrict mem_123878_backing_0 = (__local volatile + char *) mem_123878_backing_aligned_6; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + 
return; - int32_t global_tid_46536; - int32_t local_tid_46537; - int64_t group_sizze_46540; - int32_t wave_sizze_46539; - int32_t group_tid_46538; + int32_t global_tid_128034; + int32_t local_tid_128035; + int64_t group_sizze_128038; + int32_t wave_sizze_128037; + int32_t group_tid_128036; - global_tid_46536 = get_global_id(0); - local_tid_46537 = get_local_id(0); - group_sizze_46540 = get_local_size(0); - wave_sizze_46539 = LOCKSTEP_WIDTH; - group_tid_46538 = get_group_id(0); + global_tid_128034 = get_global_id(0); + local_tid_128035 = get_local_id(0); + group_sizze_128038 = get_local_size(0); + wave_sizze_128037 = LOCKSTEP_WIDTH; + group_tid_128036 = get_group_id(0); - int32_t phys_tid_41499; + int32_t phys_tid_110903; - phys_tid_41499 = global_tid_46536; + phys_tid_110903 = group_tid_128036; - __local char *red_arr_mem_46541; + int32_t ltid_pre_128039; - red_arr_mem_46541 = (__local char *) red_arr_mem_46541_backing_0; + ltid_pre_128039 = local_tid_128035; - int32_t phys_group_id_46543; + int32_t ltid_pre_128040; - phys_group_id_46543 = get_group_id(0); - for (int32_t i_46544 = 0; i_46544 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534))) - - phys_group_id_46543, sext_i64_i32(num_groups_41521)); - i_46544++) { - int32_t virt_group_id_46545 = phys_group_id_46543 + i_46544 * - sext_i64_i32(num_groups_41521); - int64_t gtid_41490 = squot64(sext_i32_i64(local_tid_46537), - segment_sizze_nonzzero_46534) + - sext_i32_i64(virt_group_id_46545) * - squot64(segred_group_sizze_41520, segment_sizze_nonzzero_46534); - int64_t gtid_41498 = srem64(sext_i32_i64(local_tid_46537), - i32_res_29568); + ltid_pre_128040 = squot32(local_tid_128035, sext_i64_i32(k2p2zq_75151)); + + int32_t ltid_pre_128041; + + ltid_pre_128041 = local_tid_128035 - squot32(local_tid_128035, + sext_i64_i32(k2p2zq_75151)) * + sext_i64_i32(k2p2zq_75151); + + int64_t gtid_110868; + + gtid_110868 = sext_i32_i64(group_tid_128036); + + double 
defunc_11_internal_map_res_transformed_row_111033; + + defunc_11_internal_map_res_transformed_row_111033 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_110868 * + n_75139 + + index_primexp_76437]; + + __local char *mem_123878; + + mem_123878 = (__local char *) mem_123878_backing_0; + + double defunc_0_f_res_111034; + int64_t gtid_110871 = sext_i32_i64(ltid_pre_128039); + int32_t phys_tid_110872 = local_tid_128035; + __local char *red_arr_mem_128042; + + red_arr_mem_128042 = (__local char *) red_arr_mem_128042_backing_1; + if (slt64(gtid_110871, k2p2zq_75151)) { + double x_111040 = ((__global double *) mem_120246)[gtid_110871 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110868 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double defunc_0_f_res_111041; + double redout_119887 = 0.0; - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_29568) && (slt64(gtid_41490, - m_29166) && - slt64(sext_i32_i64(local_tid_46537), - i32_res_29568 * - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534)))) { - int32_t x_41529 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_41490]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_41498); - bool cond_41531 = slt32(index_primexp_42390, x_41529); - float defunc_0_f_res_41532; - - if (cond_41531) { - int32_t x_41528 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_41490]; - int32_t x_41533 = add32(x_41528, index_primexp_42390); - int32_t x_41534 = sub32(x_41533, x_41529); - int32_t i_41535 = add32(1, x_41534); - int64_t i_41536 = sext_i32_i64(i_41535); - bool x_41537 = sle64((int64_t) 0, i_41536); - bool y_41538 = slt64(i_41536, N_29165); - bool bounds_check_41539 = x_41537 && y_41538; - bool index_certs_41540; - - if (!bounds_check_41539) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 22) == -1) { - global_failure_args[0] = i_41536; - global_failure_args[1] = N_29165; - ; - } - local_failure = true; - goto error_0; - } - } - - float 
defunc_0_f_res_t_res_41541 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_41490 * - N_29165 + - i_41536]; - - defunc_0_f_res_41532 = defunc_0_f_res_t_res_41541; - } else { - defunc_0_f_res_41532 = 0.0F; - } - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - defunc_0_f_res_41532; - } - } else { - ((__local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - 0.0F; - } + for (int64_t i_119888 = 0; i_119888 < k2p2zq_75151; i_119888++) { + double x_111045 = ((__global double *) mem_120246)[i_119888 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110868 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111046 = ((__global + double *) mem_param_123778)[gtid_110868 * + binop_x_120251 + + gtid_110871 * + k2p2zq_75151 + + i_119888]; + double defunc_1_f_res_111047 = x_111045 * x_111046; + double defunc_1_op_res_111044 = defunc_1_f_res_111047 + + redout_119887; + double redout_tmp_128044 = defunc_1_op_res_111044; + + redout_119887 = redout_tmp_128044; } + defunc_0_f_res_111041 = redout_119887; - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_29568)) { - // perform segmented scan to imitate reduction + double defunc_1_f_res_111048 = x_111040 * defunc_0_f_res_111041; + + ((__local double *) red_arr_mem_128042)[gtid_110871] = + defunc_1_f_res_111048; + ((__local double *) mem_123878)[gtid_110871] = defunc_0_f_res_111041; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128045; + int32_t skip_waves_128046; + + skip_waves_128046 = 1; + + double x_111036; + double x_111037; + + offset_128045 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128035, sext_i64_i32(k2p2zq_75151))) { + x_111036 = ((__local + double *) red_arr_mem_128042)[sext_i32_i64(local_tid_128035 + + offset_128045)]; + } + } + offset_128045 = 1; + while 
(slt32(offset_128045, wave_sizze_128037)) { + if (slt32(local_tid_128035 + offset_128045, + sext_i64_i32(k2p2zq_75151)) && ((local_tid_128035 - + squot32(local_tid_128035, + wave_sizze_128037) * + wave_sizze_128037) & (2 * + offset_128045 - + 1)) == + 0) { + // read array element + { + x_111037 = ((volatile __local + double *) red_arr_mem_128042)[sext_i32_i64(local_tid_128035 + + offset_128045)]; + } + // apply reduction operation { - float x_41524; - float x_41525; - float x_46546; - float x_46547; - bool ltid_in_bounds_46549; + double defunc_1_op_res_111038 = x_111036 + x_111037; - ltid_in_bounds_46549 = slt64(sext_i32_i64(local_tid_46537), - i32_res_29568 * - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534)); + x_111036 = defunc_1_op_res_111038; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128042)[sext_i32_i64(local_tid_128035)] = + x_111036; + } + } + offset_128045 *= 2; + } + while (slt32(skip_waves_128046, + squot32(sext_i64_i32(computed_group_sizze_110870) + + wave_sizze_128037 - 1, wave_sizze_128037))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128045 = skip_waves_128046 * wave_sizze_128037; + if (slt32(local_tid_128035 + offset_128045, + sext_i64_i32(k2p2zq_75151)) && ((local_tid_128035 - + squot32(local_tid_128035, + wave_sizze_128037) * + wave_sizze_128037) == 0 && + (squot32(local_tid_128035, + wave_sizze_128037) & + (2 * skip_waves_128046 - + 1)) == 0)) { + // read array element + { + x_111037 = ((__local + double *) red_arr_mem_128042)[sext_i32_i64(local_tid_128035 + + offset_128045)]; + } + // apply reduction operation + { + double defunc_1_op_res_111038 = x_111036 + x_111037; - int32_t skip_threads_46550; + x_111036 = defunc_1_op_res_111038; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128042)[sext_i32_i64(local_tid_128035)] = + x_111036; + } + } + skip_waves_128046 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_111034 = ((__local + double *) 
red_arr_mem_128042)[(int64_t) 0]; + + double fr_111049 = 1.0 + defunc_0_f_res_111034; + double defunc_0_f_res_111050; + int64_t gtid_110873 = sext_i32_i64(ltid_pre_128039); + int32_t phys_tid_110874 = local_tid_128035; + __local char *red_arr_mem_128047; + + red_arr_mem_128047 = (__local char *) red_arr_mem_128047_backing_2; + if (slt64(gtid_110873, k2p2zq_75151)) { + double x_111054 = ((__global double *) mem_120246)[gtid_110873 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110868 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111055 = ((__global double *) mem_param_123786)[gtid_110868 * + k2p2zq_75151 + + gtid_110873]; + double defunc_1_f_res_111056 = x_111054 * x_111055; + + ((__local double *) red_arr_mem_128047)[gtid_110873] = + defunc_1_f_res_111056; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128049; + int32_t skip_waves_128050; + + skip_waves_128050 = 1; + + double x_111051; + double x_111052; + + offset_128049 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128035, sext_i64_i32(k2p2zq_75151))) { + x_111051 = ((__local + double *) red_arr_mem_128047)[sext_i32_i64(local_tid_128035 + + offset_128049)]; + } + } + offset_128049 = 1; + while (slt32(offset_128049, wave_sizze_128037)) { + if (slt32(local_tid_128035 + offset_128049, + sext_i64_i32(k2p2zq_75151)) && ((local_tid_128035 - + squot32(local_tid_128035, + wave_sizze_128037) * + wave_sizze_128037) & (2 * + offset_128049 - + 1)) == + 0) { + // read array element + { + x_111052 = ((volatile __local + double *) red_arr_mem_128047)[sext_i32_i64(local_tid_128035 + + offset_128049)]; + } + // apply reduction operation + { + double defunc_1_op_res_111053 = x_111051 + x_111052; - // read input for in-block scan - { - if (ltid_in_bounds_46549) { - x_41525 = ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)]; - if ((local_tid_46537 - squot32(local_tid_46537, 32) * - 32) == 0) { - x_41524 = x_41525; - } - } - } - // 
in-block scan (hopefully no barriers needed) - { - skip_threads_46550 = 1; - while (slt32(skip_threads_46550, 32)) { - if (sle32(skip_threads_46550, local_tid_46537 - - squot32(local_tid_46537, 32) * 32) && - ltid_in_bounds_46549) { - // read operands - { - x_41524 = ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537) - - sext_i32_i64(skip_threads_46550)]; - } - // perform operation - { - bool inactive_46551 = - slt64(srem64(sext_i32_i64(local_tid_46537), - i32_res_29568), - sext_i32_i64(local_tid_46537) - - sext_i32_i64(local_tid_46537 - - skip_threads_46550)); - - if (inactive_46551) { - x_41524 = x_41525; - } - if (!inactive_46551) { - float defunc_1_op_res_41526 = x_41524 + - x_41525; - - x_41524 = defunc_1_op_res_41526; - } - } - } - if (sle32(wave_sizze_46539, skip_threads_46550)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46550, local_tid_46537 - - squot32(local_tid_46537, 32) * 32) && - ltid_in_bounds_46549) { - // write result - { - ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - x_41524; - x_41525 = x_41524; - } - } - if (sle32(wave_sizze_46539, skip_threads_46550)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46550 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46537 - squot32(local_tid_46537, 32) * 32) == - 31 && ltid_in_bounds_46549) { - ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(squot32(local_tid_46537, - 32))] = - x_41524; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46552; - - // read input for in-block scan - { - if (squot32(local_tid_46537, 32) == 0 && - ltid_in_bounds_46549) { - x_46547 = ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)]; - if ((local_tid_46537 - squot32(local_tid_46537, - 32) * 32) == 0) { - x_46546 = 
x_46547; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46552 = 1; - while (slt32(skip_threads_46552, 32)) { - if (sle32(skip_threads_46552, local_tid_46537 - - squot32(local_tid_46537, 32) * 32) && - (squot32(local_tid_46537, 32) == 0 && - ltid_in_bounds_46549)) { - // read operands - { - x_46546 = ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537) - - sext_i32_i64(skip_threads_46552)]; - } - // perform operation - { - bool inactive_46553 = - slt64(srem64(sext_i32_i64(local_tid_46537 * - 32 + 32 - 1), - i32_res_29568), - sext_i32_i64(local_tid_46537 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46537 - - skip_threads_46552) * - 32 + 32 - 1)); - - if (inactive_46553) { - x_46546 = x_46547; - } - if (!inactive_46553) { - float defunc_1_op_res_46548 = x_46546 + - x_46547; - - x_46546 = defunc_1_op_res_46548; - } - } - } - if (sle32(wave_sizze_46539, skip_threads_46552)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46552, local_tid_46537 - - squot32(local_tid_46537, 32) * 32) && - (squot32(local_tid_46537, 32) == 0 && - ltid_in_bounds_46549)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - x_46546; - x_46547 = x_46546; - } - } - if (sle32(wave_sizze_46539, skip_threads_46552)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46552 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46537, 32) == 0 || - !ltid_in_bounds_46549)) { - // read operands - { - x_41525 = x_41524; - x_41524 = ((__local - float *) red_arr_mem_46541)[sext_i32_i64(squot32(local_tid_46537, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46554 = - slt64(srem64(sext_i32_i64(local_tid_46537), - i32_res_29568), - sext_i32_i64(local_tid_46537) - - sext_i32_i64(squot32(local_tid_46537, - 32) * 32 - 1)); - - if (inactive_46554) { - x_41524 = x_41525; - } - if 
(!inactive_46554) { - float defunc_1_op_res_41526 = x_41524 + x_41525; - - x_41524 = defunc_1_op_res_41526; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - x_41524; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46537, 32) == 0) { - ((__local - float *) red_arr_mem_46541)[sext_i32_i64(local_tid_46537)] = - x_41525; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + x_111051 = defunc_1_op_res_111053; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128047)[sext_i32_i64(local_tid_128035)] = + x_111051; } } + offset_128049 *= 2; + } + while (slt32(skip_waves_128050, + squot32(sext_i64_i32(computed_group_sizze_110870) + + wave_sizze_128037 - 1, wave_sizze_128037))) { barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46545) * - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534) + - sext_i32_i64(local_tid_46537), m_29166) && - slt64(sext_i32_i64(local_tid_46537), - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534))) { - ((__global - float *) mem_45278)[sext_i32_i64(virt_group_id_46545) * - squot64(segred_group_sizze_41520, - segment_sizze_nonzzero_46534) + - sext_i32_i64(local_tid_46537)] = ((__local - float *) red_arr_mem_46541)[(sext_i32_i64(local_tid_46537) + - (int64_t) 1) * - segment_sizze_nonzzero_46534 - - (int64_t) 1]; + offset_128049 = skip_waves_128050 * wave_sizze_128037; + if (slt32(local_tid_128035 + offset_128049, + sext_i64_i32(k2p2zq_75151)) && ((local_tid_128035 - + squot32(local_tid_128035, + wave_sizze_128037) * + wave_sizze_128037) == 0 && + (squot32(local_tid_128035, + wave_sizze_128037) & + (2 * skip_waves_128050 - + 1)) == 0)) { + // read array element + { + x_111052 = ((__local + double *) red_arr_mem_128047)[sext_i32_i64(local_tid_128035 + + offset_128049)]; + } + // apply reduction operation + { + 
double defunc_1_op_res_111053 = x_111051 + x_111052; + + x_111051 = defunc_1_op_res_111053; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128047)[sext_i32_i64(local_tid_128035)] = + x_111051; } } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + skip_waves_128050 *= 2; } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_111050 = ((__local + double *) red_arr_mem_128047)[(int64_t) 0]; + + double resid_111057 = defunc_11_internal_map_res_transformed_row_111033 - + defunc_0_f_res_111050; + double sqrt_res_111058; + + sqrt_res_111058 = futrts_sqrt64(fr_111049); + + double recresid_r_111059 = resid_111057 / sqrt_res_111058; + __local char *mem_123882; + + mem_123882 = (__local char *) mem_123882_backing_3; + + __local char *mem_123885; + + mem_123885 = (__local char *) mem_123885_backing_4; + + int64_t gtid_110882 = sext_i32_i64(ltid_pre_128040); + int64_t gtid_110883 = sext_i32_i64(ltid_pre_128041); + int32_t phys_tid_110884 = local_tid_128035; + __local char *red_arr_mem_128051; + + red_arr_mem_128051 = (__local char *) red_arr_mem_128051_backing_5; + if (slt64(gtid_110882, k2p2zq_75151) && slt64(gtid_110883, k2p2zq_75151)) { + double x_111065 = ((__local double *) mem_123878)[gtid_110882]; + double x_111067 = ((__local double *) mem_123878)[gtid_110883]; + double x_111068 = ((__global double *) mem_param_123778)[gtid_110868 * + binop_x_120251 + + gtid_110882 * + k2p2zq_75151 + + gtid_110883]; + double x_111069 = ((__global double *) mem_120246)[gtid_110883 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_110868 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111070 = x_111065 * x_111067; + double y_111071 = x_111070 / fr_111049; + double defunc_1_f_res_111072 = x_111068 - y_111071; + double defunc_1_f_res_111073 = x_111069 * defunc_1_f_res_111072; + + ((__local double *) red_arr_mem_128051)[gtid_110882 * k2p2zq_75151 + + gtid_110883] = + defunc_1_f_res_111073; + ((__local double 
*) mem_123885)[gtid_110882 * k2p2zq_75151 + + gtid_110883] = defunc_1_f_res_111072; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_1: - return; - #undef segred_group_sizze_41520 -} -__kernel void mainzisegred_small_42060(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46703_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46701_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46699_backing_aligned_2, - int64_t m_29166, - int64_t iota32_arg_29597, - int64_t num_groups_42247, - int64_t segment_sizze_nonzzero_46692, - __global unsigned char *mem_45284, - __global unsigned char *mem_45296, - __global unsigned char *mem_45298, - __global unsigned char *mem_45302, - __global unsigned char *mem_45305, - __global unsigned char *mem_45307, - __global unsigned char *mem_45309) -{ - #define segred_group_sizze_42246 (mainzisegred_group_sizze_42054) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46703_backing_2 = - (__local volatile - char *) red_arr_mem_46703_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46701_backing_1 = - (__local volatile - char *) red_arr_mem_46701_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46699_backing_0 = - (__local volatile - char *) red_arr_mem_46699_backing_aligned_2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46694; - int32_t local_tid_46695; - int64_t group_sizze_46698; - int32_t wave_sizze_46697; - int32_t group_tid_46696; - - global_tid_46694 = get_global_id(0); - local_tid_46695 = get_local_id(0); - group_sizze_46698 = get_local_size(0); - wave_sizze_46697 = LOCKSTEP_WIDTH; - group_tid_46696 = get_group_id(0); - - int32_t phys_tid_42060; - - phys_tid_42060 = global_tid_46694; - - __local char *red_arr_mem_46699; - - red_arr_mem_46699 = (__local char *) red_arr_mem_46699_backing_0; - - __local char *red_arr_mem_46701; + int64_t dims_flat_128053; - 
red_arr_mem_46701 = (__local char *) red_arr_mem_46701_backing_1; + dims_flat_128053 = k2p2zq_75151 * k2p2zq_75151; - __local char *red_arr_mem_46703; + double x_111062; + double x_111063; + double x_128055; + double x_128056; + bool ltid_in_bounds_128058; - red_arr_mem_46703 = (__local char *) red_arr_mem_46703_backing_2; + ltid_in_bounds_128058 = slt64(sext_i32_i64(local_tid_128035), k2p2zq_75151 * + k2p2zq_75151); - int32_t phys_group_id_46705; + int32_t skip_threads_128059; - phys_group_id_46705 = get_group_id(0); - for (int32_t i_46706 = 0; i_46706 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_29166, - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692))) - - phys_group_id_46705, sext_i64_i32(num_groups_42247)); - i_46706++) { - int32_t virt_group_id_46707 = phys_group_id_46705 + i_46706 * - sext_i64_i32(num_groups_42247); - int64_t gtid_42051 = squot64(sext_i32_i64(local_tid_46695), - segment_sizze_nonzzero_46692) + - sext_i32_i64(virt_group_id_46707) * - squot64(segred_group_sizze_42246, segment_sizze_nonzzero_46692); - int64_t gtid_42059 = srem64(sext_i32_i64(local_tid_46695), - iota32_arg_29597); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, iota32_arg_29597) && (slt64(gtid_42051, - m_29166) && - slt64(sext_i32_i64(local_tid_46695), - iota32_arg_29597 * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692)))) { - int32_t y_42266 = ((__global int32_t *) mem_45298)[gtid_42051]; - float y_42267 = ((__global float *) mem_45296)[gtid_42051]; - float x_42271 = ((__global float *) mem_45302)[gtid_42051 * - iota32_arg_29597 + - gtid_42059]; - float x_42272 = ((__global float *) mem_45284)[gtid_42059]; - int32_t index_primexp_42404 = sext_i64_i32(gtid_42059); - float defunc_0_f_res_42275 = x_42271 / y_42267; - bool cond_42276 = slt32(index_primexp_42404, y_42266); - bool isnan_res_42277; - - isnan_res_42277 = futrts_isnan32(defunc_0_f_res_42275); - - bool cond_t_res_42278 = !isnan_res_42277; - bool x_42279 = cond_42276 && 
cond_t_res_42278; - float abs_res_42280 = (float) fabs(defunc_0_f_res_42275); - bool defunc_2_f_res_t_res_42281 = x_42272 < abs_res_42280; - bool x_42282 = x_42279 && defunc_2_f_res_t_res_42281; - float defunc_1_f_res_42283; - - if (cond_42276) { - defunc_1_f_res_42283 = defunc_0_f_res_42275; - } else { - defunc_1_f_res_42283 = 0.0F; - } - // save map-out results - { } - // save results to be reduced - { - ((__local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = - x_42282; - ((__local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - index_primexp_42404; - ((__local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - defunc_1_f_res_42283; - } - } else { - ((__local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = 0; - ((__local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - -1; - ((__local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - 0.0F; + // read input for in-block scan + { + if (ltid_in_bounds_128058) { + x_111063 = ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)]; + if ((local_tid_128035 - squot32(local_tid_128035, 32) * 32) == 0) { + x_111062 = x_111063; } } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, iota32_arg_29597)) { - // perform segmented scan to imitate reduction - { - bool x_42252; - int32_t x_42253; - float x_42254; - bool x_42255; - int32_t x_42256; - float x_42257; - bool x_46708; - int32_t x_46709; - float x_46710; - bool x_46711; - int32_t x_46712; - float x_46713; - bool ltid_in_bounds_46722; - - ltid_in_bounds_46722 = slt64(sext_i32_i64(local_tid_46695), - iota32_arg_29597 * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692)); - - int32_t skip_threads_46723; - - // read input for in-block scan + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128059 = 1; + while (slt32(skip_threads_128059, 32)) { + if (sle32(skip_threads_128059, local_tid_128035 - + 
squot32(local_tid_128035, 32) * 32) && + ltid_in_bounds_128058) { + // read operands { - if (ltid_in_bounds_46722) { - x_42255 = ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)]; - x_42256 = ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)]; - x_42257 = ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)]; - if ((local_tid_46695 - squot32(local_tid_46695, 32) * - 32) == 0) { - x_42252 = x_42255; - x_42253 = x_42256; - x_42254 = x_42257; - } - } + x_111062 = ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035) - + sext_i32_i64(skip_threads_128059)]; } - // in-block scan (hopefully no barriers needed) + // perform operation { - skip_threads_46723 = 1; - while (slt32(skip_threads_46723, 32)) { - if (sle32(skip_threads_46723, local_tid_46695 - - squot32(local_tid_46695, 32) * 32) && - ltid_in_bounds_46722) { - // read operands - { - x_42252 = ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46723)]; - x_42253 = ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46723)]; - x_42254 = ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46723)]; - } - // perform operation - { - bool inactive_46724 = - slt64(srem64(sext_i32_i64(local_tid_46695), - iota32_arg_29597), - sext_i32_i64(local_tid_46695) - - sext_i32_i64(local_tid_46695 - - skip_threads_46723)); - - if (inactive_46724) { - x_42252 = x_42255; - x_42253 = x_42256; - x_42254 = x_42257; - } - if (!inactive_46724) { - bool defunc_1_op_res_42258; - int32_t defunc_1_op_res_42259; - - if (x_42252) { - defunc_1_op_res_42258 = x_42252; - defunc_1_op_res_42259 = x_42253; - } else { - bool x_42260 = x_42255 && x_42255; - bool x_42261 = !x_42255; - bool y_42262 = x_42252 && x_42261; - bool defunc_1_op_res_f_res_42263 = - x_42260 || 
y_42262; - int32_t defunc_1_op_res_f_res_42264; - - if (x_42255) { - defunc_1_op_res_f_res_42264 = - x_42256; - } else { - defunc_1_op_res_f_res_42264 = - x_42253; - } - defunc_1_op_res_42258 = - defunc_1_op_res_f_res_42263; - defunc_1_op_res_42259 = - defunc_1_op_res_f_res_42264; - } - - float defunc_1_op_res_42265 = x_42254 + - x_42257; - - x_42252 = defunc_1_op_res_42258; - x_42253 = defunc_1_op_res_42259; - x_42254 = defunc_1_op_res_42265; - } - } - } - if (sle32(wave_sizze_46697, skip_threads_46723)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46723, local_tid_46695 - - squot32(local_tid_46695, 32) * 32) && - ltid_in_bounds_46722) { - // write result - { - ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = - x_42252; - x_42255 = x_42252; - ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - x_42253; - x_42256 = x_42253; - ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - x_42254; - x_42257 = x_42254; - } - } - if (sle32(wave_sizze_46697, skip_threads_46723)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46723 *= 2; + bool inactive_128060 = + slt64(srem64(sext_i32_i64(local_tid_128035), + k2p2zq_75151), + sext_i32_i64(local_tid_128035) - + sext_i32_i64(local_tid_128035 - + skip_threads_128059)); + + if (inactive_128060) { + x_111062 = x_111063; + } + if (!inactive_128060) { + double defunc_1_op_res_111064 = x_111062 + x_111063; + + x_111062 = defunc_1_op_res_111064; } } + } + if (sle32(wave_sizze_128037, skip_threads_128059)) { barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + } + if (sle32(skip_threads_128059, local_tid_128035 - + squot32(local_tid_128035, 32) * 32) && + ltid_in_bounds_128058) { + // write result { - if ((local_tid_46695 - squot32(local_tid_46695, 32) * 32) == - 31 && ltid_in_bounds_46722) { - ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(squot32(local_tid_46695, 
- 32))] = - x_42252; - ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(squot32(local_tid_46695, - 32))] = - x_42253; - ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(squot32(local_tid_46695, - 32))] = - x_42254; - } + ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)] = + x_111062; + x_111063 = x_111062; } + } + if (sle32(wave_sizze_128037, skip_threads_128059)) { barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46725; - - // read input for in-block scan + } + skip_threads_128059 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128035 - squot32(local_tid_128035, 32) * 32) == 31 && + ltid_in_bounds_128058) { + ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(squot32(local_tid_128035, + 32))] = + x_111062; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128061; + + // read input for in-block scan + { + if (squot32(local_tid_128035, 32) == 0 && ltid_in_bounds_128058) { + x_128056 = ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)]; + if ((local_tid_128035 - squot32(local_tid_128035, 32) * 32) == + 0) { + x_128055 = x_128056; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128061 = 1; + while (slt32(skip_threads_128061, 32)) { + if (sle32(skip_threads_128061, local_tid_128035 - + squot32(local_tid_128035, 32) * 32) && + (squot32(local_tid_128035, 32) == 0 && + ltid_in_bounds_128058)) { + // read operands { - if (squot32(local_tid_46695, 32) == 0 && - ltid_in_bounds_46722) { - x_46711 = ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)]; - x_46712 = ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)]; 
- x_46713 = ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)]; - if ((local_tid_46695 - squot32(local_tid_46695, - 32) * 32) == 0) { - x_46708 = x_46711; - x_46709 = x_46712; - x_46710 = x_46713; - } - } + x_128055 = ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035) - + sext_i32_i64(skip_threads_128061)]; } - // in-block scan (hopefully no barriers needed) + // perform operation { - skip_threads_46725 = 1; - while (slt32(skip_threads_46725, 32)) { - if (sle32(skip_threads_46725, local_tid_46695 - - squot32(local_tid_46695, 32) * 32) && - (squot32(local_tid_46695, 32) == 0 && - ltid_in_bounds_46722)) { - // read operands - { - x_46708 = ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46725)]; - x_46709 = ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46725)]; - x_46710 = ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695) - - sext_i32_i64(skip_threads_46725)]; - } - // perform operation - { - bool inactive_46726 = - slt64(srem64(sext_i32_i64(local_tid_46695 * - 32 + 32 - 1), - iota32_arg_29597), - sext_i32_i64(local_tid_46695 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46695 - - skip_threads_46725) * - 32 + 32 - 1)); - - if (inactive_46726) { - x_46708 = x_46711; - x_46709 = x_46712; - x_46710 = x_46713; - } - if (!inactive_46726) { - bool defunc_1_op_res_46714; - int32_t defunc_1_op_res_46715; - - if (x_46708) { - defunc_1_op_res_46714 = x_46708; - defunc_1_op_res_46715 = x_46709; - } else { - bool x_46716 = x_46711 && x_46711; - bool x_46717 = !x_46711; - bool y_46718 = x_46708 && x_46717; - bool defunc_1_op_res_f_res_46719 = - x_46716 || y_46718; - int32_t defunc_1_op_res_f_res_46720; - - if (x_46711) { - defunc_1_op_res_f_res_46720 = - x_46712; - } else { - defunc_1_op_res_f_res_46720 = - x_46709; - } - defunc_1_op_res_46714 = - 
defunc_1_op_res_f_res_46719; - defunc_1_op_res_46715 = - defunc_1_op_res_f_res_46720; - } - - float defunc_1_op_res_46721 = x_46710 + - x_46713; - - x_46708 = defunc_1_op_res_46714; - x_46709 = defunc_1_op_res_46715; - x_46710 = defunc_1_op_res_46721; - } - } - } - if (sle32(wave_sizze_46697, skip_threads_46725)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46725, local_tid_46695 - - squot32(local_tid_46695, 32) * 32) && - (squot32(local_tid_46695, 32) == 0 && - ltid_in_bounds_46722)) { - // write result - { - ((volatile __local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = - x_46708; - x_46711 = x_46708; - ((volatile __local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - x_46709; - x_46712 = x_46709; - ((volatile __local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - x_46710; - x_46713 = x_46710; - } - } - if (sle32(wave_sizze_46697, skip_threads_46725)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46725 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46695, 32) == 0 || - !ltid_in_bounds_46722)) { - // read operands - { - x_42255 = x_42252; - x_42256 = x_42253; - x_42257 = x_42254; - x_42252 = ((__local - bool *) red_arr_mem_46699)[sext_i32_i64(squot32(local_tid_46695, - 32)) - - (int64_t) 1]; - x_42253 = ((__local - int32_t *) red_arr_mem_46701)[sext_i32_i64(squot32(local_tid_46695, - 32)) - - (int64_t) 1]; - x_42254 = ((__local - float *) red_arr_mem_46703)[sext_i32_i64(squot32(local_tid_46695, - 32)) - - (int64_t) 1]; + bool inactive_128062 = + slt64(srem64(sext_i32_i64(local_tid_128035 * 32 + + 32 - 1), k2p2zq_75151), + sext_i32_i64(local_tid_128035 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128035 - + skip_threads_128061) * + 32 + 32 - 1)); + + if (inactive_128062) { + x_128055 = x_128056; } - // perform operation - { - bool inactive_46727 = - slt64(srem64(sext_i32_i64(local_tid_46695), - 
iota32_arg_29597), - sext_i32_i64(local_tid_46695) - - sext_i32_i64(squot32(local_tid_46695, - 32) * 32 - 1)); + if (!inactive_128062) { + double defunc_1_op_res_128057 = x_128055 + x_128056; - if (inactive_46727) { - x_42252 = x_42255; - x_42253 = x_42256; - x_42254 = x_42257; - } - if (!inactive_46727) { - bool defunc_1_op_res_42258; - int32_t defunc_1_op_res_42259; - - if (x_42252) { - defunc_1_op_res_42258 = x_42252; - defunc_1_op_res_42259 = x_42253; - } else { - bool x_42260 = x_42255 && x_42255; - bool x_42261 = !x_42255; - bool y_42262 = x_42252 && x_42261; - bool defunc_1_op_res_f_res_42263 = - x_42260 || y_42262; - int32_t defunc_1_op_res_f_res_42264; - - if (x_42255) { - defunc_1_op_res_f_res_42264 = x_42256; - } else { - defunc_1_op_res_f_res_42264 = x_42253; - } - defunc_1_op_res_42258 = - defunc_1_op_res_f_res_42263; - defunc_1_op_res_42259 = - defunc_1_op_res_f_res_42264; - } - - float defunc_1_op_res_42265 = x_42254 + x_42257; - - x_42252 = defunc_1_op_res_42258; - x_42253 = defunc_1_op_res_42259; - x_42254 = defunc_1_op_res_42265; - } - } - // write final result - { - ((__local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = - x_42252; - ((__local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - x_42253; - ((__local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - x_42254; + x_128055 = defunc_1_op_res_128057; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46695, 32) == 0) { - ((__local - bool *) red_arr_mem_46699)[sext_i32_i64(local_tid_46695)] = - x_42255; - ((__local - int32_t *) red_arr_mem_46701)[sext_i32_i64(local_tid_46695)] = - x_42256; - ((__local - float *) red_arr_mem_46703)[sext_i32_i64(local_tid_46695)] = - x_42257; + if (sle32(wave_sizze_128037, skip_threads_128061)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128061, local_tid_128035 - + squot32(local_tid_128035, 32) * 32) && + (squot32(local_tid_128035, 
32) == 0 && + ltid_in_bounds_128058)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)] = + x_128055; + x_128056 = x_128055; } } - barrier(CLK_LOCAL_MEM_FENCE); + if (sle32(wave_sizze_128037, skip_threads_128061)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128061 *= 2; } } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46707) * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692) + - sext_i32_i64(local_tid_46695), m_29166) && - slt64(sext_i32_i64(local_tid_46695), - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692))) { - ((__global - bool *) mem_45305)[sext_i32_i64(virt_group_id_46707) * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692) + - sext_i32_i64(local_tid_46695)] = ((__local - bool *) red_arr_mem_46699)[(sext_i32_i64(local_tid_46695) + - (int64_t) 1) * - segment_sizze_nonzzero_46692 - - (int64_t) 1]; - ((__global - int32_t *) mem_45307)[sext_i32_i64(virt_group_id_46707) * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692) + - sext_i32_i64(local_tid_46695)] = - ((__local - int32_t *) red_arr_mem_46701)[(sext_i32_i64(local_tid_46695) + - (int64_t) 1) * - segment_sizze_nonzzero_46692 - - (int64_t) 1]; - ((__global - float *) mem_45309)[sext_i32_i64(virt_group_id_46707) * - squot64(segred_group_sizze_42246, - segment_sizze_nonzzero_46692) + - sext_i32_i64(local_tid_46695)] = ((__local - float *) red_arr_mem_46703)[(sext_i32_i64(local_tid_46695) + - (int64_t) 1) * - segment_sizze_nonzzero_46692 - - (int64_t) 1]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128035, 32) == 0 || !ltid_in_bounds_128058)) { + // read operands + { + x_111063 = x_111062; + x_111062 = ((__local + double *) red_arr_mem_128051)[sext_i32_i64(squot32(local_tid_128035, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool 
inactive_128063 = + slt64(srem64(sext_i32_i64(local_tid_128035), k2p2zq_75151), + sext_i32_i64(local_tid_128035) - + sext_i32_i64(squot32(local_tid_128035, 32) * 32 - + 1)); + + if (inactive_128063) { + x_111062 = x_111063; + } + if (!inactive_128063) { + double defunc_1_op_res_111064 = x_111062 + x_111063; + + x_111062 = defunc_1_op_res_111064; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)] = + x_111062; } } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128035, 32) == 0) { + ((__local + double *) red_arr_mem_128051)[sext_i32_i64(local_tid_128035)] = + x_111063; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128064 = 0; i_128064 < sdiv_up64(k2p2zq_75151 - + sext_i32_i64(local_tid_128035), + computed_group_sizze_110870); + i_128064++) { + ((__local double *) mem_123882)[i_128064 * computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035)] = + ((__local double *) red_arr_mem_128051)[(i_128064 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035)) * + k2p2zq_75151 + + (k2p2zq_75151 - + (int64_t) 1)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); - error_1: - return; - #undef segred_group_sizze_42246 -} -__kernel void mainDetailedzicopy_45861(int64_t m_27772, int64_t nm_27920, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44590) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - int32_t copy_gtid_45861; - int32_t copy_ltid_45862; - int32_t copy_gid_45863; + __local char *mem_123888; - copy_gtid_45861 = get_global_id(0); - copy_ltid_45862 = get_local_id(0); - copy_gid_45863 = get_group_id(0); - if 
(slt64(sext_i32_i64(copy_gtid_45861), m_27772 * nm_27920)) { - ((__global float *) mem_44590)[(sext_i32_i64(copy_gtid_45861) - - squot64(sext_i32_i64(copy_gtid_45861), - nm_27920) * nm_27920) * - m_27772 + - squot64(sext_i32_i64(copy_gtid_45861), - nm_27920)] = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (squot64(sext_i32_i64(copy_gtid_45861), - nm_27920) * - ctx_param_ext_44581 + - (sext_i32_i64(copy_gtid_45861) - - squot64(sext_i32_i64(copy_gtid_45861), - nm_27920) * - nm_27920) * - ctx_param_ext_44583)]; + mem_123888 = (__local char *) mem_123888_backing_6; + + int64_t gtid_110876 = sext_i32_i64(ltid_pre_128039); + int32_t phys_tid_110877 = local_tid_128035; + + if (slt64(gtid_110876, k2p2zq_75151)) { + double x_111075 = ((__global double *) mem_param_123786)[gtid_110868 * + k2p2zq_75151 + + gtid_110876]; + double defunc_0_f_res_111076 = ((__local + double *) mem_123882)[gtid_110876]; + double defunc_0_g_res_111077 = resid_111057 * defunc_0_f_res_111076; + double defunc_1_f_res_111078 = x_111075 + defunc_0_g_res_111077; + + ((__local double *) mem_123888)[gtid_110876] = defunc_1_f_res_111078; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128065 = 0; i_128065 < sdiv_up64(k2p2zq_75151 * + k2p2zq_75151 - + sext_i32_i64(local_tid_128035), + computed_group_sizze_110870); + i_128065++) { + ((__global double *) mem_123892)[gtid_110868 * (k2p2zq_75151 * + k2p2zq_75151) + + squot64(i_128065 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035), + k2p2zq_75151) * k2p2zq_75151 + + (i_128065 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035) - + squot64(i_128065 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) mem_123885)[squot64(i_128065 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035), + k2p2zq_75151) * + k2p2zq_75151 + + (i_128065 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035) - + squot64(i_128065 * + 
computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035), + k2p2zq_75151) * + k2p2zq_75151)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128066 = 0; i_128066 < sdiv_up64(k2p2zq_75151 - + sext_i32_i64(local_tid_128035), + computed_group_sizze_110870); + i_128066++) { + ((__global double *) mem_123895)[gtid_110868 * k2p2zq_75151 + + (i_128066 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035))] = + ((__local double *) mem_123888)[i_128066 * + computed_group_sizze_110870 + + sext_i32_i64(local_tid_128035)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_128035 == 0) { + ((__global double *) mem_123897)[gtid_110868] = recresid_r_111059; } - error_0: + error_7: return; } -__kernel void mainDetailedziscan_stage1_32356(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46273_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int32_t num_threads_46267, - __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_3_map_res_mem_45140, - __global unsigned char *mem_45163, - __global unsigned char *mem_45166) +__kernel void mainzisegmap_intragroup_111401(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124032_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128306_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_128302_backing_aligned_2, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int64_t computed_group_sizze_111392, + __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_121934, + __global unsigned char *mem_124035, + __global unsigned char *mem_124037) { - #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46273_backing_0 = + __local volatile char 
*restrict mem_124032_backing_2 = (__local volatile + char *) mem_124032_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128306_backing_1 = + (__local volatile + char *) red_arr_mem_128306_backing_aligned_1; + __local volatile char *restrict red_arr_mem_128302_backing_0 = (__local volatile - char *) scan_arr_mem_46273_backing_aligned_0; + char *) red_arr_mem_128302_backing_aligned_2; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128295; + int32_t local_tid_128296; + int64_t group_sizze_128299; + int32_t wave_sizze_128298; + int32_t group_tid_128297; - int32_t global_tid_46268; - int32_t local_tid_46269; - int64_t group_sizze_46272; - int32_t wave_sizze_46271; - int32_t group_tid_46270; + global_tid_128295 = get_global_id(0); + local_tid_128296 = get_local_id(0); + group_sizze_128299 = get_local_size(0); + wave_sizze_128298 = LOCKSTEP_WIDTH; + group_tid_128297 = get_group_id(0); - global_tid_46268 = get_global_id(0); - local_tid_46269 = get_local_id(0); - group_sizze_46272 = get_local_size(0); - wave_sizze_46271 = LOCKSTEP_WIDTH; - group_tid_46270 = get_group_id(0); + int32_t phys_tid_111401; - int32_t phys_tid_32356; + phys_tid_111401 = group_tid_128297; - phys_tid_32356 = global_tid_46268; + int32_t ltid_pre_128300; - __local char *scan_arr_mem_46273; + ltid_pre_128300 = local_tid_128296; - scan_arr_mem_46273 = (__local char *) scan_arr_mem_46273_backing_0; + int32_t ltid_pre_128301; - int64_t x_32378; - int64_t x_32379; + ltid_pre_128301 = local_tid_128296; - x_32378 = (int64_t) 0; - for (int64_t j_46275 = 0; j_46275 < sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267)); - j_46275++) { - int64_t chunk_offset_46276 = segscan_group_sizze_32373 * j_46275 + - sext_i32_i64(group_tid_46270) * (segscan_group_sizze_32373 * - 
sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))); - int64_t flat_idx_46277 = chunk_offset_46276 + - sext_i32_i64(local_tid_46269); - int64_t gtid_32347 = squot64(flat_idx_46277, N_27771); - int64_t gtid_32355 = flat_idx_46277 - squot64(flat_idx_46277, N_27771) * - N_27771; + int64_t gtid_111390; + + gtid_111390 = sext_i32_i64(group_tid_128297); + + int64_t x_111511; + + x_111511 = ((__global int64_t *) defunc_3_map_res_mem_120230)[gtid_111390]; + + int64_t n_111512 = sub64(x_111511, k2p2zq_75151); + double i64_res_111513 = sitofp_i64_f64(n_111512); + double defunc_2_reduce_res_111514; + int64_t gtid_111393 = sext_i32_i64(ltid_pre_128300); + int32_t phys_tid_111394 = local_tid_128296; + __local char *red_arr_mem_128302; + + red_arr_mem_128302 = (__local char *) red_arr_mem_128302_backing_0; + if (slt64(gtid_111393, num_recresids_padded_75809)) { + double x_111522 = ((__global double *) mem_121934)[gtid_111393 * + m_75136 + + gtid_111390]; - // threads in bounds read input - { - if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) { - float x_32383 = ((__global - float *) images_mem_44381)[gtid_32347 * - N_27771 + - gtid_32355]; - bool isnan_res_32385; + ((__local double *) red_arr_mem_128302)[gtid_111393] = x_111522; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128304; + int32_t skip_waves_128305; + + skip_waves_128305 = 1; + + double x_111515; + double x_111516; + + offset_128304 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128296, sext_i64_i32(num_recresids_padded_75809))) { + x_111515 = ((__local + double *) red_arr_mem_128302)[sext_i32_i64(local_tid_128296 + + offset_128304)]; + } + } + offset_128304 = 1; + while (slt32(offset_128304, wave_sizze_128298)) { + if (slt32(local_tid_128296 + offset_128304, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128296 - squot32(local_tid_128296, wave_sizze_128298) * + wave_sizze_128298) & (2 * offset_128304 - 1)) == 0) { + // read array 
element + { + x_111516 = ((volatile __local + double *) red_arr_mem_128302)[sext_i32_i64(local_tid_128296 + + offset_128304)]; + } + // apply reduction operation + { + bool isnan_res_111517; - isnan_res_32385 = futrts_isnan32(x_32383); + isnan_res_111517 = futrts_isnan64(x_111515); - bool cond_32386 = !isnan_res_32385; - float defunc_1_f_res_32387; + double defunc_1_op_res_111518; - if (cond_32386) { - float x_32384 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_32347 * - N_27771 + - gtid_32355]; - float defunc_1_f_res_t_res_32388 = x_32383 - x_32384; - - defunc_1_f_res_32387 = defunc_1_f_res_t_res_32388; + if (isnan_res_111517) { + defunc_1_op_res_111518 = x_111516; } else { - defunc_1_f_res_32387 = NAN; + bool isnan_res_111519; + + isnan_res_111519 = futrts_isnan64(x_111516); + + double defunc_1_op_res_f_res_111520; + + if (isnan_res_111519) { + defunc_1_op_res_f_res_111520 = x_111515; + } else { + double defunc_1_op_res_f_res_f_res_111521 = x_111515 + + x_111516; + + defunc_1_op_res_f_res_111520 = + defunc_1_op_res_f_res_f_res_111521; + } + defunc_1_op_res_111518 = defunc_1_op_res_f_res_111520; } + x_111515 = defunc_1_op_res_111518; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128302)[sext_i32_i64(local_tid_128296)] = + x_111515; + } + } + offset_128304 *= 2; + } + while (slt32(skip_waves_128305, + squot32(sext_i64_i32(computed_group_sizze_111392) + + wave_sizze_128298 - 1, wave_sizze_128298))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128304 = skip_waves_128305 * wave_sizze_128298; + if (slt32(local_tid_128296 + offset_128304, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128296 - squot32(local_tid_128296, wave_sizze_128298) * + wave_sizze_128298) == 0 && (squot32(local_tid_128296, + wave_sizze_128298) & (2 * + skip_waves_128305 - + 1)) == + 0)) { + // read array element + { + x_111516 = ((__local + double *) red_arr_mem_128302)[sext_i32_i64(local_tid_128296 + + offset_128304)]; + } + // apply 
reduction operation + { + bool isnan_res_111517; - bool isnan_res_32389; - - isnan_res_32389 = futrts_isnan32(defunc_1_f_res_32387); + isnan_res_111517 = futrts_isnan64(x_111515); - bool defunc_0_p_res_32390 = !isnan_res_32389; - int64_t defunc_0_f_res_32391 = - btoi_bool_i64(defunc_0_p_res_32390); + double defunc_1_op_res_111518; - // write to-scan values to parameters - { - x_32379 = defunc_0_f_res_32391; - } - // write mapped values results to global memory - { - ((__global float *) mem_45166)[gtid_32347 * N_27771 + - gtid_32355] = - defunc_1_f_res_32387; + if (isnan_res_111517) { + defunc_1_op_res_111518 = x_111516; + } else { + bool isnan_res_111519; + + isnan_res_111519 = futrts_isnan64(x_111516); + + double defunc_1_op_res_f_res_111520; + + if (isnan_res_111519) { + defunc_1_op_res_f_res_111520 = x_111515; + } else { + double defunc_1_op_res_f_res_f_res_111521 = x_111515 + + x_111516; + + defunc_1_op_res_f_res_111520 = + defunc_1_op_res_f_res_f_res_111521; + } + defunc_1_op_res_111518 = defunc_1_op_res_f_res_111520; } + x_111515 = defunc_1_op_res_111518; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128302)[sext_i32_i64(local_tid_128296)] = + x_111515; } } - // do one intra-group scan operation - { - // maybe restore some to-scan values to parameters, or read neutral + skip_waves_128305 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_111514 = ((__local + double *) red_arr_mem_128302)[(int64_t) 0]; + + double x_mean_111523 = defunc_2_reduce_res_111514 / i64_res_111513; + double defunc_2_reduce_res_111524; + int64_t gtid_111395 = sext_i32_i64(ltid_pre_128300); + int32_t phys_tid_111396 = local_tid_128296; + __local char *red_arr_mem_128306; + + red_arr_mem_128306 = (__local char *) red_arr_mem_128306_backing_1; + if (slt64(gtid_111395, num_recresids_padded_75809)) { + double x_111528 = ((__global double *) mem_121934)[gtid_111395 * + m_75136 + + gtid_111390]; + bool isnan_res_111529; + + isnan_res_111529 = 
futrts_isnan64(x_111528); + + double defunc_0_f_res_111530; + + if (isnan_res_111529) { + defunc_0_f_res_111530 = 0.0; + } else { + double x_111531 = x_111528 - x_mean_111523; + double defunc_0_f_res_f_res_111532 = fpow64(x_111531, 2.0); + + defunc_0_f_res_111530 = defunc_0_f_res_f_res_111532; + } + ((__local double *) red_arr_mem_128306)[gtid_111395] = + defunc_0_f_res_111530; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128308; + int32_t skip_waves_128309; + + skip_waves_128309 = 1; + + double x_111525; + double x_111526; + + offset_128308 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128296, sext_i64_i32(num_recresids_padded_75809))) { + x_111525 = ((__local + double *) red_arr_mem_128306)[sext_i32_i64(local_tid_128296 + + offset_128308)]; + } + } + offset_128308 = 1; + while (slt32(offset_128308, wave_sizze_128298)) { + if (slt32(local_tid_128296 + offset_128308, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128296 - squot32(local_tid_128296, wave_sizze_128298) * + wave_sizze_128298) & (2 * offset_128308 - 1)) == 0) { + // read array element { - if (!(slt64(gtid_32347, m_27772) && slt64(gtid_32355, - N_27771))) { - x_32379 = (int64_t) 0; - } + x_111526 = ((volatile __local + double *) red_arr_mem_128306)[sext_i32_i64(local_tid_128296 + + offset_128308)]; } - // combine with carry and write to local memory + // apply reduction operation { - int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379); + double defunc_1_op_res_111527 = x_111525 + x_111526; - ((__local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] = - defunc_1_op_res_32380; + x_111525 = defunc_1_op_res_111527; } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t x_46278; - int64_t x_46279; - int64_t x_46281; - int64_t x_46282; - bool ltid_in_bounds_46284; - - ltid_in_bounds_46284 = slt64(sext_i32_i64(local_tid_46269), - segscan_group_sizze_32373); - - int32_t skip_threads_46285; - - // read input for in-block scan + // write 
result of operation { - if (ltid_in_bounds_46284) { - x_46279 = ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)]; - if ((local_tid_46269 - squot32(local_tid_46269, 32) * 32) == - 0) { - x_46278 = x_46279; - } - } + ((volatile __local + double *) red_arr_mem_128306)[sext_i32_i64(local_tid_128296)] = + x_111525; } - // in-block scan (hopefully no barriers needed) + } + offset_128308 *= 2; + } + while (slt32(skip_waves_128309, + squot32(sext_i64_i32(computed_group_sizze_111392) + + wave_sizze_128298 - 1, wave_sizze_128298))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128308 = skip_waves_128309 * wave_sizze_128298; + if (slt32(local_tid_128296 + offset_128308, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128296 - squot32(local_tid_128296, wave_sizze_128298) * + wave_sizze_128298) == 0 && (squot32(local_tid_128296, + wave_sizze_128298) & (2 * + skip_waves_128309 - + 1)) == + 0)) { + // read array element { - skip_threads_46285 = 1; - while (slt32(skip_threads_46285, 32)) { - if (sle32(skip_threads_46285, local_tid_46269 - - squot32(local_tid_46269, 32) * 32) && - ltid_in_bounds_46284) { - // read operands - { - x_46278 = ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269) - - sext_i32_i64(skip_threads_46285)]; - } - // perform operation - { - bool inactive_46286 = - slt64(srem64(sext_i32_i64(local_tid_46269) + - chunk_offset_46276, N_27771), - sext_i32_i64(local_tid_46269) + - chunk_offset_46276 - - (sext_i32_i64(local_tid_46269 - - skip_threads_46285) + - chunk_offset_46276)); - - if (inactive_46286) { - x_46278 = x_46279; - } - if (!inactive_46286) { - int64_t defunc_1_op_res_46280 = add64(x_46278, - x_46279); - - x_46278 = defunc_1_op_res_46280; - } - } - } - if (sle32(wave_sizze_46271, skip_threads_46285)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46285, local_tid_46269 - - squot32(local_tid_46269, 32) * 32) && - ltid_in_bounds_46284) { - // write result - { - 
((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] = - x_46278; - x_46279 = x_46278; - } - } - if (sle32(wave_sizze_46271, skip_threads_46285)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46285 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46269 - squot32(local_tid_46269, 32) * 32) == - 31 && ltid_in_bounds_46284) { - ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(squot32(local_tid_46269, - 32))] = - x_46278; - } + x_111526 = ((__local + double *) red_arr_mem_128306)[sext_i32_i64(local_tid_128296 + + offset_128308)]; } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + // apply reduction operation { - int32_t skip_threads_46287; + double defunc_1_op_res_111527 = x_111525 + x_111526; - // read input for in-block scan - { - if (squot32(local_tid_46269, 32) == 0 && - ltid_in_bounds_46284) { - x_46282 = ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)]; - if ((local_tid_46269 - squot32(local_tid_46269, 32) * - 32) == 0) { - x_46281 = x_46282; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46287 = 1; - while (slt32(skip_threads_46287, 32)) { - if (sle32(skip_threads_46287, local_tid_46269 - - squot32(local_tid_46269, 32) * 32) && - (squot32(local_tid_46269, 32) == 0 && - ltid_in_bounds_46284)) { - // read operands - { - x_46281 = ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269) - - sext_i32_i64(skip_threads_46287)]; - } - // perform operation - { - bool inactive_46288 = - slt64(srem64(sext_i32_i64(local_tid_46269 * - 32 + 32 - 1) + - chunk_offset_46276, N_27771), - sext_i32_i64(local_tid_46269 * 32 + - 32 - 1) + chunk_offset_46276 - - (sext_i32_i64((local_tid_46269 - - skip_threads_46287) * - 32 + 32 - 1) + chunk_offset_46276)); - - if (inactive_46288) { - x_46281 = 
x_46282; - } - if (!inactive_46288) { - int64_t defunc_1_op_res_46283 = - add64(x_46281, x_46282); - - x_46281 = defunc_1_op_res_46283; - } - } - } - if (sle32(wave_sizze_46271, skip_threads_46287)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46287, local_tid_46269 - - squot32(local_tid_46269, 32) * 32) && - (squot32(local_tid_46269, 32) == 0 && - ltid_in_bounds_46284)) { - // write result - { - ((volatile __local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] = - x_46281; - x_46282 = x_46281; - } - } - if (sle32(wave_sizze_46271, skip_threads_46287)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46287 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46269, 32) == 0 || - !ltid_in_bounds_46284)) { - // read operands - { - x_46279 = x_46278; - x_46278 = ((__local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(squot32(local_tid_46269, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46289 = - slt64(srem64(sext_i32_i64(local_tid_46269) + - chunk_offset_46276, N_27771), - sext_i32_i64(local_tid_46269) + - chunk_offset_46276 - - (sext_i32_i64(squot32(local_tid_46269, 32) * - 32 - 1) + chunk_offset_46276)); - - if (inactive_46289) { - x_46278 = x_46279; - } - if (!inactive_46289) { - int64_t defunc_1_op_res_46280 = add64(x_46278, - x_46279); - - x_46278 = defunc_1_op_res_46280; - } - } - // write final result - { - ((__local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] = - x_46278; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46269, 32) == 0) { - ((__local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)] = - x_46279; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write partial scan result - { - if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) { - ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 
+ - gtid_32355] = ((__local - int64_t *) scan_arr_mem_46273)[sext_i32_i64(local_tid_46269)]; - } + x_111525 = defunc_1_op_res_111527; } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread reads last element as carry-in for next iteration + // write result of operation { - bool crosses_segment_46290 = slt64(srem64(chunk_offset_46276 + - segscan_group_sizze_32373, - N_27771), - chunk_offset_46276 + - segscan_group_sizze_32373 - - (chunk_offset_46276 + - segscan_group_sizze_32373 - - (int64_t) 1)); - bool should_load_carry_46291 = local_tid_46269 == 0 && - !crosses_segment_46290; - - if (should_load_carry_46291) { - x_32378 = ((__local - int64_t *) scan_arr_mem_46273)[segscan_group_sizze_32373 - - (int64_t) 1]; - } - if (!should_load_carry_46291) { - x_32378 = (int64_t) 0; - } + ((__local + double *) red_arr_mem_128306)[sext_i32_i64(local_tid_128296)] = + x_111525; } - barrier(CLK_LOCAL_MEM_FENCE); } + skip_waves_128309 *= 2; } - - error_1: - return; - #undef segscan_group_sizze_32373 -} -__kernel void mainDetailedziscan_stage1_34045(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - __local volatile - int64_t *scan_arr_mem_46697_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t iota32_arg_28233, - int32_t num_threads_46691, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global unsigned char *mem_45399, - __global unsigned char *mem_45403) -{ - #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46697_backing_0 = - (__local volatile - char *) scan_arr_mem_46697_backing_aligned_0; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = 
*global_failure >= 0; - - if (failed) - return; - } - local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_111524 = ((__local + double *) red_arr_mem_128306)[(int64_t) 0]; - int32_t global_tid_46692; - int32_t local_tid_46693; - int64_t group_sizze_46696; - int32_t wave_sizze_46695; - int32_t group_tid_46694; + double y_111533 = i64_res_111513 - 1.0; + double binop_p_111534 = defunc_2_reduce_res_111524 / y_111533; + double defunc_0_f_res_111535; - global_tid_46692 = get_global_id(0); - local_tid_46693 = get_local_id(0); - group_sizze_46696 = get_local_size(0); - wave_sizze_46695 = LOCKSTEP_WIDTH; - group_tid_46694 = get_group_id(0); + defunc_0_f_res_111535 = futrts_sqrt64(binop_p_111534); - int32_t phys_tid_34045; + double sqrt_res_111536; - phys_tid_34045 = global_tid_46692; + sqrt_res_111536 = futrts_sqrt64(i64_res_111513); - __local char *scan_arr_mem_46697; + double fr_111537 = defunc_0_f_res_111535 * sqrt_res_111536; + __local char *mem_124032; - scan_arr_mem_46697 = (__local char *) scan_arr_mem_46697_backing_0; + mem_124032 = (__local char *) mem_124032_backing_2; - float x_34097; - float x_34098; + int64_t gtid_111397 = sext_i32_i64(ltid_pre_128301); + int32_t phys_tid_111398 = local_tid_128296; - x_34097 = 0.0F; - for (int64_t j_46699 = 0; j_46699 < sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691)); - j_46699++) { - int64_t chunk_offset_46700 = segscan_group_sizze_34093 * j_46699 + - sext_i32_i64(group_tid_46694) * (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))); - int64_t flat_idx_46701 = chunk_offset_46700 + - sext_i32_i64(local_tid_46693); - int64_t gtid_34036 = squot64(flat_idx_46701, iota32_arg_28233); - int64_t gtid_34044 = flat_idx_46701 - squot64(flat_idx_46701, - iota32_arg_28233) * - iota32_arg_28233; + if (slt64(gtid_111397, Nmk_76536)) { + bool cond_111543 = gtid_111397 == (int64_t) 0; + double defunc_0_f_res_111544; - // threads in bounds 
read input - { - if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, - iota32_arg_28233)) { - int32_t y_34104 = ((__global int32_t *) mem_45399)[gtid_34036]; - int32_t index_primexp_42411 = sext_i64_i32(gtid_34044); - bool cond_34106 = sle32(y_34104, index_primexp_42411); - float defunc_0_f_res_34107; - - if (cond_34106) { - defunc_0_f_res_34107 = 0.0F; - } else { - int32_t x_34100 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_34036]; - int32_t x_34101 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_34036]; - float x_34102 = ((__global - float *) defunc_0_f_res_mem_45279)[gtid_34036]; - bool cond_34108 = index_primexp_42411 == 0; - float defunc_0_f_res_f_res_34109; - - if (cond_34108) { - defunc_0_f_res_f_res_34109 = x_34102; - } else { - int32_t i_34110 = add32(x_34100, index_primexp_42411); - int64_t i_34111 = sext_i32_i64(i_34110); - bool x_34112 = sle64((int64_t) 0, i_34111); - bool y_34113 = slt64(i_34111, N_27771); - bool bounds_check_34114 = x_34112 && y_34113; - bool index_certs_34115; - - if (!bounds_check_34114) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 75) == -1) { - global_failure_args[0] = i_34111; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_34116 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_34036 * - N_27771 + - i_34111]; - int32_t x_34117 = sub32(x_34100, x_34101); - int32_t i_34118 = add32(x_34117, index_primexp_42411); - int64_t i_34119 = sext_i32_i64(i_34118); - bool x_34120 = sle64((int64_t) 0, i_34119); - bool y_34121 = slt64(i_34119, N_27771); - bool bounds_check_34122 = x_34120 && y_34121; - bool index_certs_34123; - - if (!bounds_check_34122) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 76) == -1) { - global_failure_args[0] = i_34119; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float y_34124 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_34036 * - N_27771 
+ - i_34119]; - float defunc_0_f_res_f_res_f_res_34125 = x_34116 - - y_34124; - - defunc_0_f_res_f_res_34109 = - defunc_0_f_res_f_res_f_res_34125; - } - defunc_0_f_res_34107 = defunc_0_f_res_f_res_34109; - } - // write to-scan values to parameters - { - x_34098 = defunc_0_f_res_34107; - } - // write mapped values results to global memory - { } - } - } - // do one intra-group scan operation - { - // maybe restore some to-scan values to parameters, or read neutral - { - if (!(slt64(gtid_34036, m_27772) && slt64(gtid_34044, - iota32_arg_28233))) { - x_34098 = 0.0F; - } - } - // combine with carry and write to local memory - { - float defunc_1_op_res_34099 = x_34097 + x_34098; - - ((__local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] = - defunc_1_op_res_34099; - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float x_46702; - float x_46703; - float x_46705; - float x_46706; - bool ltid_in_bounds_46708; - - ltid_in_bounds_46708 = slt64(sext_i32_i64(local_tid_46693), - segscan_group_sizze_34093); - - int32_t skip_threads_46709; + if (cond_111543) { + defunc_0_f_res_111544 = 0.0; + } else { + int64_t i_111545 = sub64(gtid_111397, (int64_t) 1); + bool x_111546 = sle64((int64_t) 0, i_111545); + bool y_111547 = slt64(i_111545, num_recresids_padded_75809); + bool bounds_check_111548 = x_111546 && y_111547; + bool index_certs_111549; - // read input for in-block scan - { - if (ltid_in_bounds_46708) { - x_46703 = ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)]; - if ((local_tid_46693 - squot32(local_tid_46693, 32) * 32) == - 0) { - x_46702 = x_46703; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46709 = 1; - while (slt32(skip_threads_46709, 32)) { - if (sle32(skip_threads_46709, local_tid_46693 - - squot32(local_tid_46693, 32) * 32) && - ltid_in_bounds_46708) { - // read operands - { - x_46702 = ((volatile __local - float *) 
scan_arr_mem_46697)[sext_i32_i64(local_tid_46693) - - sext_i32_i64(skip_threads_46709)]; - } - // perform operation - { - bool inactive_46710 = - slt64(srem64(sext_i32_i64(local_tid_46693) + - chunk_offset_46700, - iota32_arg_28233), - sext_i32_i64(local_tid_46693) + - chunk_offset_46700 - - (sext_i32_i64(local_tid_46693 - - skip_threads_46709) + - chunk_offset_46700)); - - if (inactive_46710) { - x_46702 = x_46703; - } - if (!inactive_46710) { - float defunc_1_op_res_46704 = x_46702 + x_46703; - - x_46702 = defunc_1_op_res_46704; - } - } - } - if (sle32(wave_sizze_46695, skip_threads_46709)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46709, local_tid_46693 - - squot32(local_tid_46693, 32) * 32) && - ltid_in_bounds_46708) { - // write result - { - ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] = - x_46702; - x_46703 = x_46702; - } - } - if (sle32(wave_sizze_46695, skip_threads_46709)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46709 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46693 - squot32(local_tid_46693, 32) * 32) == - 31 && ltid_in_bounds_46708) { - ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(squot32(local_tid_46693, - 32))] = - x_46702; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46711; - - // read input for in-block scan - { - if (squot32(local_tid_46693, 32) == 0 && - ltid_in_bounds_46708) { - x_46706 = ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)]; - if ((local_tid_46693 - squot32(local_tid_46693, 32) * - 32) == 0) { - x_46705 = x_46706; - } - } - } - // in-block scan (hopefully no barriers needed) + if (!bounds_check_111548) { { - skip_threads_46711 = 1; - while (slt32(skip_threads_46711, 32)) { - if (sle32(skip_threads_46711, local_tid_46693 - - 
squot32(local_tid_46693, 32) * 32) && - (squot32(local_tid_46693, 32) == 0 && - ltid_in_bounds_46708)) { - // read operands - { - x_46705 = ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693) - - sext_i32_i64(skip_threads_46711)]; - } - // perform operation - { - bool inactive_46712 = - slt64(srem64(sext_i32_i64(local_tid_46693 * - 32 + 32 - 1) + - chunk_offset_46700, - iota32_arg_28233), - sext_i32_i64(local_tid_46693 * 32 + - 32 - 1) + chunk_offset_46700 - - (sext_i32_i64((local_tid_46693 - - skip_threads_46711) * - 32 + 32 - 1) + chunk_offset_46700)); - - if (inactive_46712) { - x_46705 = x_46706; - } - if (!inactive_46712) { - float defunc_1_op_res_46707 = x_46705 + - x_46706; - - x_46705 = defunc_1_op_res_46707; - } - } - } - if (sle32(wave_sizze_46695, skip_threads_46711)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46711, local_tid_46693 - - squot32(local_tid_46693, 32) * 32) && - (squot32(local_tid_46693, 32) == 0 && - ltid_in_bounds_46708)) { - // write result - { - ((volatile __local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] = - x_46705; - x_46706 = x_46705; - } - } - if (sle32(wave_sizze_46695, skip_threads_46711)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46711 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46693, 32) == 0 || - !ltid_in_bounds_46708)) { - // read operands - { - x_46703 = x_46702; - x_46702 = ((__local - float *) scan_arr_mem_46697)[sext_i32_i64(squot32(local_tid_46693, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46713 = - slt64(srem64(sext_i32_i64(local_tid_46693) + - chunk_offset_46700, iota32_arg_28233), - sext_i32_i64(local_tid_46693) + - chunk_offset_46700 - - (sext_i32_i64(squot32(local_tid_46693, 32) * - 32 - 1) + chunk_offset_46700)); - - if (inactive_46713) { - x_46702 = x_46703; - } - if (!inactive_46713) { - float defunc_1_op_res_46704 = 
x_46702 + x_46703; - - x_46702 = defunc_1_op_res_46704; - } - } - // write final result - { - ((__local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] = - x_46702; + if (atomic_cmpxchg_i32_global(global_failure, -1, 178) == + -1) { + global_failure_args[0] = i_111545; + global_failure_args[1] = num_recresids_padded_75809; + ; } + local_failure = true; + goto error_4; } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46693, 32) == 0) { - ((__local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)] = - x_46703; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write partial scan result - { - if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, - iota32_arg_28233)) { - ((__global float *) mem_45403)[gtid_34036 * - iota32_arg_28233 + - gtid_34044] = ((__local - float *) scan_arr_mem_46697)[sext_i32_i64(local_tid_46693)]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread reads last element as carry-in for next iteration - { - bool crosses_segment_46714 = slt64(srem64(chunk_offset_46700 + - segscan_group_sizze_34093, - iota32_arg_28233), - chunk_offset_46700 + - segscan_group_sizze_34093 - - (chunk_offset_46700 + - segscan_group_sizze_34093 - - (int64_t) 1)); - bool should_load_carry_46715 = local_tid_46693 == 0 && - !crosses_segment_46714; - - if (should_load_carry_46715) { - x_34097 = ((__local - float *) scan_arr_mem_46697)[segscan_group_sizze_34093 - - (int64_t) 1]; - } - if (!should_load_carry_46715) { - x_34097 = 0.0F; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + + double x_111550 = ((__global double *) mem_121934)[i_111545 * + m_75136 + + gtid_111390]; + double defunc_0_f_res_f_res_111551 = x_111550 / fr_111537; + + defunc_0_f_res_111544 = defunc_0_f_res_f_res_111551; } + ((__local double *) mem_124032)[gtid_111397] = defunc_0_f_res_111544; } - error_1: - return; - #undef segscan_group_sizze_34093 -} -__kernel void mainDetailedziscan_stage2_32356(__global int 
*global_failure, - __local volatile - int64_t *scan_arr_mem_46297_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t stage1_num_groups_46266, - int32_t num_threads_46267, - __global unsigned char *mem_45163) -{ - #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46297_backing_0 = - (__local volatile - char *) scan_arr_mem_46297_backing_aligned_0; - - if (*global_failure >= 0) + error_4: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) return; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46292; - int32_t local_tid_46293; - int64_t group_sizze_46296; - int32_t wave_sizze_46295; - int32_t group_tid_46294; - - global_tid_46292 = get_global_id(0); - local_tid_46293 = get_local_id(0); - group_sizze_46296 = get_local_size(0); - wave_sizze_46295 = LOCKSTEP_WIDTH; - group_tid_46294 = get_group_id(0); - - int32_t phys_tid_32356; - - phys_tid_32356 = global_tid_46292; - - __local char *scan_arr_mem_46297; - - scan_arr_mem_46297 = (__local char *) scan_arr_mem_46297_backing_0; - - int64_t flat_idx_46299; - - flat_idx_46299 = (sext_i32_i64(local_tid_46293) + (int64_t) 1) * - (segscan_group_sizze_32373 * sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1; - - int64_t gtid_32347; - - gtid_32347 = squot64(flat_idx_46299, N_27771); - - int64_t gtid_32355; + int64_t dims_flat_128310; - gtid_32355 = flat_idx_46299 - squot64(flat_idx_46299, N_27771) * N_27771; - // threads in bound read carries; others get neutral element - { - if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) { - ((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 + - gtid_32355]; - } else { - ((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - (int64_t) 0; - } - } - 
barrier(CLK_LOCAL_MEM_FENCE); + dims_flat_128310 = Nmk_76536; - int64_t x_32378; - int64_t x_32379; - int64_t x_46300; - int64_t x_46301; - bool ltid_in_bounds_46303; + double x_111539; + double x_111540; + double x_128312; + double x_128313; + bool ltid_in_bounds_128315; - ltid_in_bounds_46303 = slt64(sext_i32_i64(local_tid_46293), - stage1_num_groups_46266); + ltid_in_bounds_128315 = slt64(sext_i32_i64(local_tid_128296), Nmk_76536); - int32_t skip_threads_46304; + int32_t skip_threads_128316; // read input for in-block scan { - if (ltid_in_bounds_46303) { - x_32379 = ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)]; - if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) == 0) { - x_32378 = x_32379; + if (ltid_in_bounds_128315) { + x_111540 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128296)]; + if ((local_tid_128296 - squot32(local_tid_128296, 32) * 32) == 0) { + x_111539 = x_111540; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46304 = 1; - while (slt32(skip_threads_46304, 32)) { - if (sle32(skip_threads_46304, local_tid_46293 - - squot32(local_tid_46293, 32) * 32) && - ltid_in_bounds_46303) { + skip_threads_128316 = 1; + while (slt32(skip_threads_128316, 32)) { + if (sle32(skip_threads_128316, local_tid_128296 - + squot32(local_tid_128296, 32) * 32) && + ltid_in_bounds_128315) { // read operands { - x_32378 = ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293) - - sext_i32_i64(skip_threads_46304)]; + x_111539 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128296) - + sext_i32_i64(skip_threads_128316)]; } // perform operation { - bool inactive_46305 = - slt64(srem64((sext_i32_i64(local_tid_46293) + - (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1, N_27771), - (sext_i32_i64(local_tid_46293) + (int64_t) 1) * - (segscan_group_sizze_32373 * 
sdiv_up64(m_27772 * - N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46293 - - skip_threads_46304) + - (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1)); + bool inactive_128317 = + slt64(srem64(sext_i32_i64(local_tid_128296), + Nmk_76536), + sext_i32_i64(local_tid_128296) - + sext_i32_i64(local_tid_128296 - + skip_threads_128316)); - if (inactive_46305) { - x_32378 = x_32379; + if (inactive_128317) { + x_111539 = x_111540; } - if (!inactive_46305) { - int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379); + if (!inactive_128317) { + double defunc_1_op_res_111541 = x_111539 + x_111540; - x_32378 = defunc_1_op_res_32380; + x_111539 = defunc_1_op_res_111541; } } } - if (sle32(wave_sizze_46295, skip_threads_46304)) { + if (sle32(wave_sizze_128298, skip_threads_128316)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46304, local_tid_46293 - - squot32(local_tid_46293, 32) * 32) && - ltid_in_bounds_46303) { + if (sle32(skip_threads_128316, local_tid_128296 - + squot32(local_tid_128296, 32) * 32) && + ltid_in_bounds_128315) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - x_32378; - x_32379 = x_32378; + double *) mem_124032)[sext_i32_i64(local_tid_128296)] = + x_111539; + x_111540 = x_111539; } } - if (sle32(wave_sizze_46295, skip_threads_46304)) { + if (sle32(wave_sizze_128298, skip_threads_128316)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46304 *= 2; + skip_threads_128316 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) == 31 && - ltid_in_bounds_46303) { + if ((local_tid_128296 - squot32(local_tid_128296, 32) * 32) == 31 && + ltid_in_bounds_128315) { ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(squot32(local_tid_46293, - 32))] = - x_32378; + 
double *) mem_124032)[sext_i32_i64(squot32(local_tid_128296, + 32))] = x_111539; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46306; + int32_t skip_threads_128318; // read input for in-block scan { - if (squot32(local_tid_46293, 32) == 0 && ltid_in_bounds_46303) { - x_46301 = ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)]; - if ((local_tid_46293 - squot32(local_tid_46293, 32) * 32) == + if (squot32(local_tid_128296, 32) == 0 && ltid_in_bounds_128315) { + x_128313 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128296)]; + if ((local_tid_128296 - squot32(local_tid_128296, 32) * 32) == 0) { - x_46300 = x_46301; + x_128312 = x_128313; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46306 = 1; - while (slt32(skip_threads_46306, 32)) { - if (sle32(skip_threads_46306, local_tid_46293 - - squot32(local_tid_46293, 32) * 32) && - (squot32(local_tid_46293, 32) == 0 && - ltid_in_bounds_46303)) { + skip_threads_128318 = 1; + while (slt32(skip_threads_128318, 32)) { + if (sle32(skip_threads_128318, local_tid_128296 - + squot32(local_tid_128296, 32) * 32) && + (squot32(local_tid_128296, 32) == 0 && + ltid_in_bounds_128315)) { // read operands { - x_46300 = ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293) - - sext_i32_i64(skip_threads_46306)]; + x_128312 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128296) - + sext_i32_i64(skip_threads_128318)]; } // perform operation { - bool inactive_46307 = - slt64(srem64((sext_i32_i64(local_tid_46293 * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1, N_27771), - (sext_i32_i64(local_tid_46293 * 32 + 32 - - 1) + (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - 
sext_i32_i64(num_threads_46267))) - - (int64_t) 1 - - ((sext_i32_i64((local_tid_46293 - - skip_threads_46306) * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1)); + bool inactive_128319 = + slt64(srem64(sext_i32_i64(local_tid_128296 * 32 + + 32 - 1), Nmk_76536), + sext_i32_i64(local_tid_128296 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128296 - + skip_threads_128318) * + 32 + 32 - 1)); - if (inactive_46307) { - x_46300 = x_46301; + if (inactive_128319) { + x_128312 = x_128313; } - if (!inactive_46307) { - int64_t defunc_1_op_res_46302 = add64(x_46300, - x_46301); + if (!inactive_128319) { + double defunc_1_op_res_128314 = x_128312 + x_128313; - x_46300 = defunc_1_op_res_46302; + x_128312 = defunc_1_op_res_128314; } } } - if (sle32(wave_sizze_46295, skip_threads_46306)) { + if (sle32(wave_sizze_128298, skip_threads_128318)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46306, local_tid_46293 - - squot32(local_tid_46293, 32) * 32) && - (squot32(local_tid_46293, 32) == 0 && - ltid_in_bounds_46303)) { + if (sle32(skip_threads_128318, local_tid_128296 - + squot32(local_tid_128296, 32) * 32) && + (squot32(local_tid_128296, 32) == 0 && + ltid_in_bounds_128315)) { // write result { ((volatile __local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - x_46300; - x_46301 = x_46300; + double *) mem_124032)[sext_i32_i64(local_tid_128296)] = + x_128312; + x_128313 = x_128312; } } - if (sle32(wave_sizze_46295, skip_threads_46306)) { + if (sle32(wave_sizze_128298, skip_threads_128318)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46306 *= 2; + skip_threads_128318 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46293, 32) == 0 || !ltid_in_bounds_46303)) { + if (!(squot32(local_tid_128296, 32) == 0 || !ltid_in_bounds_128315)) { // read operands { - x_32379 = x_32378; - x_32378 = 
((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(squot32(local_tid_46293, - 32)) - - (int64_t) 1]; + x_111540 = x_111539; + x_111539 = ((__local + double *) mem_124032)[sext_i32_i64(squot32(local_tid_128296, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46308 = - slt64(srem64((sext_i32_i64(local_tid_46293) + - (int64_t) 1) * (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1, N_27771), - (sext_i32_i64(local_tid_46293) + (int64_t) 1) * - (segscan_group_sizze_32373 * sdiv_up64(m_27772 * - N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46293, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1)); - - if (inactive_46308) { - x_32378 = x_32379; - } - if (!inactive_46308) { - int64_t defunc_1_op_res_32380 = add64(x_32378, x_32379); - - x_32378 = defunc_1_op_res_32380; + bool inactive_128320 = + slt64(srem64(sext_i32_i64(local_tid_128296), Nmk_76536), + sext_i32_i64(local_tid_128296) - + sext_i32_i64(squot32(local_tid_128296, 32) * 32 - + 1)); + + if (inactive_128320) { + x_111539 = x_111540; + } + if (!inactive_128320) { + double defunc_1_op_res_111541 = x_111539 + x_111540; + + x_111539 = defunc_1_op_res_111541; } } // write final result { ((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - x_32378; + double *) mem_124032)[sext_i32_i64(local_tid_128296)] = + x_111539; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46293, 32) == 0) { - ((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)] = - x_32379; + if (squot32(local_tid_128296, 32) == 0) { + ((__local double *) mem_124032)[sext_i32_i64(local_tid_128296)] = + x_111540; } } barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write scanned carries - { - if (slt64(gtid_32347, m_27772) && 
slt64(gtid_32355, N_27771)) { - ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 + - gtid_32355] = ((__local - int64_t *) scan_arr_mem_46297)[sext_i32_i64(local_tid_46293)]; - } + for (int64_t i_128321 = 0; i_128321 < sdiv_up64(Nmk_76536 - + sext_i32_i64(local_tid_128296), + computed_group_sizze_111392); + i_128321++) { + ((__global double *) mem_124035)[gtid_111390 * Nmk_76536 + (i_128321 * + computed_group_sizze_111392 + + sext_i32_i64(local_tid_128296))] = + ((__local double *) mem_124032)[i_128321 * + computed_group_sizze_111392 + + sext_i32_i64(local_tid_128296)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_128296 == 0) { + ((__global int64_t *) mem_124037)[gtid_111390] = n_111512; } - error_0: + error_5: return; - #undef segscan_group_sizze_32373 } -__kernel void mainDetailedziscan_stage2_34045(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46721_backing_aligned_0, - int64_t m_27772, - int64_t iota32_arg_28233, - int64_t stage1_num_groups_46690, - int32_t num_threads_46691, - __global unsigned char *mem_45403) +__kernel void mainzisegmap_intragroup_111825(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128558_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128554_backing_aligned_1, + double level_75142, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124078, + __global unsigned char *mem_124118) { - #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46721_backing_0 = + __local volatile char *restrict red_arr_mem_128558_backing_1 = + (__local volatile + char *) red_arr_mem_128558_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128554_backing_0 = (__local volatile - char 
*) scan_arr_mem_46721_backing_aligned_0; + char *) red_arr_mem_128554_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46716; - int32_t local_tid_46717; - int64_t group_sizze_46720; - int32_t wave_sizze_46719; - int32_t group_tid_46718; + int32_t global_tid_128548; + int32_t local_tid_128549; + int64_t group_sizze_128552; + int32_t wave_sizze_128551; + int32_t group_tid_128550; - global_tid_46716 = get_global_id(0); - local_tid_46717 = get_local_id(0); - group_sizze_46720 = get_local_size(0); - wave_sizze_46719 = LOCKSTEP_WIDTH; - group_tid_46718 = get_group_id(0); + global_tid_128548 = get_global_id(0); + local_tid_128549 = get_local_id(0); + group_sizze_128552 = get_local_size(0); + wave_sizze_128551 = LOCKSTEP_WIDTH; + group_tid_128550 = get_group_id(0); - int32_t phys_tid_34045; + int32_t phys_tid_111825; - phys_tid_34045 = global_tid_46716; + phys_tid_111825 = group_tid_128550; - __local char *scan_arr_mem_46721; + int32_t ltid_pre_128553; - scan_arr_mem_46721 = (__local char *) scan_arr_mem_46721_backing_0; + ltid_pre_128553 = local_tid_128549; - int64_t flat_idx_46723; + int64_t gtid_111816; - flat_idx_46723 = (sext_i32_i64(local_tid_46717) + (int64_t) 1) * - (segscan_group_sizze_34093 * sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1; + gtid_111816 = sext_i32_i64(group_tid_128550); - int64_t gtid_34036; + int64_t x_112106; - gtid_34036 = squot64(flat_idx_46723, iota32_arg_28233); + x_112106 = ((__global int64_t *) defunc_3_map_res_mem_124069)[gtid_111816]; - int64_t gtid_34044; + double i64_res_112111 = sitofp_i64_f64(x_112106); + double defunc_2_reduce_res_112112; + int64_t gtid_111819 = sext_i32_i64(ltid_pre_128553); + int32_t phys_tid_111820 = local_tid_128549; + __local char *red_arr_mem_128554; - gtid_34044 = flat_idx_46723 - squot64(flat_idx_46723, iota32_arg_28233) * - iota32_arg_28233; - // threads in bound read carries; others get neutral element - { - if (slt64(gtid_34036, m_27772) 
&& slt64(gtid_34044, iota32_arg_28233)) { - ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = - ((__global float *) mem_45403)[gtid_34036 * iota32_arg_28233 + - gtid_34044]; - } else { - ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = - 0.0F; - } - } + red_arr_mem_128554 = (__local char *) red_arr_mem_128554_backing_0; + + int64_t slice_115279; + + slice_115279 = (int64_t) 1 + gtid_111819; + + double x_112116 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_111816 * + Nmk_76536 + + slice_115279]; + int64_t x_112118 = mul64((int64_t) 2, gtid_111819); + int64_t i64_arg_112119 = add64((int64_t) 2, x_112118); + double i64_res_112120 = sitofp_i64_f64(i64_arg_112119); + double y_112121 = i64_res_112120 / i64_res_112111; + double lifted_div_res_112122 = 1.0 + y_112121; + double abs_arg_112123 = x_112116 / lifted_div_res_112122; + double abs_res_112124 = fabs(abs_arg_112123); + + ((__local double *) red_arr_mem_128554)[gtid_111819] = abs_res_112124; barrier(CLK_LOCAL_MEM_FENCE); - float x_34097; - float x_34098; - float x_46724; - float x_46725; - bool ltid_in_bounds_46727; + int32_t offset_128556; + int32_t skip_waves_128557; - ltid_in_bounds_46727 = slt64(sext_i32_i64(local_tid_46717), - stage1_num_groups_46690); + skip_waves_128557 = 1; - int32_t skip_threads_46728; + double x_112113; + double x_112114; - // read input for in-block scan + offset_128556 = 0; + // participating threads read initial accumulator { - if (ltid_in_bounds_46727) { - x_34098 = ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)]; - if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) == 0) { - x_34097 = x_34098; - } + if (slt32(local_tid_128549, sext_i64_i32(num_recresids_padded_75809))) { + x_112113 = ((__local + double *) red_arr_mem_128554)[sext_i32_i64(local_tid_128549 + + offset_128556)]; } } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46728 = 1; - while 
(slt32(skip_threads_46728, 32)) { - if (sle32(skip_threads_46728, local_tid_46717 - - squot32(local_tid_46717, 32) * 32) && - ltid_in_bounds_46727) { - // read operands - { - x_34097 = ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717) - - sext_i32_i64(skip_threads_46728)]; - } - // perform operation - { - bool inactive_46729 = - slt64(srem64((sext_i32_i64(local_tid_46717) + - (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1, iota32_arg_28233), - (sext_i32_i64(local_tid_46717) + (int64_t) 1) * - (segscan_group_sizze_34093 * sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46717 - - skip_threads_46728) + - (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1)); - - if (inactive_46729) { - x_34097 = x_34098; - } - if (!inactive_46729) { - float defunc_1_op_res_34099 = x_34097 + x_34098; - - x_34097 = defunc_1_op_res_34099; - } - } + offset_128556 = 1; + while (slt32(offset_128556, wave_sizze_128551)) { + if (slt32(local_tid_128549 + offset_128556, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128549 - squot32(local_tid_128549, wave_sizze_128551) * + wave_sizze_128551) & (2 * offset_128556 - 1)) == 0) { + // read array element + { + x_112114 = ((volatile __local + double *) red_arr_mem_128554)[sext_i32_i64(local_tid_128549 + + offset_128556)]; } - if (sle32(wave_sizze_46719, skip_threads_46728)) { - barrier(CLK_LOCAL_MEM_FENCE); + // apply reduction operation + { + double defunc_1_op_res_112115 = fmax64(x_112113, x_112114); + + x_112113 = defunc_1_op_res_112115; } - if (sle32(skip_threads_46728, local_tid_46717 - - squot32(local_tid_46717, 32) * 32) && - ltid_in_bounds_46727) { - // write result - { - ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = 
- x_34097; - x_34098 = x_34097; - } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128554)[sext_i32_i64(local_tid_128549)] = + x_112113; } - if (sle32(wave_sizze_46719, skip_threads_46728)) { - barrier(CLK_LOCAL_MEM_FENCE); + } + offset_128556 *= 2; + } + while (slt32(skip_waves_128557, + squot32(sext_i64_i32(num_recresids_padded_75809) + + wave_sizze_128551 - 1, wave_sizze_128551))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128556 = skip_waves_128557 * wave_sizze_128551; + if (slt32(local_tid_128549 + offset_128556, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128549 - squot32(local_tid_128549, wave_sizze_128551) * + wave_sizze_128551) == 0 && (squot32(local_tid_128549, + wave_sizze_128551) & (2 * + skip_waves_128557 - + 1)) == + 0)) { + // read array element + { + x_112114 = ((__local + double *) red_arr_mem_128554)[sext_i32_i64(local_tid_128549 + + offset_128556)]; + } + // apply reduction operation + { + double defunc_1_op_res_112115 = fmax64(x_112113, x_112114); + + x_112113 = defunc_1_op_res_112115; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128554)[sext_i32_i64(local_tid_128549)] = + x_112113; } - skip_threads_46728 *= 2; } + skip_waves_128557 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) == 31 && - ltid_in_bounds_46727) { - ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(squot32(local_tid_46717, - 32))] = x_34097; - } + defunc_2_reduce_res_112112 = ((__local + double *) red_arr_mem_128554)[(int64_t) 0]; + + double defunc_0_Q_arg_112125 = 3.0 * defunc_2_reduce_res_112112; + double zs_res_112126 = defunc_0_Q_arg_112125 / 1.4142135623730951; + double abs_res_112127 = fabs(zs_res_112126); + double zs_res_112128 = abs_res_112127 / 2.0; + double zp_res_112129 = 1.0 + zs_res_112128; + double zs_res_112130 = 1.0 / zp_res_112129; + double zt_res_112131 = 
zs_res_112130 * zs_res_112130; + double zt_res_112132 = zs_res_112130 * zt_res_112131; + double zt_res_112133 = zt_res_112131 * zt_res_112131; + double zt_res_112134 = zt_res_112131 * zt_res_112132; + double zt_res_112135 = zt_res_112132 * zt_res_112132; + double zt_res_112136 = zt_res_112132 * zt_res_112133; + double zt_res_112137 = zt_res_112133 * zt_res_112133; + double zt_res_112138 = zt_res_112133 * zt_res_112134; + double zt_res_112139 = 0.17087277 * zt_res_112138; + double zt_res_112140 = 0.82215223 * zt_res_112137; + double zt_res_112141 = 1.48851587 * zt_res_112136; + double zt_res_112142 = 1.13520398 * zt_res_112135; + double zt_res_112143 = 0.27886807 * zt_res_112134; + double zt_res_112144 = 0.18628806 * zt_res_112133; + double zt_res_112145 = 9.678418e-2 * zt_res_112132; + double zt_res_112146 = 0.37409196 * zt_res_112131; + double zt_res_112147 = 1.00002368 * zs_res_112130; + double zt_res_112148 = zs_res_112126 * zs_res_112126; + double zm_res_112149 = 0.0 - zt_res_112148; + double zm_res_112150 = zm_res_112149 - 1.26551223; + double zp_res_112151 = zt_res_112147 + zm_res_112150; + double zp_res_112152 = zt_res_112146 + zp_res_112151; + double zp_res_112153 = zt_res_112145 + zp_res_112152; + double zm_res_112154 = zp_res_112153 - zt_res_112144; + double zp_res_112155 = zt_res_112143 + zm_res_112154; + double zm_res_112156 = zp_res_112155 - zt_res_112142; + double zp_res_112157 = zt_res_112141 + zm_res_112156; + double zm_res_112158 = zp_res_112157 - zt_res_112140; + double zp_res_112159 = zt_res_112139 + zm_res_112158; + double exp_res_112160; + + exp_res_112160 = futrts_exp64(zp_res_112159); + + double zt_res_112161 = zs_res_112130 * exp_res_112160; + bool zgze_res_112162 = 0.0 <= zs_res_112126; + double erf_res_112163; + + if (zgze_res_112162) { + double zm_res_112164 = 1.0 - zt_res_112161; + + erf_res_112163 = zm_res_112164; + } else { + double zm_res_112165 = zt_res_112161 - 1.0; + + erf_res_112163 = zm_res_112165; + } + + double zp_res_112166 = 
1.0 + erf_res_112163; + double zs_res_112167 = zp_res_112166 / 2.0; + double defunc_0_Q_res_112168 = 1.0 - zs_res_112167; + double y_112169 = fpow64(defunc_2_reduce_res_112112, 2.0); + double negate_arg_112170 = 4.0 * y_112169; + double defunc_0_exp_arg_112171 = 0.0 - negate_arg_112170; + double defunc_0_exp_res_112172 = fpow64(2.718281828459045, + defunc_0_exp_arg_112171); + double x_112173 = defunc_0_Q_res_112168 + defunc_0_exp_res_112172; + double zs_res_112174 = defunc_2_reduce_res_112112 / 1.4142135623730951; + double abs_res_112175 = fabs(zs_res_112174); + double zs_res_112176 = abs_res_112175 / 2.0; + double zp_res_112177 = 1.0 + zs_res_112176; + double zs_res_112178 = 1.0 / zp_res_112177; + double zt_res_112179 = zs_res_112178 * zs_res_112178; + double zt_res_112180 = zs_res_112178 * zt_res_112179; + double zt_res_112181 = zt_res_112179 * zt_res_112179; + double zt_res_112182 = zt_res_112179 * zt_res_112180; + double zt_res_112183 = zt_res_112180 * zt_res_112180; + double zt_res_112184 = zt_res_112180 * zt_res_112181; + double zt_res_112185 = zt_res_112181 * zt_res_112181; + double zt_res_112186 = zt_res_112181 * zt_res_112182; + double zt_res_112187 = 0.17087277 * zt_res_112186; + double zt_res_112188 = 0.82215223 * zt_res_112185; + double zt_res_112189 = 1.48851587 * zt_res_112184; + double zt_res_112190 = 1.13520398 * zt_res_112183; + double zt_res_112191 = 0.27886807 * zt_res_112182; + double zt_res_112192 = 0.18628806 * zt_res_112181; + double zt_res_112193 = 9.678418e-2 * zt_res_112180; + double zt_res_112194 = 0.37409196 * zt_res_112179; + double zt_res_112195 = 1.00002368 * zs_res_112178; + double zt_res_112196 = zs_res_112174 * zs_res_112174; + double zm_res_112197 = 0.0 - zt_res_112196; + double zm_res_112198 = zm_res_112197 - 1.26551223; + double zp_res_112199 = zt_res_112195 + zm_res_112198; + double zp_res_112200 = zt_res_112194 + zp_res_112199; + double zp_res_112201 = zt_res_112193 + zp_res_112200; + double zm_res_112202 = zp_res_112201 - 
zt_res_112192; + double zp_res_112203 = zt_res_112191 + zm_res_112202; + double zm_res_112204 = zp_res_112203 - zt_res_112190; + double zp_res_112205 = zt_res_112189 + zm_res_112204; + double zm_res_112206 = zp_res_112205 - zt_res_112188; + double zp_res_112207 = zt_res_112187 + zm_res_112206; + double exp_res_112208; + + exp_res_112208 = futrts_exp64(zp_res_112207); + + double zt_res_112209 = zs_res_112178 * exp_res_112208; + bool zgze_res_112210 = 0.0 <= zs_res_112174; + double erf_res_112211; + + if (zgze_res_112210) { + double zm_res_112212 = 1.0 - zt_res_112209; + + erf_res_112211 = zm_res_112212; + } else { + double zm_res_112213 = zt_res_112209 - 1.0; + + erf_res_112211 = zm_res_112213; + } + + double zp_res_112214 = 1.0 + erf_res_112211; + double zs_res_112215 = zp_res_112214 / 2.0; + double defunc_0_Q_res_112216 = 1.0 - zs_res_112215; + double y_112217 = defunc_0_exp_res_112172 * defunc_0_Q_res_112216; + double y_112218 = x_112173 - y_112217; + double pval_brownian_motion_max_res_112219 = 2.0 * y_112218; + int64_t defunc_0_f_res_112220; + int64_t gtid_111821 = sext_i32_i64(ltid_pre_128553); + int32_t phys_tid_111822 = local_tid_128549; + __local char *red_arr_mem_128558; + + red_arr_mem_128558 = (__local char *) red_arr_mem_128558_backing_1; + + int64_t slice_115281; + + slice_115281 = (int64_t) 1 + gtid_111821; + + double x_112225 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_111816 * + Nmk_76536 + + slice_115281]; + double x_112226 = ((__global double *) mem_124078)[gtid_111816 * Nmk_76536 + + slice_115281]; + double abs_res_112227 = fabs(x_112225); + bool cond_112228 = x_112226 < abs_res_112227; + int64_t defunc_2_f_res_112229; + + if (cond_112228) { + defunc_2_f_res_112229 = gtid_111821; + } else { + defunc_2_f_res_112229 = (int64_t) 9223372036854775807; } + ((__local int64_t *) red_arr_mem_128558)[gtid_111821] = + defunc_2_f_res_112229; barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for 
block 'i+1' + + int32_t offset_128560; + int32_t skip_waves_128561; + + skip_waves_128561 = 1; + + int64_t x_112221; + int64_t x_112222; + + offset_128560 = 0; + // participating threads read initial accumulator { - int32_t skip_threads_46730; - - // read input for in-block scan - { - if (squot32(local_tid_46717, 32) == 0 && ltid_in_bounds_46727) { - x_46725 = ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)]; - if ((local_tid_46717 - squot32(local_tid_46717, 32) * 32) == - 0) { - x_46724 = x_46725; - } - } + if (slt32(local_tid_128549, sext_i64_i32(num_recresids_padded_75809))) { + x_112221 = ((__local + int64_t *) red_arr_mem_128558)[sext_i32_i64(local_tid_128549 + + offset_128560)]; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46730 = 1; - while (slt32(skip_threads_46730, 32)) { - if (sle32(skip_threads_46730, local_tid_46717 - - squot32(local_tid_46717, 32) * 32) && - (squot32(local_tid_46717, 32) == 0 && - ltid_in_bounds_46727)) { - // read operands - { - x_46724 = ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717) - - sext_i32_i64(skip_threads_46730)]; - } - // perform operation - { - bool inactive_46731 = - slt64(srem64((sext_i32_i64(local_tid_46717 * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1, iota32_arg_28233), - (sext_i32_i64(local_tid_46717 * 32 + 32 - - 1) + (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1 - - ((sext_i32_i64((local_tid_46717 - - skip_threads_46730) * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1)); - - if (inactive_46731) { - x_46724 = x_46725; - } - if (!inactive_46731) { - float defunc_1_op_res_46726 = x_46724 + x_46725; - - x_46724 = 
defunc_1_op_res_46726; - } - } - } - if (sle32(wave_sizze_46719, skip_threads_46730)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46730, local_tid_46717 - - squot32(local_tid_46717, 32) * 32) && - (squot32(local_tid_46717, 32) == 0 && - ltid_in_bounds_46727)) { - // write result - { - ((volatile __local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = - x_46724; - x_46725 = x_46724; - } - } - if (sle32(wave_sizze_46719, skip_threads_46730)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46730 *= 2; + } + offset_128560 = 1; + while (slt32(offset_128560, wave_sizze_128551)) { + if (slt32(local_tid_128549 + offset_128560, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128549 - squot32(local_tid_128549, wave_sizze_128551) * + wave_sizze_128551) & (2 * offset_128560 - 1)) == 0) { + // read array element + { + x_112222 = ((volatile __local + int64_t *) red_arr_mem_128558)[sext_i32_i64(local_tid_128549 + + offset_128560)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_112223 = smin64(x_112221, x_112222); + + x_112221 = defunc_1_op_res_112223; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128558)[sext_i32_i64(local_tid_128549)] = + x_112221; } } + offset_128560 *= 2; } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46717, 32) == 0 || !ltid_in_bounds_46727)) { - // read operands + while (slt32(skip_waves_128561, + squot32(sext_i64_i32(num_recresids_padded_75809) + + wave_sizze_128551 - 1, wave_sizze_128551))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128560 = skip_waves_128561 * wave_sizze_128551; + if (slt32(local_tid_128549 + offset_128560, + sext_i64_i32(num_recresids_padded_75809)) && + ((local_tid_128549 - squot32(local_tid_128549, wave_sizze_128551) * + wave_sizze_128551) == 0 && (squot32(local_tid_128549, + wave_sizze_128551) & (2 * + skip_waves_128561 - + 1)) == + 0)) { + // read array element 
{ - x_34098 = x_34097; - x_34097 = ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(squot32(local_tid_46717, - 32)) - - (int64_t) 1]; + x_112222 = ((__local + int64_t *) red_arr_mem_128558)[sext_i32_i64(local_tid_128549 + + offset_128560)]; } - // perform operation + // apply reduction operation { - bool inactive_46732 = - slt64(srem64((sext_i32_i64(local_tid_46717) + - (int64_t) 1) * (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1, iota32_arg_28233), - (sext_i32_i64(local_tid_46717) + (int64_t) 1) * - (segscan_group_sizze_34093 * sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46717, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1)); - - if (inactive_46732) { - x_34097 = x_34098; - } - if (!inactive_46732) { - float defunc_1_op_res_34099 = x_34097 + x_34098; - - x_34097 = defunc_1_op_res_34099; - } + int64_t defunc_1_op_res_112223 = smin64(x_112221, x_112222); + + x_112221 = defunc_1_op_res_112223; } - // write final result + // write result of operation { ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = - x_34097; + int64_t *) red_arr_mem_128558)[sext_i32_i64(local_tid_128549)] = + x_112221; } } + skip_waves_128561 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46717, 32) == 0) { - ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)] = - x_34098; - } + defunc_0_f_res_112220 = ((__local + int64_t *) red_arr_mem_128558)[(int64_t) 0]; + + bool isnan_res_112230; + + isnan_res_112230 = futrts_isnan64(pval_brownian_motion_max_res_112219); + + bool cond_112231 = !isnan_res_112230; + bool cond_t_res_112232 = pval_brownian_motion_max_res_112219 < level_75142; + bool x_112233 = cond_112231 && 
cond_t_res_112232; + bool chk_t_res_112234 = defunc_0_f_res_112220 == + (int64_t) 9223372036854775807; + bool chk_t_res_112235 = !chk_t_res_112234; + bool x_112236 = x_112233 && chk_t_res_112235; + int64_t y_start_112237; + + if (x_112236) { + int64_t y_start_t_res_112238 = sub64(x_112106, defunc_0_f_res_112220); + + y_start_112237 = y_start_t_res_112238; + } else { + y_start_112237 = (int64_t) 0; } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write scanned carries - { - if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, iota32_arg_28233)) { - ((__global float *) mem_45403)[gtid_34036 * iota32_arg_28233 + - gtid_34044] = ((__local - float *) scan_arr_mem_46721)[sext_i32_i64(local_tid_46717)]; - } + if (local_tid_128549 == 0) { + ((__global int64_t *) mem_124118)[gtid_111816] = y_start_112237; } - error_0: + error_4: return; - #undef segscan_group_sizze_34093 } -__kernel void mainDetailedziscan_stage3_32356(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int64_t num_groups_32374, - int32_t num_threads_46267, - int32_t required_groups_46309, - __global unsigned char *mem_45163) +__kernel void mainzisegmap_intragroup_112961(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124308_backing_aligned_0, + __local volatile + int64_t *mem_124298_backing_aligned_1, + int64_t k2p2zq_75151, + int64_t m_76774, int64_t nm_76775, + __global + unsigned char *defunc_3_map_res_mem_124294, + __global unsigned char *mem_124318) { - #define segscan_group_sizze_32373 (mainDetailedzisegscan_group_sizze_32350) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_124308_backing_1 = (__local volatile + char *) mem_124308_backing_aligned_0; + __local volatile char *restrict mem_124298_backing_0 = (__local volatile + char *) mem_124298_backing_aligned_1; + volatile __local bool local_failure; - if (*global_failure >= 
0) - return; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46310; - int32_t local_tid_46311; - int64_t group_sizze_46314; - int32_t wave_sizze_46313; - int32_t group_tid_46312; + int32_t global_tid_128829; + int32_t local_tid_128830; + int64_t group_sizze_128833; + int32_t wave_sizze_128832; + int32_t group_tid_128831; - global_tid_46310 = get_global_id(0); - local_tid_46311 = get_local_id(0); - group_sizze_46314 = get_local_size(0); - wave_sizze_46313 = LOCKSTEP_WIDTH; - group_tid_46312 = get_group_id(0); + global_tid_128829 = get_global_id(0); + local_tid_128830 = get_local_id(0); + group_sizze_128833 = get_local_size(0); + wave_sizze_128832 = LOCKSTEP_WIDTH; + group_tid_128831 = get_group_id(0); - int32_t phys_tid_32356; + int32_t phys_tid_112961; - phys_tid_32356 = global_tid_46310; + phys_tid_112961 = group_tid_128831; - int32_t phys_group_id_46315; + int32_t ltid_pre_128834; - phys_group_id_46315 = get_group_id(0); - for (int32_t i_46316 = 0; i_46316 < sdiv_up32(required_groups_46309 - - phys_group_id_46315, - sext_i64_i32(num_groups_32374)); - i_46316++) { - int32_t virt_group_id_46317 = phys_group_id_46315 + i_46316 * - sext_i64_i32(num_groups_32374); - int64_t flat_idx_46318 = sext_i32_i64(virt_group_id_46317) * - segscan_group_sizze_32373 + sext_i32_i64(local_tid_46311); - int64_t gtid_32347 = squot64(flat_idx_46318, N_27771); - int64_t gtid_32355 = flat_idx_46318 - squot64(flat_idx_46318, N_27771) * - N_27771; - int64_t orig_group_46319 = squot64(flat_idx_46318, - segscan_group_sizze_32373 * - sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))); - int64_t carry_in_flat_idx_46320 = orig_group_46319 * - (segscan_group_sizze_32373 * sdiv_up64(m_27772 * N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1; - - if (slt64(gtid_32347, m_27772) && slt64(gtid_32355, N_27771)) { - if (!(orig_group_46319 == (int64_t) 
0 || (flat_idx_46318 == - (orig_group_46319 + - (int64_t) 1) * - (segscan_group_sizze_32373 * - sdiv_up64(m_27772 * - N_27771, - sext_i32_i64(num_threads_46267))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46318, - N_27771), - flat_idx_46318 - - carry_in_flat_idx_46320)))) { - int64_t x_32378; - int64_t x_32379; - - x_32378 = ((__global - int64_t *) mem_45163)[squot64(carry_in_flat_idx_46320, - N_27771) * N_27771 + - (carry_in_flat_idx_46320 - - squot64(carry_in_flat_idx_46320, - N_27771) * N_27771)]; - x_32379 = ((__global int64_t *) mem_45163)[gtid_32347 * - N_27771 + - gtid_32355]; - - int64_t defunc_1_op_res_32380; - - defunc_1_op_res_32380 = add64(x_32378, x_32379); - x_32378 = defunc_1_op_res_32380; - ((__global int64_t *) mem_45163)[gtid_32347 * N_27771 + - gtid_32355] = x_32378; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_0: - return; - #undef segscan_group_sizze_32373 -} -__kernel void mainDetailedziscan_stage3_34045(__global int *global_failure, - int64_t m_27772, - int64_t iota32_arg_28233, - int64_t num_groups_34094, - int32_t num_threads_46691, - int32_t required_groups_46733, - __global unsigned char *mem_45403) -{ - #define segscan_group_sizze_34093 (mainDetailedzisegscan_group_sizze_34039) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; + ltid_pre_128834 = local_tid_128830; - int32_t global_tid_46734; - int32_t local_tid_46735; - int64_t group_sizze_46738; - int32_t wave_sizze_46737; - int32_t group_tid_46736; + int64_t gtid_112899; - global_tid_46734 = get_global_id(0); - local_tid_46735 = get_local_id(0); - group_sizze_46738 = get_local_size(0); - wave_sizze_46737 = LOCKSTEP_WIDTH; - group_tid_46736 = get_group_id(0); + gtid_112899 = sext_i32_i64(group_tid_128831); - int32_t phys_tid_34045; + __local char *mem_124298; - phys_tid_34045 = global_tid_46734; + mem_124298 = (__local char *) mem_124298_backing_0; - int32_t phys_group_id_46739; 
+ int64_t gtid_112902 = sext_i32_i64(ltid_pre_128834); + int32_t phys_tid_112903 = local_tid_128830; + int64_t defunc_0_f_res_112972 = sdiv64(gtid_112902, m_76774); + int64_t defunc_0_f_res_112973 = smod64(gtid_112902, m_76774); + bool cond_112974 = slt64(defunc_0_f_res_112973, k2p2zq_75151); + double defunc_0_f_res_112975; - phys_group_id_46739 = get_group_id(0); - for (int32_t i_46740 = 0; i_46740 < sdiv_up32(required_groups_46733 - - phys_group_id_46739, - sext_i64_i32(num_groups_34094)); - i_46740++) { - int32_t virt_group_id_46741 = phys_group_id_46739 + i_46740 * - sext_i64_i32(num_groups_34094); - int64_t flat_idx_46742 = sext_i32_i64(virt_group_id_46741) * - segscan_group_sizze_34093 + sext_i32_i64(local_tid_46735); - int64_t gtid_34036 = squot64(flat_idx_46742, iota32_arg_28233); - int64_t gtid_34044 = flat_idx_46742 - squot64(flat_idx_46742, - iota32_arg_28233) * - iota32_arg_28233; - int64_t orig_group_46743 = squot64(flat_idx_46742, - segscan_group_sizze_34093 * - sdiv_up64(m_27772 * iota32_arg_28233, - sext_i32_i64(num_threads_46691))); - int64_t carry_in_flat_idx_46744 = orig_group_46743 * - (segscan_group_sizze_34093 * sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1; + if (cond_112974) { + bool x_112976 = sle64((int64_t) 0, defunc_0_f_res_112972); + bool y_112977 = slt64(defunc_0_f_res_112972, k2p2zq_75151); + bool bounds_check_112978 = x_112976 && y_112977; + bool x_112979 = sle64((int64_t) 0, defunc_0_f_res_112973); + bool bounds_check_112980 = cond_112974 && x_112979; + bool index_ok_112981 = bounds_check_112978 && bounds_check_112980; + bool index_certs_112982; - if (slt64(gtid_34036, m_27772) && slt64(gtid_34044, iota32_arg_28233)) { - if (!(orig_group_46743 == (int64_t) 0 || (flat_idx_46742 == - (orig_group_46743 + - (int64_t) 1) * - (segscan_group_sizze_34093 * - sdiv_up64(m_27772 * - iota32_arg_28233, - sext_i32_i64(num_threads_46691))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46742, - 
iota32_arg_28233), - flat_idx_46742 - - carry_in_flat_idx_46744)))) { - float x_34097; - float x_34098; - - x_34097 = ((__global - float *) mem_45403)[squot64(carry_in_flat_idx_46744, - iota32_arg_28233) * - iota32_arg_28233 + - (carry_in_flat_idx_46744 - - squot64(carry_in_flat_idx_46744, - iota32_arg_28233) * - iota32_arg_28233)]; - x_34098 = ((__global float *) mem_45403)[gtid_34036 * - iota32_arg_28233 + - gtid_34044]; - - float defunc_1_op_res_34099; - - defunc_1_op_res_34099 = x_34097 + x_34098; - x_34097 = defunc_1_op_res_34099; - ((__global float *) mem_45403)[gtid_34036 * iota32_arg_28233 + - gtid_34044] = x_34097; + if (!index_ok_112981) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 180) == -1) { + global_failure_args[0] = defunc_0_f_res_112972; + global_failure_args[1] = defunc_0_f_res_112973; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + ; + } + local_failure = true; + goto error_0; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + double defunc_0_f_res_t_res_112983 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_112899 * + (k2p2zq_75151 * + k2p2zq_75151) + + defunc_0_f_res_112972 * + k2p2zq_75151 + + defunc_0_f_res_112973]; + + defunc_0_f_res_112975 = defunc_0_f_res_t_res_112983; + } else { + int64_t y_112984 = add64(k2p2zq_75151, defunc_0_f_res_112972); + bool cond_112985 = defunc_0_f_res_112973 == y_112984; + double defunc_0_f_res_f_res_112986; + + if (cond_112985) { + defunc_0_f_res_f_res_112986 = 1.0; + } else { + defunc_0_f_res_f_res_112986 = 0.0; + } + defunc_0_f_res_112975 = defunc_0_f_res_f_res_112986; } + ((__local double *) mem_124298)[gtid_112902] = defunc_0_f_res_112975; error_0: - return; - #undef segscan_group_sizze_34093 -} -__kernel void mainDetailedzisegmap_29975(__global int *global_failure, - int64_t N_27771, float freq_27776, - int64_t i32_res_27787, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44385) -{ - #define 
segmap_group_sizze_30048 (mainDetailedzisegmap_group_sizze_29978) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) return; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_45691; - int32_t local_tid_45692; - int64_t group_sizze_45695; - int32_t wave_sizze_45694; - int32_t group_tid_45693; - - global_tid_45691 = get_global_id(0); - local_tid_45692 = get_local_id(0); - group_sizze_45695 = get_local_size(0); - wave_sizze_45694 = LOCKSTEP_WIDTH; - group_tid_45693 = get_group_id(0); - - int32_t phys_tid_29975; - - phys_tid_29975 = global_tid_45691; - - int64_t gtid_29973; - - gtid_29973 = squot64(sext_i32_i64(group_tid_45693) * - segmap_group_sizze_30048 + - sext_i32_i64(local_tid_45692), N_27771); - - int64_t gtid_29974; + __local char *mem_124308; - gtid_29974 = sext_i32_i64(group_tid_45693) * segmap_group_sizze_30048 + - sext_i32_i64(local_tid_45692) - squot64(sext_i32_i64(group_tid_45693) * - segmap_group_sizze_30048 + - sext_i32_i64(local_tid_45692), - N_27771) * N_27771; - if (slt64(gtid_29973, i32_res_27787) && slt64(gtid_29974, N_27771)) { - int32_t index_primexp_42340 = sext_i64_i32(gtid_29973); - bool index_primexp_42337 = index_primexp_42340 == 0; - float defunc_0_f_res_30054; + mem_124308 = (__local char *) mem_124308_backing_1; + for (int64_t i_112988 = 0; i_112988 < k2p2zq_75151; i_112988++) { + bool y_112990 = slt64(i_112988, nm_76775); + bool index_certs_112991; - if (index_primexp_42337) { - defunc_0_f_res_30054 = 1.0F; - } else { - int32_t x_30053 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_29974]; - bool cond_30055 = index_primexp_42340 == 1; - float defunc_0_f_res_f_res_30056; - - if (cond_30055) { - float i32_res_30057 = sitofp_i32_f32(x_30053); - - defunc_0_f_res_f_res_30056 = i32_res_30057; - } else { - int32_t r32_arg_30058 = sdiv32(index_primexp_42340, 2); - float i32_res_30059 = sitofp_i32_f32(r32_arg_30058); 
- float i32_res_30060 = sitofp_i32_f32(x_30053); - float x_30061 = 6.2831855F * i32_res_30059; - float x_30062 = i32_res_30060 * x_30061; - float angle_30063 = x_30062 / freq_27776; - int32_t x_30064 = smod32(index_primexp_42340, 2); - bool cond_30065 = x_30064 == 0; - float defunc_0_f_res_f_res_f_res_30066; - - if (cond_30065) { - float sin_res_30067; - - sin_res_30067 = futrts_sin32(angle_30063); - defunc_0_f_res_f_res_f_res_30066 = sin_res_30067; - } else { - float cos_res_30068; - - cos_res_30068 = futrts_cos32(angle_30063); - defunc_0_f_res_f_res_f_res_30066 = cos_res_30068; + if (!y_112990) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 181) == -1) { + global_failure_args[0] = i_112988; + global_failure_args[1] = nm_76775; + ; } - defunc_0_f_res_f_res_30056 = defunc_0_f_res_f_res_f_res_30066; + local_failure = true; + goto error_1; } - defunc_0_f_res_30054 = defunc_0_f_res_f_res_30056; } - ((__global float *) mem_44385)[gtid_29973 * N_27771 + gtid_29974] = - defunc_0_f_res_30054; - } - - error_0: - return; - #undef segmap_group_sizze_30048 -} -__kernel void mainDetailedzisegmap_30153(__global int *global_failure, - int64_t N_27771, float freq_27776, - int64_t i32_res_27787, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44389) -{ - #define segmap_group_sizze_30222 (mainDetailedzisegmap_group_sizze_30156) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45696; - int32_t local_tid_45697; - int64_t group_sizze_45700; - int32_t wave_sizze_45699; - int32_t group_tid_45698; - - global_tid_45696 = get_global_id(0); - local_tid_45697 = get_local_id(0); - group_sizze_45700 = get_local_size(0); - wave_sizze_45699 = LOCKSTEP_WIDTH; - group_tid_45698 = get_group_id(0); - - int32_t phys_tid_30153; - - phys_tid_30153 = global_tid_45696; - - int64_t gtid_30151; - - gtid_30151 = squot64(sext_i32_i64(group_tid_45698) * - 
segmap_group_sizze_30222 + - sext_i32_i64(local_tid_45697), N_27771); - - int64_t gtid_30152; - - gtid_30152 = sext_i32_i64(group_tid_45698) * segmap_group_sizze_30222 + - sext_i32_i64(local_tid_45697) - squot64(sext_i32_i64(group_tid_45698) * - segmap_group_sizze_30222 + - sext_i32_i64(local_tid_45697), - N_27771) * N_27771; - if (slt64(gtid_30151, i32_res_27787) && slt64(gtid_30152, N_27771)) { - int32_t index_primexp_42349 = sext_i64_i32(gtid_30151); - bool index_primexp_42346 = index_primexp_42349 == 0; - float defunc_0_f_res_30228; - if (index_primexp_42346) { - defunc_0_f_res_30228 = 1.0F; + double v1_112992 = ((__local double *) mem_124298)[i_112988]; + bool cond_112993 = v1_112992 == 0.0; + int64_t gtid_112920 = sext_i32_i64(ltid_pre_128834); + int32_t phys_tid_112921 = local_tid_128830; + int64_t defunc_0_f_res_112996 = sdiv64(gtid_112920, m_76774); + int64_t defunc_0_f_res_112997 = smod64(gtid_112920, m_76774); + double defunc_0_f_res_112998; + + if (cond_112993) { + int64_t x_112999 = mul64(m_76774, defunc_0_f_res_112996); + int64_t i_113000 = add64(defunc_0_f_res_112997, x_112999); + bool x_113001 = sle64((int64_t) 0, i_113000); + bool y_113002 = slt64(i_113000, nm_76775); + bool bounds_check_113003 = x_113001 && y_113002; + bool index_certs_113004; + + if (!bounds_check_113003) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 182) == + -1) { + global_failure_args[0] = i_113000; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_1; + } + } + + double defunc_0_f_res_t_res_113005 = ((__local + double *) mem_124298)[i_113000]; + + defunc_0_f_res_112998 = defunc_0_f_res_t_res_113005; } else { - int32_t x_30227 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_30152]; - int32_t i_30229 = add32(1, index_primexp_42349); - int32_t r32_arg_30230 = sdiv32(i_30229, 2); - float i32_res_30231 = sitofp_i32_f32(r32_arg_30230); - float i32_res_30232 = sitofp_i32_f32(x_30227); - float x_30233 = 6.2831855F * i32_res_30231; - 
float x_30234 = i32_res_30232 * x_30233; - float angle_30235 = x_30234 / freq_27776; - int32_t x_30236 = smod32(i_30229, 2); - bool cond_30237 = x_30236 == 0; - float defunc_0_f_res_f_res_30238; + bool x_113006 = sle64((int64_t) 0, defunc_0_f_res_112997); + bool y_113007 = slt64(defunc_0_f_res_112997, nm_76775); + bool bounds_check_113008 = x_113006 && y_113007; + bool index_certs_113009; + + if (!bounds_check_113008) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 183) == + -1) { + global_failure_args[0] = defunc_0_f_res_112997; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_113010 = ((__local + double *) mem_124298)[defunc_0_f_res_112997]; + double x_113011 = x_113010 / v1_112992; + int64_t y_113012 = sub64(k2p2zq_75151, (int64_t) 1); + bool cond_113013 = slt64(defunc_0_f_res_112996, y_113012); + double defunc_0_f_res_f_res_113014; - if (cond_30237) { - float sin_res_30239; + if (cond_113013) { + int64_t x_113015 = add64((int64_t) 1, defunc_0_f_res_112996); + int64_t x_113016 = mul64(m_76774, x_113015); + int64_t i_113017 = add64(defunc_0_f_res_112997, x_113016); + bool x_113018 = sle64((int64_t) 0, i_113017); + bool y_113019 = slt64(i_113017, nm_76775); + bool bounds_check_113020 = x_113018 && y_113019; + bool index_certs_113021; + + if (!bounds_check_113020) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 184) == -1) { + global_failure_args[0] = i_113017; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_1; + } + } - sin_res_30239 = futrts_sin32(angle_30235); - defunc_0_f_res_f_res_30238 = sin_res_30239; - } else { - float cos_res_30240; + double x_113022 = ((__local double *) mem_124298)[i_113017]; + int64_t i_113023 = add64(i_112988, x_113016); + bool x_113024 = sle64((int64_t) 0, i_113023); + bool y_113025 = slt64(i_113023, nm_76775); + bool bounds_check_113026 = x_113024 && y_113025; + bool index_certs_113027; + + if (!bounds_check_113026) { + { 
+ if (atomic_cmpxchg_i32_global(global_failure, -1, + 185) == -1) { + global_failure_args[0] = i_113023; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_1; + } + } - cos_res_30240 = futrts_cos32(angle_30235); - defunc_0_f_res_f_res_30238 = cos_res_30240; + double x_113028 = ((__local double *) mem_124298)[i_113023]; + double y_113029 = x_113011 * x_113028; + double defunc_0_f_res_f_res_t_res_113030 = x_113022 - y_113029; + + defunc_0_f_res_f_res_113014 = defunc_0_f_res_f_res_t_res_113030; + } else { + defunc_0_f_res_f_res_113014 = x_113011; } - defunc_0_f_res_30228 = defunc_0_f_res_f_res_30238; + defunc_0_f_res_112998 = defunc_0_f_res_f_res_113014; + } + ((__local double *) mem_124308)[gtid_112920] = defunc_0_f_res_112998; + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_112958 = sext_i32_i64(ltid_pre_128834); + int32_t phys_tid_112959 = local_tid_128830; + double write_value_113033 = ((__local + double *) mem_124308)[write_i_112958]; + + if (sle64((int64_t) 0, write_i_112958) && slt64(write_i_112958, + nm_76775)) { + ((__local double *) mem_124298)[write_i_112958] = + write_value_113033; } - ((__global float *) mem_44389)[gtid_30151 * N_27771 + gtid_30152] = - defunc_0_f_res_30228; + barrier(CLK_LOCAL_MEM_FENCE); } + for (int64_t i_128836 = 0; i_128836 < sdiv_up64(k2p2zq_75151 * + k2p2zq_75151 - + sext_i32_i64(local_tid_128830), + nm_76775); i_128836++) { + ((__global double *) mem_124318)[gtid_112899 * (k2p2zq_75151 * + k2p2zq_75151) + + squot64(i_128836 * nm_76775 + + sext_i32_i64(local_tid_128830), + k2p2zq_75151) * k2p2zq_75151 + + (i_128836 * nm_76775 + + sext_i32_i64(local_tid_128830) - + squot64(i_128836 * nm_76775 + + sext_i32_i64(local_tid_128830), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) mem_124298)[k2p2zq_75151 + + (squot64(i_128836 * + nm_76775 + + sext_i32_i64(local_tid_128830), + k2p2zq_75151) * + m_76774 + + (i_128836 * 
+ nm_76775 + + sext_i32_i64(local_tid_128830) - + squot64(i_128836 * + nm_76775 + + sext_i32_i64(local_tid_128830), + k2p2zq_75151) * + k2p2zq_75151))]; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: + error_3: return; - #undef segmap_group_sizze_30222 } -__kernel void mainDetailedzisegmap_30281(__global int *global_failure, - int64_t N_27771, int64_t i32_res_27787, - float i32_res_27852, __global - unsigned char *mem_44393, __global - unsigned char *mem_44397) +__kernel void mainzisegmap_intragroup_113099(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124338_backing_aligned_0, + __local volatile + int64_t *mem_124335_backing_aligned_1, + int64_t k2p2zq_75151, + int64_t m_76774, int64_t nm_76775, + int64_t i_113419, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124342) { - #define segmap_group_sizze_30305 (mainDetailedzisegmap_group_sizze_30284) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_124338_backing_1 = (__local volatile + char *) mem_124338_backing_aligned_0; + __local volatile char *restrict mem_124335_backing_0 = (__local volatile + char *) mem_124335_backing_aligned_1; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; - - int32_t global_tid_45701; - int32_t local_tid_45702; - int64_t group_sizze_45705; - int32_t wave_sizze_45704; - int32_t group_tid_45703; - - global_tid_45701 = get_global_id(0); - local_tid_45702 = get_local_id(0); - group_sizze_45705 = get_local_size(0); - wave_sizze_45704 = LOCKSTEP_WIDTH; - group_tid_45703 = get_group_id(0); - - int32_t phys_tid_30281; - - phys_tid_30281 = global_tid_45701; - - int64_t gtid_30279; - - gtid_30279 = squot64(sext_i32_i64(group_tid_45703) * - segmap_group_sizze_30305 + - 
sext_i32_i64(local_tid_45702), i32_res_27787); - - int64_t gtid_30280; - - gtid_30280 = sext_i32_i64(group_tid_45703) * segmap_group_sizze_30305 + - sext_i32_i64(local_tid_45702) - squot64(sext_i32_i64(group_tid_45703) * - segmap_group_sizze_30305 + - sext_i32_i64(local_tid_45702), - i32_res_27787) * i32_res_27787; - if (slt64(gtid_30279, N_27771) && slt64(gtid_30280, i32_res_27787)) { - float x_30308 = ((__global float *) mem_44393)[gtid_30279 * - i32_res_27787 + - gtid_30280]; - float defunc_0_f_res_30309 = i32_res_27852 + x_30308; + if (failure_is_an_option) { + int failed = *global_failure >= 0; - ((__global float *) mem_44397)[gtid_30279 * i32_res_27787 + - gtid_30280] = defunc_0_f_res_30309; + if (failed) + return; } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_30305 -} -__kernel void mainDetailedzisegmap_30314(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, int32_t k2p2zq_27785, - int64_t i32_res_27787, - int64_t num_groups_30339, __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44397, - __global unsigned char *mem_44400, - __global unsigned char *mem_44404, - __global unsigned char *mem_44446) -{ - #define segmap_group_sizze_30338 (mainDetailedzisegmap_group_sizze_30316) + int32_t global_tid_128858; + int32_t local_tid_128859; + int64_t group_sizze_128862; + int32_t wave_sizze_128861; + int32_t group_tid_128860; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + global_tid_128858 = get_global_id(0); + local_tid_128859 = get_local_id(0); + group_sizze_128862 = get_local_size(0); + wave_sizze_128861 = LOCKSTEP_WIDTH; + group_tid_128860 = get_group_id(0); - if (*global_failure >= 0) - return; + int32_t phys_tid_113099; + + phys_tid_113099 = group_tid_128860; - int32_t global_tid_45709; - int32_t local_tid_45710; - int64_t group_sizze_45713; - int32_t wave_sizze_45712; - int32_t group_tid_45711; + int32_t 
ltid_pre_128863; - global_tid_45709 = get_global_id(0); - local_tid_45710 = get_local_id(0); - group_sizze_45713 = get_local_size(0); - wave_sizze_45712 = LOCKSTEP_WIDTH; - group_tid_45711 = get_group_id(0); + ltid_pre_128863 = local_tid_128859; - int32_t phys_tid_30314; + int64_t gtid_113055; - phys_tid_30314 = global_tid_45709; + gtid_113055 = sext_i32_i64(group_tid_128860); - int32_t phys_group_id_45714; + __local char *mem_124335; - phys_group_id_45714 = get_group_id(0); - for (int32_t i_45715 = 0; i_45715 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_30338)) - - phys_group_id_45714, sext_i64_i32(num_groups_30339)); - i_45715++) { - int32_t virt_group_id_45716 = phys_group_id_45714 + i_45715 * - sext_i64_i32(num_groups_30339); - int64_t gtid_30313 = sext_i32_i64(virt_group_id_45716) * - segmap_group_sizze_30338 + sext_i32_i64(local_tid_45710); + mem_124335 = (__local char *) mem_124335_backing_0; + ((__local double *) mem_124335)[sext_i32_i64(local_tid_128859)] = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + gtid_113055 * + ctx_param_ext_124326 + + sext_i32_i64(local_tid_128859) * + ctx_param_ext_124328]; + barrier(CLK_LOCAL_MEM_FENCE); + + double v1_113434 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113055 * + ctx_param_ext_124326 + + i_113419 * + ctx_param_ext_124328)]; + bool cond_113435 = v1_113434 == 0.0; + __local char *mem_124338; + + mem_124338 = (__local char *) mem_124338_backing_1; + + int64_t gtid_113058 = sext_i32_i64(ltid_pre_128863); + int32_t phys_tid_113059 = local_tid_128859; + int64_t defunc_0_f_res_113438 = sdiv64(gtid_113058, m_76774); + int64_t defunc_0_f_res_113439 = smod64(gtid_113058, m_76774); + double defunc_0_f_res_113440; + + if (cond_113435) { + int64_t x_113441 = mul64(m_76774, defunc_0_f_res_113438); + int64_t i_113442 = add64(defunc_0_f_res_113439, x_113441); + bool x_113443 = sle64((int64_t) 0, i_113442); + bool y_113444 = slt64(i_113442, nm_76775); + bool 
bounds_check_113445 = x_113443 && y_113444; + bool index_certs_113446; + + if (!bounds_check_113445) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 187) == -1) { + global_failure_args[0] = i_113442; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_0; + } + } - if (slt64(gtid_30313, m_27772)) { - for (int32_t i_44360 = 0; i_44360 < k2p2zq_27785; i_44360++) { - int64_t i_44289 = sext_i32_i64(i_44360); - - for (int32_t i_44359 = 0; i_44359 < k2p2zq_27785; i_44359++) { - int64_t i_44293 = sext_i32_i64(i_44359); - float defunc_2_reduce_res_30347; - float redout_44295 = 0.0F; - - for (int32_t i_44358 = 0; i_44358 < n_27775; i_44358++) { - int64_t i_44296 = sext_i32_i64(i_44358); - float x_30351 = ((__global float *) mem_44400)[i_44296 * - m_27772 + - gtid_30313]; - float x_30352 = ((__global - float *) binop_p_mem_44390)[i_44289 * - N_27771 + - i_44296]; - float x_30353 = ((__global float *) mem_44397)[i_44296 * - i32_res_27787 + - i_44293]; - float x_30354 = x_30352 * x_30353; - bool isnan_res_30355; - - isnan_res_30355 = futrts_isnan32(x_30351); - - float y_30356; - - if (isnan_res_30355) { - y_30356 = 0.0F; - } else { - y_30356 = 1.0F; - } - - float defunc_2_f_res_30357 = x_30354 * y_30356; - float defunc_1_op_res_30350 = defunc_2_f_res_30357 + - redout_44295; - float redout_tmp_45719 = defunc_1_op_res_30350; - - redout_44295 = redout_tmp_45719; + double defunc_0_f_res_t_res_113447 = ((__local + double *) mem_124335)[i_113442]; + + defunc_0_f_res_113440 = defunc_0_f_res_t_res_113447; + } else { + bool x_113448 = sle64((int64_t) 0, defunc_0_f_res_113439); + bool y_113449 = slt64(defunc_0_f_res_113439, nm_76775); + bool bounds_check_113450 = x_113448 && y_113449; + bool index_certs_113451; + + if (!bounds_check_113450) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 188) == -1) { + global_failure_args[0] = defunc_0_f_res_113439; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_0; + } 
+ } + + double x_113452 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113055 * + ctx_param_ext_124326 + + defunc_0_f_res_113439 * + ctx_param_ext_124328)]; + double x_113453 = x_113452 / v1_113434; + int64_t y_113454 = sub64(k2p2zq_75151, (int64_t) 1); + bool cond_113455 = slt64(defunc_0_f_res_113438, y_113454); + double defunc_0_f_res_f_res_113456; + + if (cond_113455) { + int64_t x_113457 = add64((int64_t) 1, defunc_0_f_res_113438); + int64_t x_113458 = mul64(m_76774, x_113457); + int64_t i_113459 = add64(defunc_0_f_res_113439, x_113458); + bool x_113460 = sle64((int64_t) 0, i_113459); + bool y_113461 = slt64(i_113459, nm_76775); + bool bounds_check_113462 = x_113460 && y_113461; + bool index_certs_113463; + + if (!bounds_check_113462) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 189) == + -1) { + global_failure_args[0] = i_113459; + global_failure_args[1] = nm_76775; + ; } - defunc_2_reduce_res_30347 = redout_44295; - ((__global float *) mem_44404)[phys_tid_30314 + (i_44289 * - (num_groups_30339 * - segmap_group_sizze_30338 * - i32_res_27787) + - i_44293 * - (num_groups_30339 * - segmap_group_sizze_30338))] = - defunc_2_reduce_res_30347; + local_failure = true; + goto error_0; } } - for (int64_t i_45720 = 0; i_45720 < i32_res_27787; i_45720++) { - for (int64_t i_45721 = 0; i_45721 < i32_res_27787; i_45721++) { - ((__global float *) mem_44446)[i_45720 * (m_27772 * - i32_res_27787) + - i_45721 * m_27772 + - gtid_30313] = ((__global - float *) mem_44404)[phys_tid_30314 + - (i_45720 * - (num_groups_30339 * - segmap_group_sizze_30338 * - i32_res_27787) + - i_45721 * - (num_groups_30339 * - segmap_group_sizze_30338))]; + + double x_113464 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113055 * + ctx_param_ext_124326 + + i_113459 * + ctx_param_ext_124328)]; + int64_t i_113465 = add64(i_113419, x_113458); + bool x_113466 = sle64((int64_t) 0, i_113465); + bool y_113467 = slt64(i_113465, nm_76775); + bool 
bounds_check_113468 = x_113466 && y_113467; + bool index_certs_113469; + + if (!bounds_check_113468) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 190) == + -1) { + global_failure_args[0] = i_113465; + global_failure_args[1] = nm_76775; + ; + } + local_failure = true; + goto error_0; } } + + double x_113470 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_113055 * + ctx_param_ext_124326 + + i_113465 * + ctx_param_ext_124328)]; + double y_113471 = x_113453 * x_113470; + double defunc_0_f_res_f_res_t_res_113472 = x_113464 - y_113471; + + defunc_0_f_res_f_res_113456 = defunc_0_f_res_f_res_t_res_113472; + } else { + defunc_0_f_res_f_res_113456 = x_113453; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_113440 = defunc_0_f_res_f_res_113456; } + ((__local double *) mem_124338)[gtid_113058] = defunc_0_f_res_113440; error_0: - return; - #undef segmap_group_sizze_30338 -} -__kernel void mainDetailedzisegmap_30360(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, int32_t k2p2zq_27785, - int64_t i32_res_27787, - int64_t num_groups_30516, __global - unsigned char *images_mem_44381, - __global unsigned char *mem_44393, - __global unsigned char *mem_44397, - __global unsigned char *mem_44449, - __global unsigned char *mem_44465) -{ - #define segmap_group_sizze_30515 (mainDetailedzisegmap_group_sizze_30363) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) return; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_45722; - int32_t local_tid_45723; - int64_t group_sizze_45726; - int32_t wave_sizze_45725; - int32_t group_tid_45724; - - global_tid_45722 = get_global_id(0); - local_tid_45723 = get_local_id(0); - group_sizze_45726 = get_local_size(0); - wave_sizze_45725 = LOCKSTEP_WIDTH; - group_tid_45724 = get_group_id(0); - - int32_t phys_tid_30360; - - phys_tid_30360 
= global_tid_45722; - - int32_t phys_group_id_45727; + int64_t write_i_113096 = sext_i32_i64(ltid_pre_128863); + int32_t phys_tid_113097 = local_tid_128859; + double write_value_113475 = ((__local double *) mem_124338)[write_i_113096]; - phys_group_id_45727 = get_group_id(0); - for (int32_t i_45728 = 0; i_45728 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * i32_res_27787, - segmap_group_sizze_30515)) - - phys_group_id_45727, sext_i64_i32(num_groups_30516)); - i_45728++) { - int32_t virt_group_id_45729 = phys_group_id_45727 + i_45728 * - sext_i64_i32(num_groups_30516); - int64_t gtid_30358 = squot64(sext_i32_i64(virt_group_id_45729) * - segmap_group_sizze_30515 + - sext_i32_i64(local_tid_45723), - i32_res_27787); - int64_t gtid_30359 = sext_i32_i64(virt_group_id_45729) * - segmap_group_sizze_30515 + sext_i32_i64(local_tid_45723) - - squot64(sext_i32_i64(virt_group_id_45729) * - segmap_group_sizze_30515 + - sext_i32_i64(local_tid_45723), i32_res_27787) * - i32_res_27787; - - if (slt64(gtid_30358, m_27772) && slt64(gtid_30359, i32_res_27787)) { - for (int32_t i_44362 = 0; i_44362 < k2p2zq_27785; i_44362++) { - int64_t i_44299 = sext_i32_i64(i_44362); - float defunc_2_reduce_res_30527; - float redout_44301 = 0.0F; - - for (int32_t i_44361 = 0; i_44361 < n_27775; i_44361++) { - int64_t i_44302 = sext_i32_i64(i_44361); - float x_30531 = ((__global - float *) images_mem_44381)[gtid_30358 * - N_27771 + - i_44302]; - float x_30532 = ((__global float *) mem_44393)[i_44302 * - i32_res_27787 + - gtid_30359]; - float x_30533 = ((__global float *) mem_44397)[i_44302 * - i32_res_27787 + - i_44299]; - float x_30534 = x_30532 * x_30533; - bool isnan_res_30535; - - isnan_res_30535 = futrts_isnan32(x_30531); - - float y_30536; - - if (isnan_res_30535) { - y_30536 = 0.0F; - } else { - y_30536 = 1.0F; - } - - float defunc_2_f_res_30537 = x_30534 * y_30536; - float defunc_1_op_res_30530 = defunc_2_f_res_30537 + - redout_44301; - float redout_tmp_45731 = defunc_1_op_res_30530; - - 
redout_44301 = redout_tmp_45731; - } - defunc_2_reduce_res_30527 = redout_44301; - ((__global float *) mem_44449)[phys_tid_30360 + i_44299 * - (num_groups_30516 * - segmap_group_sizze_30515)] = - defunc_2_reduce_res_30527; - } - for (int64_t i_45732 = 0; i_45732 < i32_res_27787; i_45732++) { - ((__global float *) mem_44465)[i_45732 * (i32_res_27787 * - m_27772) + - gtid_30358 * i32_res_27787 + - gtid_30359] = ((__global - float *) mem_44449)[phys_tid_30360 + - i_45732 * - (num_groups_30516 * - segmap_group_sizze_30515)]; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + if (sle64((int64_t) 0, write_i_113096) && slt64(write_i_113096, nm_76775)) { + ((__local double *) mem_124335)[write_i_113096] = write_value_113475; } + barrier(CLK_LOCAL_MEM_FENCE); + ((__global double *) mem_124342)[gtid_113055 * nm_76775 + + sext_i32_i64(local_tid_128859)] = ((__local + double *) mem_124335)[sext_i32_i64(local_tid_128859)]; + barrier(CLK_LOCAL_MEM_FENCE); - error_0: + error_2: return; - #undef segmap_group_sizze_30515 } -__kernel void mainDetailedzisegmap_30952(__global int *global_failure, - int64_t m_27772, int64_t i32_res_27787, - int64_t nm_27920, - int64_t i32_res_27935, int64_t x_27936, - int64_t j_m_i_27939, - int64_t gauss_jordan_res_r_ixfn_44617, - int64_t gauss_jordan_res_r_ixfn_44618, - int64_t gauss_jordan_res_r_ixfn_44620, - __global - unsigned char *gauss_jordan_res_r_mem_44622, - __global unsigned char *mem_44627) +__kernel void mainzisegmap_intragroup_113961(__global int *global_failure, + __local volatile + int64_t *mem_124893_backing_aligned_0, + __local volatile + int64_t *mem_124891_backing_aligned_1, + __local volatile + int64_t *mem_124889_backing_aligned_2, + __local volatile + int64_t *mem_124887_backing_aligned_3, + int64_t N_75135, int64_t i_76911, + __global unsigned char *mem_124142, + __global + unsigned char *defunc_3_map_res_mem_124883, + __global unsigned char *mem_124896, + __global unsigned char *mem_124899, + __global unsigned 
char *mem_124902) { - #define segmap_group_sizze_31699 (mainDetailedzisegmap_group_sizze_30956) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_124893_backing_3 = (__local volatile + char *) mem_124893_backing_aligned_0; + __local volatile char *restrict mem_124891_backing_2 = (__local volatile + char *) mem_124891_backing_aligned_1; + __local volatile char *restrict mem_124889_backing_1 = (__local volatile + char *) mem_124889_backing_aligned_2; + __local volatile char *restrict mem_124887_backing_0 = (__local volatile + char *) mem_124887_backing_aligned_3; if (*global_failure >= 0) return; - int32_t global_tid_45889; - int32_t local_tid_45890; - int64_t group_sizze_45893; - int32_t wave_sizze_45892; - int32_t group_tid_45891; - - global_tid_45889 = get_global_id(0); - local_tid_45890 = get_local_id(0); - group_sizze_45893 = get_local_size(0); - wave_sizze_45892 = LOCKSTEP_WIDTH; - group_tid_45891 = get_group_id(0); + int32_t global_tid_129239; + int32_t local_tid_129240; + int64_t group_sizze_129243; + int32_t wave_sizze_129242; + int32_t group_tid_129241; - int32_t phys_tid_30952; + global_tid_129239 = get_global_id(0); + local_tid_129240 = get_local_id(0); + group_sizze_129243 = get_local_size(0); + wave_sizze_129242 = LOCKSTEP_WIDTH; + group_tid_129241 = get_group_id(0); - phys_tid_30952 = global_tid_45889; + int32_t phys_tid_113961; - int64_t gtid_30949; + phys_tid_113961 = group_tid_129241; - gtid_30949 = squot64(sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890), i32_res_27787 * - j_m_i_27939); + int32_t ltid_pre_129244; - int64_t gtid_slice_30947; - - gtid_slice_30947 = squot64(sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890) - - squot64(sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890), - i32_res_27787 * j_m_i_27939) * - (i32_res_27787 * j_m_i_27939), 
j_m_i_27939); - - int64_t gtid_slice_30948; - - gtid_slice_30948 = sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + sext_i32_i64(local_tid_45890) - - squot64(sext_i32_i64(group_tid_45891) * segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890), i32_res_27787 * j_m_i_27939) * - (i32_res_27787 * j_m_i_27939) - squot64(sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890) - - squot64(sext_i32_i64(group_tid_45891) * - segmap_group_sizze_31699 + - sext_i32_i64(local_tid_45890), - i32_res_27787 * - j_m_i_27939) * - (i32_res_27787 * j_m_i_27939), - j_m_i_27939) * j_m_i_27939; - if ((slt64(gtid_30949, m_27772) && slt64(gtid_slice_30947, - i32_res_27787)) && - slt64(gtid_slice_30948, j_m_i_27939)) { - int64_t slice_31703 = i32_res_27787 + gtid_slice_30948; - int64_t binop_x_42430 = x_27936 * gtid_30949; - int64_t binop_y_42431 = i32_res_27935 * gtid_slice_30947; - int64_t binop_x_42432 = binop_x_42430 + binop_y_42431; - int64_t binop_x_42433 = slice_31703 + binop_x_42432; - int64_t new_index_42434 = squot64(binop_x_42433, nm_27920); - int64_t binop_y_42446 = nm_27920 * new_index_42434; - int64_t new_index_42447 = binop_x_42433 - binop_y_42446; - float v_31704 = ((__global - float *) gauss_jordan_res_r_mem_44622)[gauss_jordan_res_r_ixfn_44617 + - (new_index_42434 * - gauss_jordan_res_r_ixfn_44618 + - new_index_42447 * - gauss_jordan_res_r_ixfn_44620)]; - - ((__global float *) mem_44627)[gtid_30949 * (j_m_i_27939 * - i32_res_27787) + - gtid_slice_30947 * j_m_i_27939 + - gtid_slice_30948] = v_31704; - } - - error_0: - return; - #undef segmap_group_sizze_31699 -} -__kernel void mainDetailedzisegmap_31182(__global int *global_failure, - int64_t m_27772, int64_t nm_27920, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44605) -{ - #define segmap_group_sizze_31687 (mainDetailedzisegmap_group_sizze_31185) + 
ltid_pre_129244 = local_tid_129240; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int64_t gtid_113952; - if (*global_failure >= 0) - return; + gtid_113952 = sext_i32_i64(group_tid_129241); - int32_t global_tid_45883; - int32_t local_tid_45884; - int64_t group_sizze_45887; - int32_t wave_sizze_45886; - int32_t group_tid_45885; + __local char *mem_124887; - global_tid_45883 = get_global_id(0); - local_tid_45884 = get_local_id(0); - group_sizze_45887 = get_local_size(0); - wave_sizze_45886 = LOCKSTEP_WIDTH; - group_tid_45885 = get_group_id(0); + mem_124887 = (__local char *) mem_124887_backing_0; - int32_t phys_tid_31182; + __local char *mem_124889; - phys_tid_31182 = global_tid_45883; + mem_124889 = (__local char *) mem_124889_backing_1; - int64_t gtid_31180; + int64_t gtid_113955 = sext_i32_i64(ltid_pre_129244); + int32_t phys_tid_113956 = local_tid_129240; + double x_113979 = ((__global double *) mem_124142)[gtid_113952 * N_75135 + + gtid_113955]; + bool isnan_res_113981; - gtid_31180 = squot64(sext_i32_i64(group_tid_45885) * - segmap_group_sizze_31687 + - sext_i32_i64(local_tid_45884), nm_27920); + isnan_res_113981 = futrts_isnan64(x_113979); - int64_t gtid_31181; + bool cond_113982 = !isnan_res_113981; + double defunc_1_f_res_113983; - gtid_31181 = sext_i32_i64(group_tid_45885) * segmap_group_sizze_31687 + - sext_i32_i64(local_tid_45884) - squot64(sext_i32_i64(group_tid_45885) * - segmap_group_sizze_31687 + - sext_i32_i64(local_tid_45884), - nm_27920) * nm_27920; - if (slt64(gtid_31180, m_27772) && slt64(gtid_31181, nm_27920)) { - float write_value_31693 = ((__global float *) mem_44605)[gtid_31180 * - nm_27920 + - gtid_31181]; + if (cond_113982) { + double x_113980 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_113952 * + N_75135 + + gtid_113955]; + double defunc_1_f_res_t_res_113984 = x_113979 - x_113980; - if ((sle64((int64_t) 0, gtid_31180) && slt64(gtid_31180, m_27772)) && - (sle64((int64_t) 0, gtid_31181) 
&& slt64(gtid_31181, nm_27920))) { - ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31180 * - ctx_param_ext_44581 + - gtid_31181 * - ctx_param_ext_44583)] = - write_value_31693; - } + defunc_1_f_res_113983 = defunc_1_f_res_t_res_113984; + } else { + defunc_1_f_res_113983 = NAN; } - error_0: - return; - #undef segmap_group_sizze_31687 -} -__kernel void mainDetailedzisegmap_31252(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_27772, int32_t k2p2zq_27785, - int32_t m_27918, int64_t nm_27920, - int32_t i_31554, int64_t i32_res_31556, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44601, - __global unsigned char *mem_44605) -{ - #define segmap_group_sizze_31637 (mainDetailedzisegmap_group_sizze_31255) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + bool isnan_res_113985; - if (*global_failure >= 0) - return; + isnan_res_113985 = futrts_isnan64(defunc_1_f_res_113983); - int32_t global_tid_45878; - int32_t local_tid_45879; - int64_t group_sizze_45882; - int32_t wave_sizze_45881; - int32_t group_tid_45880; + bool defunc_0_p_res_113986 = !isnan_res_113985; + int64_t defunc_0_f_res_113987 = btoi_bool_i64(defunc_0_p_res_113986); - global_tid_45878 = get_global_id(0); - local_tid_45879 = get_local_id(0); - group_sizze_45882 = get_local_size(0); - wave_sizze_45881 = LOCKSTEP_WIDTH; - group_tid_45880 = get_group_id(0); + ((__local int64_t *) mem_124887)[gtid_113955] = defunc_0_f_res_113987; + ((__local double *) mem_124889)[gtid_113955] = defunc_1_f_res_113983; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t phys_tid_31252; + int64_t dims_flat_129245; - phys_tid_31252 = global_tid_45878; + dims_flat_129245 = N_75135; - int64_t gtid_31250; + int64_t x_113976; + int64_t x_113977; + int64_t x_129247; + int64_t x_129248; + bool ltid_in_bounds_129250; 
- gtid_31250 = squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_31637 + - sext_i32_i64(local_tid_45879), nm_27920); + ltid_in_bounds_129250 = slt64(sext_i32_i64(local_tid_129240), N_75135); - int64_t gtid_31251; + int32_t skip_threads_129251; - gtid_31251 = sext_i32_i64(group_tid_45880) * segmap_group_sizze_31637 + - sext_i32_i64(local_tid_45879) - squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_31637 + - sext_i32_i64(local_tid_45879), - nm_27920) * nm_27920; - if (slt64(gtid_31250, m_27772) && slt64(gtid_31251, nm_27920)) { - bool cond_31642 = ((__global bool *) mem_44601)[gtid_31250]; - int32_t defunc_0_f_res_31644 = sext_i64_i32(gtid_31251); - int32_t defunc_0_f_res_31645 = sdiv32(defunc_0_f_res_31644, m_27918); - int32_t defunc_0_f_res_31646 = smod32(defunc_0_f_res_31644, m_27918); - float defunc_0_f_res_31647; - - if (cond_31642) { - int32_t x_31648 = mul32(m_27918, defunc_0_f_res_31645); - int32_t i32_arg_31649 = add32(defunc_0_f_res_31646, x_31648); - int64_t i32_res_31650 = sext_i32_i64(i32_arg_31649); - bool x_31651 = sle64((int64_t) 0, i32_res_31650); - bool y_31652 = slt64(i32_res_31650, nm_27920); - bool bounds_check_31653 = x_31651 && y_31652; - bool index_certs_31654; - - if (!bounds_check_31653) { + // read input for in-block scan + { + if (ltid_in_bounds_129250) { + x_113977 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)]; + if ((local_tid_129240 - squot32(local_tid_129240, 32) * 32) == 0) { + x_113976 = x_113977; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129251 = 1; + while (slt32(skip_threads_129251, 32)) { + if (sle32(skip_threads_129251, local_tid_129240 - + squot32(local_tid_129240, 32) * 32) && + ltid_in_bounds_129250) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, 42) == - -1) { - global_failure_args[0] = i32_res_31650; - global_failure_args[1] = nm_27920; - ; + x_113976 = ((volatile __local + int64_t *) 
mem_124887)[sext_i32_i64(local_tid_129240) - + sext_i32_i64(skip_threads_129251)]; + } + // perform operation + { + bool inactive_129252 = + slt64(srem64(sext_i32_i64(local_tid_129240), N_75135), + sext_i32_i64(local_tid_129240) - + sext_i32_i64(local_tid_129240 - + skip_threads_129251)); + + if (inactive_129252) { + x_113976 = x_113977; + } + if (!inactive_129252) { + int64_t defunc_1_op_res_113978 = add64(x_113976, + x_113977); + + x_113976 = defunc_1_op_res_113978; } - return; } } - - float defunc_0_f_res_t_res_31655 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31250 * - ctx_param_ext_44581 + - i32_res_31650 * - ctx_param_ext_44583)]; - - defunc_0_f_res_31647 = defunc_0_f_res_t_res_31655; - } else { - float v1_31641 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31250 * - ctx_param_ext_44581 + - i32_res_31556 * - ctx_param_ext_44583)]; - int64_t i32_res_31656 = sext_i32_i64(defunc_0_f_res_31646); - bool x_31657 = sle64((int64_t) 0, i32_res_31656); - bool y_31658 = slt64(i32_res_31656, nm_27920); - bool bounds_check_31659 = x_31657 && y_31658; - bool index_certs_31660; - - if (!bounds_check_31659) { + if (sle32(wave_sizze_129242, skip_threads_129251)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129251, local_tid_129240 - + squot32(local_tid_129240, 32) * 32) && + ltid_in_bounds_129250) { + // write result { - if (atomic_cmpxchg_i32_global(global_failure, -1, 43) == - -1) { - global_failure_args[0] = i32_res_31656; - global_failure_args[1] = nm_27920; - ; - } - return; + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)] = + x_113976; + x_113977 = x_113976; } } - - float x_31661 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31250 * - ctx_param_ext_44581 + - i32_res_31656 * - ctx_param_ext_44583)]; - float x_31662 = x_31661 / v1_31641; - int32_t y_31663 = sub32(k2p2zq_27785, 1); - bool cond_31664 = slt32(defunc_0_f_res_31645, y_31663); - float 
defunc_0_f_res_f_res_31665; - - if (cond_31664) { - int32_t x_31666 = add32(1, defunc_0_f_res_31645); - int32_t x_31667 = mul32(m_27918, x_31666); - int32_t i32_arg_31668 = add32(defunc_0_f_res_31646, x_31667); - int64_t i32_res_31669 = sext_i32_i64(i32_arg_31668); - bool x_31670 = sle64((int64_t) 0, i32_res_31669); - bool y_31671 = slt64(i32_res_31669, nm_27920); - bool bounds_check_31672 = x_31670 && y_31671; - bool index_certs_31673; - - if (!bounds_check_31672) { + if (sle32(wave_sizze_129242, skip_threads_129251)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129251 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129240 - squot32(local_tid_129240, 32) * 32) == 31 && + ltid_in_bounds_129250) { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129240, + 32))] = x_113976; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129253; + + // read input for in-block scan + { + if (squot32(local_tid_129240, 32) == 0 && ltid_in_bounds_129250) { + x_129248 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)]; + if ((local_tid_129240 - squot32(local_tid_129240, 32) * 32) == + 0) { + x_129247 = x_129248; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129253 = 1; + while (slt32(skip_threads_129253, 32)) { + if (sle32(skip_threads_129253, local_tid_129240 - + squot32(local_tid_129240, 32) * 32) && + (squot32(local_tid_129240, 32) == 0 && + ltid_in_bounds_129250)) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, 44) == - -1) { - global_failure_args[0] = i32_res_31669; - global_failure_args[1] = nm_27920; - ; + x_129247 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240) - + sext_i32_i64(skip_threads_129253)]; + } + // perform operation + { + bool 
inactive_129254 = + slt64(srem64(sext_i32_i64(local_tid_129240 * 32 + + 32 - 1), N_75135), + sext_i32_i64(local_tid_129240 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129240 - + skip_threads_129253) * + 32 + 32 - 1)); + + if (inactive_129254) { + x_129247 = x_129248; + } + if (!inactive_129254) { + int64_t defunc_1_op_res_129249 = add64(x_129247, + x_129248); + + x_129247 = defunc_1_op_res_129249; } - return; } } - - float x_31674 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31250 * - ctx_param_ext_44581 + - i32_res_31669 * - ctx_param_ext_44583)]; - int32_t i32_arg_31675 = add32(i_31554, x_31667); - int64_t i32_res_31676 = sext_i32_i64(i32_arg_31675); - bool x_31677 = sle64((int64_t) 0, i32_res_31676); - bool y_31678 = slt64(i32_res_31676, nm_27920); - bool bounds_check_31679 = x_31677 && y_31678; - bool index_certs_31680; - - if (!bounds_check_31679) { + if (sle32(wave_sizze_129242, skip_threads_129253)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129253, local_tid_129240 - + squot32(local_tid_129240, 32) * 32) && + (squot32(local_tid_129240, 32) == 0 && + ltid_in_bounds_129250)) { + // write result { - if (atomic_cmpxchg_i32_global(global_failure, -1, 45) == - -1) { - global_failure_args[0] = i32_res_31676; - global_failure_args[1] = nm_27920; - ; - } - return; + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)] = + x_129247; + x_129248 = x_129247; } } + if (sle32(wave_sizze_129242, skip_threads_129253)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129253 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129240, 32) == 0 || !ltid_in_bounds_129250)) { + // read operands + { + x_113977 = x_113976; + x_113976 = ((__local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129240, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129255 = + slt64(srem64(sext_i32_i64(local_tid_129240), 
N_75135), + sext_i32_i64(local_tid_129240) - + sext_i32_i64(squot32(local_tid_129240, 32) * 32 - + 1)); - float x_31681 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31250 * - ctx_param_ext_44581 + - i32_res_31676 * - ctx_param_ext_44583)]; - float y_31682 = x_31662 * x_31681; - float defunc_0_f_res_f_res_t_res_31683 = x_31674 - y_31682; - - defunc_0_f_res_f_res_31665 = defunc_0_f_res_f_res_t_res_31683; - } else { - defunc_0_f_res_f_res_31665 = x_31662; + if (inactive_129255) { + x_113976 = x_113977; + } + if (!inactive_129255) { + int64_t defunc_1_op_res_113978 = add64(x_113976, x_113977); + + x_113976 = defunc_1_op_res_113978; + } + } + // write final result + { + ((__local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)] = + x_113976; } - defunc_0_f_res_31647 = defunc_0_f_res_f_res_31665; } - ((__global float *) mem_44605)[gtid_31250 * nm_27920 + gtid_31251] = - defunc_0_f_res_31647; } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129240, 32) == 0) { + ((__local int64_t *) mem_124887)[sext_i32_i64(local_tid_129240)] = + x_113977; + } + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_31637 -} -__kernel void mainDetailedzisegmap_31352(__global int *global_failure, - int64_t m_27772, int64_t i32_res_31556, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44601) -{ - #define segmap_group_sizze_31621 (mainDetailedzisegmap_group_sizze_31354) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int64_t last_res_113988 = ((__local int64_t *) mem_124887)[i_76911]; + __local char *mem_124891; - if (*global_failure >= 0) - return; + mem_124891 = (__local char *) mem_124891_backing_2; + ((__local double *) mem_124891)[sext_i32_i64(local_tid_129240)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t 
global_tid_45873; - int32_t local_tid_45874; - int64_t group_sizze_45877; - int32_t wave_sizze_45876; - int32_t group_tid_45875; + __local char *mem_124893; - global_tid_45873 = get_global_id(0); - local_tid_45874 = get_local_id(0); - group_sizze_45877 = get_local_size(0); - wave_sizze_45876 = LOCKSTEP_WIDTH; - group_tid_45875 = get_group_id(0); + mem_124893 = (__local char *) mem_124893_backing_3; + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129240)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t phys_tid_31352; + int64_t write_i_113957 = sext_i32_i64(ltid_pre_129244); + int32_t phys_tid_113958 = local_tid_129240; + double x_113993 = ((__local double *) mem_124889)[write_i_113957]; + bool isnan_res_113996; - phys_tid_31352 = global_tid_45873; + isnan_res_113996 = futrts_isnan64(x_113993); - int64_t gtid_31351; + bool defunc_0_p_res_113997 = !isnan_res_113996; + int64_t defunc_1_f_res_113998; - gtid_31351 = sext_i32_i64(group_tid_45875) * segmap_group_sizze_31621 + - sext_i32_i64(local_tid_45874); - if (slt64(gtid_31351, m_27772)) { - float v1_31626 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_31351 * - ctx_param_ext_44581 + - i32_res_31556 * - ctx_param_ext_44583)]; - bool cond_31627 = v1_31626 == 0.0F; + if (defunc_0_p_res_113997) { + int64_t x_113994 = ((__local int64_t *) mem_124887)[write_i_113957]; + int64_t defunc_1_f_res_t_res_113999 = sub64(x_113994, (int64_t) 1); - ((__global bool *) mem_44601)[gtid_31351] = cond_31627; + defunc_1_f_res_113998 = defunc_1_f_res_t_res_113999; + } else { + defunc_1_f_res_113998 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_113998) && + slt64(defunc_1_f_res_113998, N_75135)) { + ((__local int64_t *) mem_124893)[defunc_1_f_res_113998] = + write_i_113957; + } + if (sle64((int64_t) 0, defunc_1_f_res_113998) && + slt64(defunc_1_f_res_113998, N_75135)) { + ((__local double *) mem_124891)[defunc_1_f_res_113998] = x_113993; + } + barrier(CLK_LOCAL_MEM_FENCE); + if 
(local_tid_129240 == 0) { + ((__global int64_t *) mem_124896)[gtid_113952] = last_res_113988; } + ((__global double *) mem_124899)[gtid_113952 * N_75135 + + sext_i32_i64(local_tid_129240)] = ((__local + double *) mem_124891)[sext_i32_i64(local_tid_129240)]; + barrier(CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_124902)[gtid_113952 * N_75135 + + sext_i32_i64(local_tid_129240)] = + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129240)]; + barrier(CLK_LOCAL_MEM_FENCE); - error_0: + error_2: return; - #undef segmap_group_sizze_31621 } -__kernel void mainDetailedzisegmap_31469(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_27772, int32_t k2p2zq_27785, - int64_t i32_res_27787, int32_t m_27918, - int64_t nm_27920, __global - unsigned char *defunc_3_map_res_mem_44549, - __global unsigned char *mem_44577) +__kernel void mainzisegmap_intragroup_114173(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129342_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129338_backing_aligned_1, + int64_t N_75135, int64_t n_75139, + double hfrac_75141, + int64_t k2p2_75149, __global + unsigned char *mem_124142, __global + unsigned char *defunc_4_map_res_mem_124920, + __global unsigned char *mem_124939, + __global unsigned char *mem_124941, + __global unsigned char *mem_124943) { - #define segmap_group_sizze_31529 (mainDetailedzisegmap_group_sizze_31472) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129342_backing_1 = + (__local volatile + char *) red_arr_mem_129342_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129338_backing_0 = + (__local volatile + char *) red_arr_mem_129338_backing_aligned_1; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; + if (failure_is_an_option) { + int failed 
= *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_45840; - int32_t local_tid_45841; - int64_t group_sizze_45844; - int32_t wave_sizze_45843; - int32_t group_tid_45842; + int32_t global_tid_129332; + int32_t local_tid_129333; + int64_t group_sizze_129336; + int32_t wave_sizze_129335; + int32_t group_tid_129334; - global_tid_45840 = get_global_id(0); - local_tid_45841 = get_local_id(0); - group_sizze_45844 = get_local_size(0); - wave_sizze_45843 = LOCKSTEP_WIDTH; - group_tid_45842 = get_group_id(0); + global_tid_129332 = get_global_id(0); + local_tid_129333 = get_local_id(0); + group_sizze_129336 = get_local_size(0); + wave_sizze_129335 = LOCKSTEP_WIDTH; + group_tid_129334 = get_group_id(0); - int32_t phys_tid_31469; + int32_t phys_tid_114173; - phys_tid_31469 = global_tid_45840; + phys_tid_114173 = group_tid_129334; - int64_t gtid_31467; + int32_t ltid_pre_129337; - gtid_31467 = squot64(sext_i32_i64(group_tid_45842) * - segmap_group_sizze_31529 + - sext_i32_i64(local_tid_45841), nm_27920); + ltid_pre_129337 = local_tid_129333; - int64_t gtid_31468; + int64_t gtid_114164; - gtid_31468 = sext_i32_i64(group_tid_45842) * segmap_group_sizze_31529 + - sext_i32_i64(local_tid_45841) - squot64(sext_i32_i64(group_tid_45842) * - segmap_group_sizze_31529 + - sext_i32_i64(local_tid_45841), - nm_27920) * nm_27920; - if (slt64(gtid_31467, m_27772) && slt64(gtid_31468, nm_27920)) { - int32_t index_primexp_42361 = sext_i64_i32(gtid_31468); - int32_t defunc_0_f_res_31534 = sdiv32(index_primexp_42361, m_27918); - int32_t defunc_0_f_res_31535 = smod32(index_primexp_42361, m_27918); - bool cond_31536 = slt32(defunc_0_f_res_31535, k2p2zq_27785); - float defunc_0_f_res_31537; - - if (cond_31536) { - int64_t i_31538 = sext_i32_i64(defunc_0_f_res_31534); - bool x_31539 = sle64((int64_t) 0, i_31538); - bool y_31540 = slt64(i_31538, i32_res_27787); - bool bounds_check_31541 = x_31539 && y_31540; - int64_t 
j_31542 = sext_i32_i64(defunc_0_f_res_31535); - bool x_31543 = sle64((int64_t) 0, j_31542); - bool y_31544 = slt64(j_31542, i32_res_27787); - bool bounds_check_31545 = x_31543 && y_31544; - bool index_ok_31546 = bounds_check_31541 && bounds_check_31545; - bool index_certs_31547; - - if (!index_ok_31546) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 37) == - -1) { - global_failure_args[0] = i_31538; - global_failure_args[1] = j_31542; - global_failure_args[2] = i32_res_27787; - global_failure_args[3] = i32_res_27787; - ; - } - return; - } - } - - float defunc_0_f_res_t_res_31548 = ((__global - float *) defunc_3_map_res_mem_44549)[gtid_31467 * - (i32_res_27787 * - i32_res_27787) + - i_31538 * - i32_res_27787 + - j_31542]; - - defunc_0_f_res_31537 = defunc_0_f_res_t_res_31548; - } else { - int32_t y_31549 = add32(k2p2zq_27785, defunc_0_f_res_31534); - bool cond_31550 = defunc_0_f_res_31535 == y_31549; - float defunc_0_f_res_f_res_31551; - - if (cond_31550) { - defunc_0_f_res_f_res_31551 = 1.0F; - } else { - defunc_0_f_res_f_res_31551 = 0.0F; - } - defunc_0_f_res_31537 = defunc_0_f_res_f_res_31551; - } - ((__global float *) mem_44577)[gtid_31467 * nm_27920 + gtid_31468] = - defunc_0_f_res_31537; - } + gtid_114164 = sext_i32_i64(group_tid_129334); - error_0: - return; - #undef segmap_group_sizze_31529 -} -__kernel void mainDetailedzisegmap_31710(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, int32_t k2p2zq_27785, - int64_t i32_res_27787, - int64_t num_groups_31731, __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44632, - __global unsigned char *mem_44635, - __global unsigned char *mem_44650) -{ - #define segmap_group_sizze_31730 (mainDetailedzisegmap_group_sizze_31712) + int64_t defunc_0_f_res_114260; + int64_t gtid_114167 = sext_i32_i64(ltid_pre_129337); + int32_t phys_tid_114168 = local_tid_129333; + __local char *red_arr_mem_129338; - const int block_dim0 = 0; - const int block_dim1 = 1; - 
const int block_dim2 = 2; + red_arr_mem_129338 = (__local char *) red_arr_mem_129338_backing_0; - if (*global_failure >= 0) - return; + double x_114264; - int32_t global_tid_45897; - int32_t local_tid_45898; - int64_t group_sizze_45901; - int32_t wave_sizze_45900; - int32_t group_tid_45899; + x_114264 = ((__global double *) mem_124142)[gtid_114164 * N_75135 + + gtid_114167]; - global_tid_45897 = get_global_id(0); - local_tid_45898 = get_local_id(0); - group_sizze_45901 = get_local_size(0); - wave_sizze_45900 = LOCKSTEP_WIDTH; - group_tid_45899 = get_group_id(0); + bool isnan_res_114265; - int32_t phys_tid_31710; + isnan_res_114265 = futrts_isnan64(x_114264); - phys_tid_31710 = global_tid_45897; + bool cond_114266 = !isnan_res_114265; + int64_t defunc_0_f_res_114267 = btoi_bool_i64(cond_114266); - int32_t phys_group_id_45902; + ((__local int64_t *) red_arr_mem_129338)[gtid_114167] = + defunc_0_f_res_114267; + barrier(CLK_LOCAL_MEM_FENCE); - phys_group_id_45902 = get_group_id(0); - for (int32_t i_45903 = 0; i_45903 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_31730)) - - phys_group_id_45902, sext_i64_i32(num_groups_31731)); - i_45903++) { - int32_t virt_group_id_45904 = phys_group_id_45902 + i_45903 * - sext_i64_i32(num_groups_31731); - int64_t gtid_31709 = sext_i32_i64(virt_group_id_45904) * - segmap_group_sizze_31730 + sext_i32_i64(local_tid_45898); - - if (slt64(gtid_31709, m_27772)) { - for (int32_t i_44364 = 0; i_44364 < k2p2zq_27785; i_44364++) { - int64_t i_44305 = sext_i32_i64(i_44364); - float defunc_2_reduce_res_31737; - float redout_44307 = 0.0F; + int32_t offset_129340; + int32_t skip_waves_129341; + + skip_waves_129341 = 1; + + int64_t x_114261; + int64_t x_114262; + + offset_129340 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129333, sext_i64_i32(n_75139))) { + x_114261 = ((__local + int64_t *) red_arr_mem_129338)[sext_i32_i64(local_tid_129333 + + offset_129340)]; + } + } + offset_129340 = 1; + 
while (slt32(offset_129340, wave_sizze_129335)) { + if (slt32(local_tid_129333 + offset_129340, sext_i64_i32(n_75139)) && + ((local_tid_129333 - squot32(local_tid_129333, wave_sizze_129335) * + wave_sizze_129335) & (2 * offset_129340 - 1)) == 0) { + // read array element + { + x_114262 = ((volatile __local + int64_t *) red_arr_mem_129338)[sext_i32_i64(local_tid_129333 + + offset_129340)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_114263 = add64(x_114261, x_114262); - for (int32_t i_44363 = 0; i_44363 < n_27775; i_44363++) { - int64_t i_44308 = sext_i32_i64(i_44363); - float x_31742 = ((__global float *) mem_44632)[i_44308 * - m_27772 + - gtid_31709]; - bool isnan_res_31743; - - isnan_res_31743 = futrts_isnan32(x_31742); - - float defunc_1_f_res_31744; - - if (isnan_res_31743) { - defunc_1_f_res_31744 = 0.0F; - } else { - float x_31741 = ((__global - float *) binop_p_mem_44390)[i_44305 * - N_27771 + - i_44308]; - float defunc_1_f_res_f_res_31745 = x_31741 * x_31742; - - defunc_1_f_res_31744 = defunc_1_f_res_f_res_31745; - } - - float defunc_1_op_res_31740 = defunc_1_f_res_31744 + - redout_44307; - float redout_tmp_45906 = defunc_1_op_res_31740; - - redout_44307 = redout_tmp_45906; - } - defunc_2_reduce_res_31737 = redout_44307; - ((__global float *) mem_44635)[phys_tid_31710 + i_44305 * - (num_groups_31731 * - segmap_group_sizze_31730)] = - defunc_2_reduce_res_31737; + x_114261 = defunc_1_op_res_114263; } - for (int64_t i_45907 = 0; i_45907 < i32_res_27787; i_45907++) { - ((__global float *) mem_44650)[i_45907 * m_27772 + gtid_31709] = - ((__global float *) mem_44635)[phys_tid_31710 + i_45907 * - (num_groups_31731 * - segmap_group_sizze_31730)]; + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129338)[sext_i32_i64(local_tid_129333)] = + x_114261; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + offset_129340 *= 2; } + while (slt32(skip_waves_129341, squot32(sext_i64_i32(n_75139) + + wave_sizze_129335 
- 1, + wave_sizze_129335))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129340 = skip_waves_129341 * wave_sizze_129335; + if (slt32(local_tid_129333 + offset_129340, sext_i64_i32(n_75139)) && + ((local_tid_129333 - squot32(local_tid_129333, wave_sizze_129335) * + wave_sizze_129335) == 0 && (squot32(local_tid_129333, + wave_sizze_129335) & (2 * + skip_waves_129341 - + 1)) == + 0)) { + // read array element + { + x_114262 = ((__local + int64_t *) red_arr_mem_129338)[sext_i32_i64(local_tid_129333 + + offset_129340)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_114263 = add64(x_114261, x_114262); + + x_114261 = defunc_1_op_res_114263; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129338)[sext_i32_i64(local_tid_129333)] = + x_114261; + } + } + skip_waves_129341 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_114260 = ((__local + int64_t *) red_arr_mem_129338)[(int64_t) 0]; - error_0: - return; - #undef segmap_group_sizze_31730 -} -__kernel void mainDetailedzisegmap_31851(__global int *global_failure, - int64_t m_27772, int32_t k2p2zq_27785, - int64_t i32_res_27787, - int64_t num_groups_31871, __global - unsigned char *mem_44854, __global - unsigned char *mem_44857, __global - unsigned char *mem_44860, __global - unsigned char *mem_44875) -{ - #define segmap_group_sizze_31870 (mainDetailedzisegmap_group_sizze_31853) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46029; - int32_t local_tid_46030; - int64_t group_sizze_46033; - int32_t wave_sizze_46032; - int32_t group_tid_46031; + double defunc_0_f_res_114268; + int64_t gtid_114169 = sext_i32_i64(ltid_pre_129337); + int32_t phys_tid_114170 = local_tid_129333; + __local char *red_arr_mem_129342; - global_tid_46029 = get_global_id(0); - local_tid_46030 = get_local_id(0); - group_sizze_46033 = get_local_size(0); - wave_sizze_46032 = LOCKSTEP_WIDTH; - 
group_tid_46031 = get_group_id(0); + red_arr_mem_129342 = (__local char *) red_arr_mem_129342_backing_1; - int32_t phys_tid_31851; + bool cond_114273; - phys_tid_31851 = global_tid_46029; + cond_114273 = slt64(gtid_114169, defunc_0_f_res_114260); - int32_t phys_group_id_46034; + double defunc_0_f_res_114274; - phys_group_id_46034 = get_group_id(0); - for (int32_t i_46035 = 0; i_46035 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_31870)) - - phys_group_id_46034, sext_i64_i32(num_groups_31871)); - i_46035++) { - int32_t virt_group_id_46036 = phys_group_id_46034 + i_46035 * - sext_i64_i32(num_groups_31871); - int64_t gtid_31850 = sext_i32_i64(virt_group_id_46036) * - segmap_group_sizze_31870 + sext_i32_i64(local_tid_46030); + if (cond_114273) { + bool y_114276 = slt64(gtid_114169, N_75135); + bool index_certs_114278; - if (slt64(gtid_31850, m_27772)) { - for (int32_t i_44366 = 0; i_44366 < k2p2zq_27785; i_44366++) { - int64_t i_44311 = sext_i32_i64(i_44366); - float defunc_0_f_res_31878; - float redout_44313 = 0.0F; - - for (int32_t i_44365 = 0; i_44365 < k2p2zq_27785; i_44365++) { - int64_t i_44314 = sext_i32_i64(i_44365); - float x_31882 = ((__global float *) mem_44857)[i_44314 * - m_27772 + - gtid_31850]; - float x_31883 = ((__global float *) mem_44854)[i_44311 * - (m_27772 * - i32_res_27787) + - i_44314 * - m_27772 + - gtid_31850]; - float defunc_1_f_res_31884 = x_31882 * x_31883; - float defunc_1_op_res_31881 = defunc_1_f_res_31884 + - redout_44313; - float redout_tmp_46038 = defunc_1_op_res_31881; - - redout_44313 = redout_tmp_46038; + if (!y_114276) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 196) == -1) { + global_failure_args[0] = gtid_114169; + global_failure_args[1] = N_75135; + ; } - defunc_0_f_res_31878 = redout_44313; - ((__global float *) mem_44860)[phys_tid_31851 + i_44311 * - (num_groups_31871 * - segmap_group_sizze_31870)] = - defunc_0_f_res_31878; - } - for (int64_t i_46039 = 0; i_46039 < i32_res_27787; i_46039++) { 
- ((__global float *) mem_44875)[i_46039 * m_27772 + gtid_31850] = - ((__global float *) mem_44860)[phys_tid_31851 + i_46039 * - (num_groups_31871 * - segmap_group_sizze_31870)]; + local_failure = true; + goto error_2; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + double defunc_0_f_res_t_res_114279 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114164 * + N_75135 + + gtid_114169]; + + defunc_0_f_res_114274 = defunc_0_f_res_t_res_114279; + } else { + defunc_0_f_res_114274 = 0.0; } - error_0: - return; - #undef segmap_group_sizze_31870 -} -__kernel void mainDetailedzisegmap_31983(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int32_t k2p2zq_27785, - int64_t i32_res_27787, - int64_t num_groups_32002, __global - unsigned char *mem_44397, __global - unsigned char *mem_44919, __global - unsigned char *mem_44922, __global - unsigned char *mem_44937) -{ - #define segmap_group_sizze_32001 (mainDetailedzisegmap_group_sizze_31985) + double defunc_0_f_res_114280 = defunc_0_f_res_114274 * + defunc_0_f_res_114274; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + ((__local double *) red_arr_mem_129342)[gtid_114169] = + defunc_0_f_res_114280; - if (*global_failure >= 0) + error_2: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) return; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46117; - int32_t local_tid_46118; - int64_t group_sizze_46121; - int32_t wave_sizze_46120; - int32_t group_tid_46119; - - global_tid_46117 = get_global_id(0); - local_tid_46118 = get_local_id(0); - group_sizze_46121 = get_local_size(0); - wave_sizze_46120 = LOCKSTEP_WIDTH; - group_tid_46119 = get_group_id(0); - - int32_t phys_tid_31983; + int32_t offset_129344; + int32_t skip_waves_129345; - phys_tid_31983 = global_tid_46117; + skip_waves_129345 = 1; - int32_t phys_group_id_46122; + double x_114269; + double x_114270; - phys_group_id_46122 = get_group_id(0); - for (int32_t i_46123 = 0; i_46123 < - 
sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_32001)) - - phys_group_id_46122, sext_i64_i32(num_groups_32002)); - i_46123++) { - int32_t virt_group_id_46124 = phys_group_id_46122 + i_46123 * - sext_i64_i32(num_groups_32002); - int64_t gtid_31982 = sext_i32_i64(virt_group_id_46124) * - segmap_group_sizze_32001 + sext_i32_i64(local_tid_46118); - - if (slt64(gtid_31982, m_27772)) { - for (int64_t i_44321 = 0; i_44321 < N_27771; i_44321++) { - float defunc_0_f_res_32008; - float redout_44323 = 0.0F; + offset_129344 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129333, sext_i64_i32(n_75139))) { + x_114269 = ((__local + double *) red_arr_mem_129342)[sext_i32_i64(local_tid_129333 + + offset_129344)]; + } + } + offset_129344 = 1; + while (slt32(offset_129344, wave_sizze_129335)) { + if (slt32(local_tid_129333 + offset_129344, sext_i64_i32(n_75139)) && + ((local_tid_129333 - squot32(local_tid_129333, wave_sizze_129335) * + wave_sizze_129335) & (2 * offset_129344 - 1)) == 0) { + // read array element + { + x_114270 = ((volatile __local + double *) red_arr_mem_129342)[sext_i32_i64(local_tid_129333 + + offset_129344)]; + } + // apply reduction operation + { + double defunc_1_op_res_114271 = x_114269 + x_114270; - for (int32_t i_44369 = 0; i_44369 < k2p2zq_27785; i_44369++) { - int64_t i_44324 = sext_i32_i64(i_44369); - float x_32012 = ((__global float *) mem_44919)[i_44324 * - m_27772 + - gtid_31982]; - float x_32013 = ((__global float *) mem_44397)[i_44321 * - i32_res_27787 + - i_44324]; - float defunc_1_f_res_32014 = x_32012 * x_32013; - float defunc_1_op_res_32011 = defunc_1_f_res_32014 + - redout_44323; - float redout_tmp_46126 = defunc_1_op_res_32011; - - redout_44323 = redout_tmp_46126; - } - defunc_0_f_res_32008 = redout_44323; - ((__global float *) mem_44922)[phys_tid_31983 + i_44321 * - (num_groups_32002 * - segmap_group_sizze_32001)] = - defunc_0_f_res_32008; + x_114269 = defunc_1_op_res_114271; } - for (int64_t 
i_46127 = 0; i_46127 < N_27771; i_46127++) { - ((__global float *) mem_44937)[i_46127 * m_27772 + gtid_31982] = - ((__global float *) mem_44922)[phys_tid_31983 + i_46127 * - (num_groups_32002 * - segmap_group_sizze_32001)]; + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129342)[sext_i32_i64(local_tid_129333)] = + x_114269; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + offset_129344 *= 2; + } + while (slt32(skip_waves_129345, squot32(sext_i64_i32(n_75139) + + wave_sizze_129335 - 1, + wave_sizze_129335))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129344 = skip_waves_129345 * wave_sizze_129335; + if (slt32(local_tid_129333 + offset_129344, sext_i64_i32(n_75139)) && + ((local_tid_129333 - squot32(local_tid_129333, wave_sizze_129335) * + wave_sizze_129335) == 0 && (squot32(local_tid_129333, + wave_sizze_129335) & (2 * + skip_waves_129345 - + 1)) == + 0)) { + // read array element + { + x_114270 = ((__local + double *) red_arr_mem_129342)[sext_i32_i64(local_tid_129333 + + offset_129344)]; + } + // apply reduction operation + { + double defunc_1_op_res_114271 = x_114269 + x_114270; + + x_114269 = defunc_1_op_res_114271; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129342)[sext_i32_i64(local_tid_129333)] = + x_114269; + } + } + skip_waves_129345 *= 2; } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_114268 = ((__local + double *) red_arr_mem_129342)[(int64_t) 0]; - error_0: + int64_t i64_arg_114281 = sub64(defunc_0_f_res_114260, k2p2_75149); + double i64_res_114282 = sitofp_i64_f64(i64_arg_114281); + double sqrt_arg_114283 = defunc_0_f_res_114268 / i64_res_114282; + double sqrt_res_114284; + + sqrt_res_114284 = futrts_sqrt64(sqrt_arg_114283); + + double i64_res_114285 = sitofp_i64_f64(defunc_0_f_res_114260); + double f64_arg_114286 = hfrac_75141 * i64_res_114285; + int64_t f64_res_114287 = fptosi_f64_i64(f64_arg_114286); + + if (local_tid_129333 == 0) { + ((__global int64_t *) 
mem_124939)[gtid_114164] = f64_res_114287; + } + if (local_tid_129333 == 0) { + ((__global int64_t *) mem_124941)[gtid_114164] = defunc_0_f_res_114260; + } + if (local_tid_129333 == 0) { + ((__global double *) mem_124943)[gtid_114164] = sqrt_res_114284; + } + + error_4: return; - #undef segmap_group_sizze_32001 } -__kernel void mainDetailedzisegmap_32263(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - __global unsigned char *mem_45163, - __global unsigned char *mem_45166, - __global unsigned char *mem_45172, - __global unsigned char *mem_45175) +__kernel void mainzisegmap_intragroup_114556(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129601_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129599_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129597_backing_aligned_2, + __local volatile + int64_t *mem_124977_backing_aligned_3, + int64_t N_75135, int64_t n_75139, + int64_t iota_arg_77024, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_3_map_res_mem_124960, + __global + unsigned char *defunc_0_f_res_mem_124970, + __global unsigned char *mem_124973, + __global unsigned char *mem_124980, + __global unsigned char *mem_124982) { - #define segmap_group_sizze_32429 (mainDetailedzisegmap_group_sizze_32266) - const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129601_backing_3 = + (__local volatile + char *) red_arr_mem_129601_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129599_backing_2 = + (__local volatile + char *) red_arr_mem_129599_backing_aligned_1; + __local volatile char 
*restrict red_arr_mem_129597_backing_1 = + (__local volatile + char *) red_arr_mem_129597_backing_aligned_2; + __local volatile char *restrict mem_124977_backing_0 = (__local volatile + char *) mem_124977_backing_aligned_3; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46344; - int32_t local_tid_46345; - int64_t group_sizze_46348; - int32_t wave_sizze_46347; - int32_t group_tid_46346; + int32_t global_tid_129580; + int32_t local_tid_129581; + int64_t group_sizze_129584; + int32_t wave_sizze_129583; + int32_t group_tid_129582; - global_tid_46344 = get_global_id(0); - local_tid_46345 = get_local_id(0); - group_sizze_46348 = get_local_size(0); - wave_sizze_46347 = LOCKSTEP_WIDTH; - group_tid_46346 = get_group_id(0); + global_tid_129580 = get_global_id(0); + local_tid_129581 = get_local_id(0); + group_sizze_129584 = get_local_size(0); + wave_sizze_129583 = LOCKSTEP_WIDTH; + group_tid_129582 = get_group_id(0); - int32_t phys_tid_32263; + int32_t phys_tid_114556; - phys_tid_32263 = global_tid_46344; + phys_tid_114556 = group_tid_129582; - int64_t gtid_32261; + int32_t ltid_pre_129585; - gtid_32261 = squot64(sext_i32_i64(group_tid_46346) * - segmap_group_sizze_32429 + - sext_i32_i64(local_tid_46345), N_27771); + ltid_pre_129585 = local_tid_129581; - int64_t gtid_32262; + int64_t gtid_114547; - gtid_32262 = sext_i32_i64(group_tid_46346) * segmap_group_sizze_32429 + - sext_i32_i64(local_tid_46345) - squot64(sext_i32_i64(group_tid_46346) * - segmap_group_sizze_32429 + - sext_i32_i64(local_tid_46345), - N_27771) * N_27771; - if (slt64(gtid_32261, m_27772) && slt64(gtid_32262, N_27771)) { - float x_32437 = ((__global float *) mem_45166)[gtid_32261 * N_27771 + - gtid_32262]; - int32_t index_primexp_42377 = sext_i64_i32(gtid_32262); - bool isnan_res_32440; - - isnan_res_32440 
= futrts_isnan32(x_32437); - - bool defunc_0_p_res_32441 = !isnan_res_32440; - int64_t defunc_1_f_res_32442; + gtid_114547 = sext_i32_i64(group_tid_129582); + + int64_t x_114564; + + x_114564 = ((__global int64_t *) defunc_4_map_res_mem_124919)[gtid_114547]; + + int64_t x_114565 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114547]; + double x_114566 = ((__global + double *) defunc_3_map_res_mem_124960)[gtid_114547]; + int64_t x_114567 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114547]; + double x_114568 = ((__global + double *) defunc_0_f_res_mem_124970)[gtid_114547]; + int64_t y_114573 = sub64(x_114564, x_114565); + double i64_res_114574 = sitofp_i64_f64(x_114565); + double sqrt_res_114575; + + sqrt_res_114575 = futrts_sqrt64(i64_res_114574); + + double y_114576 = x_114566 * sqrt_res_114575; + __local char *mem_124977; + + mem_124977 = (__local char *) mem_124977_backing_0; + + int64_t gtid_114550 = sext_i32_i64(ltid_pre_129585); + int32_t phys_tid_114551 = local_tid_129581; + bool cond_114589 = sle64(y_114573, gtid_114550); + double defunc_0_f_res_114590; + + if (cond_114589) { + defunc_0_f_res_114590 = 0.0; + } else { + bool cond_114591 = gtid_114550 == (int64_t) 0; + double defunc_0_f_res_f_res_114592; - if (defunc_0_p_res_32441) { - int64_t x_32438 = ((__global int64_t *) mem_45163)[gtid_32261 * - N_27771 + - gtid_32262]; - int64_t defunc_1_f_res_t_res_32443 = sub64(x_32438, (int64_t) 1); - - defunc_1_f_res_32442 = defunc_1_f_res_t_res_32443; + if (cond_114591) { + defunc_0_f_res_f_res_114592 = x_114568; } else { - defunc_1_f_res_32442 = (int64_t) -1; - } - if ((sle64((int64_t) 0, gtid_32261) && slt64(gtid_32261, m_27772)) && - (sle64((int64_t) 0, defunc_1_f_res_32442) && - slt64(defunc_1_f_res_32442, N_27771))) { - ((__global int32_t *) mem_45175)[gtid_32261 * N_27771 + - defunc_1_f_res_32442] = - index_primexp_42377; - } - if ((sle64((int64_t) 0, gtid_32261) && slt64(gtid_32261, m_27772)) && - (sle64((int64_t) 0, 
defunc_1_f_res_32442) && - slt64(defunc_1_f_res_32442, N_27771))) { - ((__global float *) mem_45172)[gtid_32261 * N_27771 + - defunc_1_f_res_32442] = x_32437; + int64_t i_114593 = add64(gtid_114550, x_114565); + bool x_114594 = sle64((int64_t) 0, i_114593); + bool y_114595 = slt64(i_114593, N_75135); + bool bounds_check_114596 = x_114594 && y_114595; + bool index_certs_114597; + + if (!bounds_check_114596) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 203) == + -1) { + global_failure_args[0] = i_114593; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_114598 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114547 * + N_75135 + + i_114593]; + int64_t x_114599 = sub64(x_114565, x_114567); + int64_t i_114600 = add64(gtid_114550, x_114599); + bool x_114601 = sle64((int64_t) 0, i_114600); + bool y_114602 = slt64(i_114600, N_75135); + bool bounds_check_114603 = x_114601 && y_114602; + bool index_certs_114604; + + if (!bounds_check_114603) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 204) == + -1) { + global_failure_args[0] = i_114600; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_114605 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114547 * + N_75135 + + i_114600]; + double defunc_0_f_res_f_res_f_res_114606 = x_114598 - y_114605; + + defunc_0_f_res_f_res_114592 = defunc_0_f_res_f_res_f_res_114606; } + defunc_0_f_res_114590 = defunc_0_f_res_f_res_114592; } + ((__local double *) mem_124977)[gtid_114550] = defunc_0_f_res_114590; error_0: - return; - #undef segmap_group_sizze_32429 -} -__kernel void mainDetailedzisegmap_32339(__global int *global_failure, - int64_t N_27771, int64_t m_27772, - int64_t i_28075, __global - unsigned char *mem_45163, __global - unsigned char *mem_45169) -{ - #define segmap_group_sizze_32393 (mainDetailedzisegmap_group_sizze_32341) - - const int block_dim0 = 0; - const int block_dim1 = 
1; - const int block_dim2 = 2; - - if (*global_failure >= 0) + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) return; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46321; - int32_t local_tid_46322; - int64_t group_sizze_46325; - int32_t wave_sizze_46324; - int32_t group_tid_46323; + int64_t dims_flat_129586; - global_tid_46321 = get_global_id(0); - local_tid_46322 = get_local_id(0); - group_sizze_46325 = get_local_size(0); - wave_sizze_46324 = LOCKSTEP_WIDTH; - group_tid_46323 = get_group_id(0); + dims_flat_129586 = iota_arg_77024; - int32_t phys_tid_32339; + double x_114585; + double x_114586; + double x_129588; + double x_129589; + bool ltid_in_bounds_129591; - phys_tid_32339 = global_tid_46321; + ltid_in_bounds_129591 = slt64(sext_i32_i64(local_tid_129581), + iota_arg_77024); - int64_t gtid_32338; + int32_t skip_threads_129592; - gtid_32338 = sext_i32_i64(group_tid_46323) * segmap_group_sizze_32393 + - sext_i32_i64(local_tid_46322); - if (slt64(gtid_32338, m_27772)) { - int64_t last_res_32397 = ((__global int64_t *) mem_45163)[gtid_32338 * - N_27771 + - i_28075]; - int32_t defunc_0_f_res_32398 = sext_i64_i32(last_res_32397); + // read input for in-block scan + { + if (ltid_in_bounds_129591) { + x_114586 = ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581)]; + if ((local_tid_129581 - squot32(local_tid_129581, 32) * 32) == 0) { + x_114585 = x_114586; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129592 = 1; + while (slt32(skip_threads_129592, 32)) { + if (sle32(skip_threads_129592, local_tid_129581 - + squot32(local_tid_129581, 32) * 32) && + ltid_in_bounds_129591) { + // read operands + { + x_114585 = ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581) - + sext_i32_i64(skip_threads_129592)]; + } + // perform operation + { + bool inactive_129593 = + slt64(srem64(sext_i32_i64(local_tid_129581), + iota_arg_77024), + sext_i32_i64(local_tid_129581) - + 
sext_i32_i64(local_tid_129581 - + skip_threads_129592)); + + if (inactive_129593) { + x_114585 = x_114586; + } + if (!inactive_129593) { + double defunc_1_op_res_114587 = x_114585 + x_114586; + + x_114585 = defunc_1_op_res_114587; + } + } + } + if (sle32(wave_sizze_129583, skip_threads_129592)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129592, local_tid_129581 - + squot32(local_tid_129581, 32) * 32) && + ltid_in_bounds_129591) { + // write result + { + ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581)] = + x_114585; + x_114586 = x_114585; + } + } + if (sle32(wave_sizze_129583, skip_threads_129592)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129592 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129581 - squot32(local_tid_129581, 32) * 32) == 31 && + ltid_in_bounds_129591) { + ((volatile __local + double *) mem_124977)[sext_i32_i64(squot32(local_tid_129581, + 32))] = x_114585; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129594; - ((__global int32_t *) mem_45169)[gtid_32338] = defunc_0_f_res_32398; + // read input for in-block scan + { + if (squot32(local_tid_129581, 32) == 0 && ltid_in_bounds_129591) { + x_129589 = ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581)]; + if ((local_tid_129581 - squot32(local_tid_129581, 32) * 32) == + 0) { + x_129588 = x_129589; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129594 = 1; + while (slt32(skip_threads_129594, 32)) { + if (sle32(skip_threads_129594, local_tid_129581 - + squot32(local_tid_129581, 32) * 32) && + (squot32(local_tid_129581, 32) == 0 && + ltid_in_bounds_129591)) { + // read operands + { + x_129588 = ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581) - + sext_i32_i64(skip_threads_129594)]; + } + 
// perform operation + { + bool inactive_129595 = + slt64(srem64(sext_i32_i64(local_tid_129581 * 32 + + 32 - 1), iota_arg_77024), + sext_i32_i64(local_tid_129581 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129581 - + skip_threads_129594) * + 32 + 32 - 1)); + + if (inactive_129595) { + x_129588 = x_129589; + } + if (!inactive_129595) { + double defunc_1_op_res_129590 = x_129588 + x_129589; + + x_129588 = defunc_1_op_res_129590; + } + } + } + if (sle32(wave_sizze_129583, skip_threads_129594)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129594, local_tid_129581 - + squot32(local_tid_129581, 32) * 32) && + (squot32(local_tid_129581, 32) == 0 && + ltid_in_bounds_129591)) { + // write result + { + ((volatile __local + double *) mem_124977)[sext_i32_i64(local_tid_129581)] = + x_129588; + x_129589 = x_129588; + } + } + if (sle32(wave_sizze_129583, skip_threads_129594)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129594 *= 2; + } + } } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129581, 32) == 0 || !ltid_in_bounds_129591)) { + // read operands + { + x_114586 = x_114585; + x_114585 = ((__local + double *) mem_124977)[sext_i32_i64(squot32(local_tid_129581, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129596 = + slt64(srem64(sext_i32_i64(local_tid_129581), + iota_arg_77024), + sext_i32_i64(local_tid_129581) - + sext_i32_i64(squot32(local_tid_129581, 32) * 32 - + 1)); + + if (inactive_129596) { + x_114585 = x_114586; + } + if (!inactive_129596) { + double defunc_1_op_res_114587 = x_114585 + x_114586; + + x_114585 = defunc_1_op_res_114587; + } + } + // write final result + { + ((__local + double *) mem_124977)[sext_i32_i64(local_tid_129581)] = + x_114585; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129581, 32) == 0) { + ((__local double *) mem_124977)[sext_i32_i64(local_tid_129581)] = + 
x_114586; + } + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_32393 -} -__kernel void mainDetailedzisegmap_32602(__global int *global_failure, - int64_t m_27772, float hfrac_27777, - int32_t k2p2_27783, __global - unsigned char *mem_45232, __global - unsigned char *mem_45235, __global - unsigned char *mem_45238, __global - unsigned char *mem_45240) -{ - #define segmap_group_sizze_32695 (mainDetailedzisegmap_group_sizze_32604) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; + bool acc0_114612; + int64_t acc0_114613; + double acc0_114614; + int64_t gtid_114552 = sext_i32_i64(ltid_pre_129585); + int32_t phys_tid_114553 = local_tid_129581; + __local char *red_arr_mem_129597; - int32_t global_tid_46497; - int32_t local_tid_46498; - int64_t group_sizze_46501; - int32_t wave_sizze_46500; - int32_t group_tid_46499; + red_arr_mem_129597 = (__local char *) red_arr_mem_129597_backing_1; - global_tid_46497 = get_global_id(0); - local_tid_46498 = get_local_id(0); - group_sizze_46501 = get_local_size(0); - wave_sizze_46500 = LOCKSTEP_WIDTH; - group_tid_46499 = get_group_id(0); + __local char *red_arr_mem_129599; - int32_t phys_tid_32602; + red_arr_mem_129599 = (__local char *) red_arr_mem_129599_backing_2; - phys_tid_32602 = global_tid_46497; + __local char *red_arr_mem_129601; - int64_t gtid_32601; + red_arr_mem_129601 = (__local char *) red_arr_mem_129601_backing_3; - gtid_32601 = sext_i32_i64(group_tid_46499) * segmap_group_sizze_32695 + - sext_i32_i64(local_tid_46498); - if (slt64(gtid_32601, m_27772)) { - int32_t defunc_0_f_res_32699 = ((__global - int32_t *) mem_45232)[gtid_32601]; - float defunc_0_f_res_32700 = ((__global float *) mem_45235)[gtid_32601]; - int32_t r32_arg_32701 = sub32(defunc_0_f_res_32699, k2p2_27783); - float i32_res_32702 = sitofp_i32_f32(r32_arg_32701); - float sqrt_arg_32703 = defunc_0_f_res_32700 / i32_res_32702; - float sqrt_res_32704; 
- - sqrt_res_32704 = futrts_sqrt32(sqrt_arg_32703); - - float i32_res_32705 = sitofp_i32_f32(defunc_0_f_res_32699); - float t32_arg_32706 = hfrac_27777 * i32_res_32705; - int32_t f32_res_32707 = fptosi_f32_i32(t32_arg_32706); - - ((__global int32_t *) mem_45238)[gtid_32601] = f32_res_32707; - ((__global float *) mem_45240)[gtid_32601] = sqrt_res_32704; - } + double x_114629; - error_0: - return; - #undef segmap_group_sizze_32695 -} -__kernel void mainDetailedzisegmap_32902(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_27771, int32_t n_27775, - float lam_27778, - int64_t iota32_arg_28203, - float i32_res_28215, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_45282) -{ - #define segmap_group_sizze_32924 (mainDetailedzisegmap_group_sizze_32904) + x_114629 = ((__local double *) mem_124977)[gtid_114552]; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + double x_114630 = ((__global double *) mem_124973)[gtid_114552]; + double defunc_0_f_res_114633 = x_114629 / y_114576; + bool cond_114634 = slt64(gtid_114552, y_114573); + bool isnan_res_114635; - if (*global_failure >= 0) - return; + isnan_res_114635 = futrts_isnan64(defunc_0_f_res_114633); - int32_t global_tid_46607; - int32_t local_tid_46608; - int64_t group_sizze_46611; - int32_t wave_sizze_46610; - int32_t group_tid_46609; + bool cond_t_res_114636 = !isnan_res_114635; + bool x_114637 = cond_114634 && cond_t_res_114636; + double abs_res_114638 = fabs(defunc_0_f_res_114633); + bool defunc_2_f_res_t_res_114639 = x_114630 < abs_res_114638; + bool x_114640 = x_114637 && defunc_2_f_res_t_res_114639; + double defunc_1_f_res_114641; - global_tid_46607 = get_global_id(0); - local_tid_46608 = get_local_id(0); - group_sizze_46611 = get_local_size(0); - wave_sizze_46610 = LOCKSTEP_WIDTH; - group_tid_46609 = get_group_id(0); + if (cond_114634) { + defunc_1_f_res_114641 = defunc_0_f_res_114633; + 
} else { + defunc_1_f_res_114641 = 0.0; + } + ((__local bool *) red_arr_mem_129597)[gtid_114552] = x_114640; + ((__local int64_t *) red_arr_mem_129599)[gtid_114552] = gtid_114552; + ((__local double *) red_arr_mem_129601)[gtid_114552] = + defunc_1_f_res_114641; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t phys_tid_32902; + int32_t offset_129603; + int32_t skip_waves_129604; - phys_tid_32902 = global_tid_46607; + skip_waves_129604 = 1; - int64_t gtid_32901; + bool x_114615; + int64_t x_114616; + double x_114617; + bool x_114618; + int64_t x_114619; + double x_114620; - gtid_32901 = sext_i32_i64(group_tid_46609) * segmap_group_sizze_32924 + - sext_i32_i64(local_tid_46608); - if (slt64(gtid_32901, iota32_arg_28203)) { - int32_t defunc_0_f_res_32928 = sext_i64_i32(gtid_32901); - int32_t i_32929 = add32(n_27775, defunc_0_f_res_32928); - int64_t i_32930 = sext_i32_i64(i_32929); - bool x_32931 = sle64((int64_t) 0, i_32930); - bool y_32932 = slt64(i_32930, N_27771); - bool bounds_check_32933 = x_32931 && y_32932; - bool index_certs_32934; - - if (!bounds_check_32933) { + offset_129603 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129581, sext_i64_i32(iota_arg_77024))) { + x_114615 = ((__local + bool *) red_arr_mem_129597)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114616 = ((__local + int64_t *) red_arr_mem_129599)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114617 = ((__local + double *) red_arr_mem_129601)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + } + } + offset_129603 = 1; + while (slt32(offset_129603, wave_sizze_129583)) { + if (slt32(local_tid_129581 + offset_129603, + sext_i64_i32(iota_arg_77024)) && ((local_tid_129581 - + squot32(local_tid_129581, + wave_sizze_129583) * + wave_sizze_129583) & (2 * + offset_129603 - + 1)) == + 0) { + // read array element { - if (atomic_cmpxchg_i32_global(global_failure, -1, 55) == -1) { - global_failure_args[0] = i_32930; - global_failure_args[1] = N_27771; - ; + 
x_114618 = ((volatile __local + bool *) red_arr_mem_129597)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114619 = ((volatile __local + int64_t *) red_arr_mem_129599)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114620 = ((volatile __local + double *) red_arr_mem_129601)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + } + // apply reduction operation + { + bool defunc_1_op_res_114621; + int64_t defunc_1_op_res_114622; + + if (x_114615) { + defunc_1_op_res_114621 = x_114615; + defunc_1_op_res_114622 = x_114616; + } else { + bool x_114623 = x_114618 && x_114618; + bool x_114624 = !x_114618; + bool y_114625 = x_114615 && x_114624; + bool defunc_1_op_res_f_res_114626 = x_114623 || y_114625; + int64_t defunc_1_op_res_f_res_114627; + + if (x_114618) { + defunc_1_op_res_f_res_114627 = x_114619; + } else { + defunc_1_op_res_f_res_114627 = x_114616; + } + defunc_1_op_res_114621 = defunc_1_op_res_f_res_114626; + defunc_1_op_res_114622 = defunc_1_op_res_f_res_114627; } - return; + + double defunc_1_op_res_114628 = x_114617 + x_114620; + + x_114615 = defunc_1_op_res_114621; + x_114616 = defunc_1_op_res_114622; + x_114617 = defunc_1_op_res_114628; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129597)[sext_i32_i64(local_tid_129581)] = + x_114615; + ((volatile __local + int64_t *) red_arr_mem_129599)[sext_i32_i64(local_tid_129581)] = + x_114616; + ((volatile __local + double *) red_arr_mem_129601)[sext_i32_i64(local_tid_129581)] = + x_114617; } } + offset_129603 *= 2; + } + while (slt32(skip_waves_129604, squot32(sext_i64_i32(iota_arg_77024) + + wave_sizze_129583 - 1, + wave_sizze_129583))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129603 = skip_waves_129604 * wave_sizze_129583; + if (slt32(local_tid_129581 + offset_129603, + sext_i64_i32(iota_arg_77024)) && ((local_tid_129581 - + squot32(local_tid_129581, + wave_sizze_129583) * + wave_sizze_129583) == 0 && + (squot32(local_tid_129581, + wave_sizze_129583) & + (2 * 
skip_waves_129604 - + 1)) == 0)) { + // read array element + { + x_114618 = ((__local + bool *) red_arr_mem_129597)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114619 = ((__local + int64_t *) red_arr_mem_129599)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + x_114620 = ((__local + double *) red_arr_mem_129601)[sext_i32_i64(local_tid_129581 + + offset_129603)]; + } + // apply reduction operation + { + bool defunc_1_op_res_114621; + int64_t defunc_1_op_res_114622; + + if (x_114615) { + defunc_1_op_res_114621 = x_114615; + defunc_1_op_res_114622 = x_114616; + } else { + bool x_114623 = x_114618 && x_114618; + bool x_114624 = !x_114618; + bool y_114625 = x_114615 && x_114624; + bool defunc_1_op_res_f_res_114626 = x_114623 || y_114625; + int64_t defunc_1_op_res_f_res_114627; + + if (x_114618) { + defunc_1_op_res_f_res_114627 = x_114619; + } else { + defunc_1_op_res_f_res_114627 = x_114616; + } + defunc_1_op_res_114621 = defunc_1_op_res_f_res_114626; + defunc_1_op_res_114622 = defunc_1_op_res_f_res_114627; + } + + double defunc_1_op_res_114628 = x_114617 + x_114620; + + x_114615 = defunc_1_op_res_114621; + x_114616 = defunc_1_op_res_114622; + x_114617 = defunc_1_op_res_114628; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129597)[sext_i32_i64(local_tid_129581)] = + x_114615; + ((__local + int64_t *) red_arr_mem_129599)[sext_i32_i64(local_tid_129581)] = + x_114616; + ((__local + double *) red_arr_mem_129601)[sext_i32_i64(local_tid_129581)] = + x_114617; + } + } + skip_waves_129604 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + acc0_114612 = ((__local bool *) red_arr_mem_129597)[(int64_t) 0]; + acc0_114613 = ((__local int64_t *) red_arr_mem_129599)[(int64_t) 0]; + acc0_114614 = ((__local double *) red_arr_mem_129601)[(int64_t) 0]; + + bool x_114644 = acc0_114612 && acc0_114612; + int64_t defunc_1_op_res_f_res_114648; + + if (acc0_114612) { + defunc_1_op_res_f_res_114648 = acc0_114613; + } else { + defunc_1_op_res_f_res_114648 = 
(int64_t) -1; + } + + bool cond_114654 = y_114573 == (int64_t) 0; + double defunc_0_f_res_114655; + + if (cond_114654) { + defunc_0_f_res_114655 = 0.0; + } else { + double i64_res_114656 = sitofp_i64_f64(y_114573); + double defunc_0_f_res_f_res_114657 = acc0_114614 / i64_res_114656; - int32_t time_32935 = ((__global - int32_t *) mappingindices_mem_44380)[i_32930]; - float i32_res_32936 = sitofp_i32_f32(time_32935); - float logplus_arg_32937 = i32_res_32936 / i32_res_28215; - bool cond_32938 = 2.7182817F < logplus_arg_32937; - float logplus_res_32939; - - if (cond_32938) { - float log_res_32940; + defunc_0_f_res_114655 = defunc_0_f_res_f_res_114657; + } + + bool cond_114658 = !x_114644; + int64_t fst_breakzq_114659; + + if (cond_114658) { + fst_breakzq_114659 = (int64_t) -1; + } else { + bool cond_114660 = slt64(defunc_1_op_res_f_res_114648, y_114573); + int64_t adjustValInds_res_114661; + + if (cond_114660) { + int64_t i_114662 = add64(x_114565, defunc_1_op_res_f_res_114648); + bool x_114663 = sle64((int64_t) 0, i_114662); + bool y_114664 = slt64(i_114662, N_75135); + bool bounds_check_114665 = x_114663 && y_114664; + bool index_certs_114666; + + if (!bounds_check_114665) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 205) == + -1) { + global_failure_args[0] = i_114662; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_3; + } + } - log_res_32940 = futrts_log32(logplus_arg_32937); - logplus_res_32939 = log_res_32940; + int64_t x_114667 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_114547 * + N_75135 + + i_114662]; + int64_t adjustValInds_res_t_res_114668 = sub64(x_114667, n_75139); + + adjustValInds_res_114661 = adjustValInds_res_t_res_114668; } else { - logplus_res_32939 = 1.0F; + adjustValInds_res_114661 = (int64_t) -1; } - - float sqrt_res_32941; - - sqrt_res_32941 = futrts_sqrt32(logplus_res_32939); - - float defunc_0_f_res_32942 = lam_27778 * sqrt_res_32941; - - ((__global float *) mem_45282)[gtid_32901] = 
defunc_0_f_res_32942; + fst_breakzq_114659 = adjustValInds_res_114661; } - error_0: + bool cond_114669 = sle64(x_114565, (int64_t) 5); + bool cond_f_res_114670 = sle64(y_114573, (int64_t) 5); + bool x_114671 = !cond_114669; + bool y_114672 = cond_f_res_114670 && x_114671; + bool cond_114673 = cond_114669 || y_114672; + int64_t fst_breakzq_114674; + + if (cond_114673) { + fst_breakzq_114674 = (int64_t) -2; + } else { + fst_breakzq_114674 = fst_breakzq_114659; + } + if (local_tid_129581 == 0) { + ((__global int64_t *) mem_124980)[gtid_114547] = fst_breakzq_114674; + } + if (local_tid_129581 == 0) { + ((__global double *) mem_124982)[gtid_114547] = defunc_0_f_res_114655; + } + + error_3: return; - #undef segmap_group_sizze_32924 } -__kernel void mainDetailedzisegmap_33188(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_27772, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, - int64_t distance_28243, - int64_t segmap_usable_groups_33421, - __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45285, - __global unsigned char *mem_45292, - __global unsigned char *mem_45303, - __global unsigned char *mem_45323) +__kernel void mainzisegmap_intragroup_115661(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125194_backing_aligned_0, + __local volatile + int64_t *mem_121428_backing_aligned_1, + __local volatile + int64_t *mem_121409_backing_aligned_2, + __local volatile + int64_t *mem_121400_backing_aligned_3, + __local volatile + int64_t *mem_121377_backing_aligned_4, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_y_115659, + int64_t ctx_val_121390, + int64_t num_threads_126152, + __global unsigned char *mem_121359, + __global unsigned char *mem_121363, + __global unsigned char *mem_121366, + __global unsigned char 
*mem_121368, + __global unsigned char *mem_121446, + __global unsigned char *mem_125177) { - #define segmap_group_sizze_33420 (mainDetailedzisegmap_group_sizze_33190) + #define tile_sizze_115656 (mainzitile_sizze_115655) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_125194_backing_8 = (__local volatile + char *) mem_125194_backing_aligned_0; + __local volatile char *restrict mem_121428_backing_7 = (__local volatile + char *) mem_121428_backing_aligned_1; + __local volatile char *restrict mem_121409_backing_2 = (__local volatile + char *) mem_121409_backing_aligned_2; + __local volatile char *restrict mem_121400_backing_1 = (__local volatile + char *) mem_121400_backing_aligned_3; + __local volatile char *restrict mem_121377_backing_0 = (__local volatile + char *) mem_121377_backing_aligned_4; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126875; + int32_t local_tid_126876; + int64_t group_sizze_126879; + int32_t wave_sizze_126878; + int32_t group_tid_126877; + + global_tid_126875 = get_global_id(0); + local_tid_126876 = get_local_id(0); + group_sizze_126879 = get_local_size(0); + wave_sizze_126878 = LOCKSTEP_WIDTH; + group_tid_126877 = get_group_id(0); - int32_t global_tid_46622; - int32_t local_tid_46623; - int64_t group_sizze_46626; - int32_t wave_sizze_46625; - int32_t group_tid_46624; + int32_t gid_flat_115661; - global_tid_46622 = get_global_id(0); - local_tid_46623 = get_local_id(0); - group_sizze_46626 = get_local_size(0); - wave_sizze_46625 = LOCKSTEP_WIDTH; - group_tid_46624 = get_group_id(0); + gid_flat_115661 = group_tid_126877; - int32_t phys_tid_33188; + int32_t ltid_pre_126880; - phys_tid_33188 = global_tid_46622; + ltid_pre_126880 = squot32(local_tid_126876, + 
sext_i64_i32(tile_sizze_115656)); - int64_t gtid_33187; + int32_t ltid_pre_126881; - gtid_33187 = sext_i32_i64(group_tid_46624) * segmap_group_sizze_33420 + - sext_i32_i64(local_tid_46623); - if (slt64(gtid_33187, m_27772)) { - int32_t x_33423 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_33187]; - int32_t x_33424 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33187]; - int32_t y_33425 = ((__global int32_t *) mem_45285)[gtid_33187]; + ltid_pre_126881 = local_tid_126876 - squot32(local_tid_126876, + sext_i64_i32(tile_sizze_115656)) * + sext_i64_i32(tile_sizze_115656); + + int64_t gid_x_115653; + + gid_x_115653 = squot64(sext_i32_i64(group_tid_126877), num_groups_y_115659); + + int64_t gid_y_115654; + + gid_y_115654 = sext_i32_i64(group_tid_126877) - + squot64(sext_i32_i64(group_tid_126877), num_groups_y_115659) * + num_groups_y_115659; + + int64_t binop_x_115688; + + binop_x_115688 = gid_x_115653 * tile_sizze_115656; + + int64_t binop_x_115690 = gid_y_115654 * tile_sizze_115656; + __local char *mem_121377; + + mem_121377 = (__local char *) mem_121377_backing_0; + + int64_t ltid_y_115680 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115678 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115679 = local_tid_126876; + + if (slt64(ltid_y_115680, tile_sizze_115656) && slt64(ltid_x_115678, + tile_sizze_115656)) { + int64_t gtid_115689 = ltid_y_115680 + binop_x_115688; + int64_t gtid_115691 = ltid_x_115678 + binop_x_115690; + bool binop_x_115692 = slt64(gtid_115689, m_75136); + bool binop_y_115693 = slt64(gtid_115691, k2p2zq_75151); + bool cond_115694 = binop_x_115692 && binop_y_115693; - for (int64_t i_46627 = 0; i_46627 < iota32_arg_28233; i_46627++) { - ((__global float *) mem_45303)[phys_tid_33188 + i_46627 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)] = - ((__global float *) mem_45292)[gtid_33187 + i_46627 * m_27772]; + if (cond_115694) { + for (int64_t i_126882 = 0; i_126882 < k2p2zq_75151; i_126882++) { + ((__global 
double *) mem_125177)[gid_flat_115661 + i_126882 * + num_threads_126152] = + ((__global double *) mem_121368)[i_126882]; + } } - for (int64_t i_33428 = 0; i_33428 < distance_28243; i_33428++) { - int64_t index_primexp_33430 = add64((int64_t) 1, i_33428); - bool cond_33431 = slt64((int64_t) 0, index_primexp_33430); - bool loop_cond_33432; - - if (cond_33431) { - bool x_33433 = sle64((int64_t) 0, index_primexp_33430); - bool y_33434 = slt64(index_primexp_33430, iota32_arg_28233); - bool bounds_check_33435 = x_33433 && y_33434; - bool index_certs_33436; - - if (!bounds_check_33435) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 58) == - -1) { - global_failure_args[0] = index_primexp_33430; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } + for (int64_t i_126883 = 0; i_126883 < k2p2zq_75151; i_126883++) { + ((__local double *) mem_121377)[ltid_y_115680 * (k2p2zq_75151 * + tile_sizze_115656) + + ltid_x_115678 * k2p2zq_75151 + + i_126883] = ((__global + double *) mem_125177)[gid_flat_115661 + + i_126883 * + num_threads_126152]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_121395[1]; + __local char *mem_121400; + + mem_121400 = (__local char *) mem_121400_backing_1; + + __local char *mem_121409; + + mem_121409 = (__local char *) mem_121409_backing_2; + + double mem_121413[1]; + double mem_125187[1]; + __local char *tiled_inside_loop_mem_121442; + __local char *mem_param_121388; + + mem_param_121388 = mem_121377; + for (int64_t i_106468 = 0; i_106468 < k2p2zq_75151; i_106468++) { + int64_t x_106470 = sub64(k2p2zq_75151, i_106468); + int64_t i_106471 = sub64(x_106470, (int64_t) 1); + bool x_106472 = sle64((int64_t) 0, i_106471); + bool y_106473 = slt64(i_106471, k2p2zq_75151); + bool bounds_check_106474 = x_106472 && y_106473; + int64_t j_m_i_106475 = sub64(k2p2zq_75151, x_106470); + bool empty_slice_106476 = j_m_i_106475 == (int64_t) 0; + int64_t m_106477 = sub64(j_m_i_106475, (int64_t) 1); + int64_t i_p_m_t_s_106478 = add64(x_106470, 
m_106477); + bool zzero_leq_i_p_m_t_s_106479 = sle64((int64_t) 0, i_p_m_t_s_106478); + bool i_p_m_t_s_leq_w_106480 = slt64(i_p_m_t_s_106478, k2p2zq_75151); + bool zzero_lte_i_106481 = sle64((int64_t) 0, x_106470); + bool i_lte_j_106482 = sle64(x_106470, k2p2zq_75151); + bool y_106483 = i_p_m_t_s_leq_w_106480 && zzero_lte_i_106481; + bool y_106484 = zzero_leq_i_p_m_t_s_106479 && y_106483; + bool y_106485 = i_lte_j_106482 && y_106484; + bool forwards_ok_106486 = zzero_lte_i_106481 && y_106485; + bool ok_or_empty_106487 = empty_slice_106476 || forwards_ok_106486; + bool index_ok_106488 = bounds_check_106474 && ok_or_empty_106487; + bool index_certs_106489; + + if (!index_ok_106488) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 79) == -1) { + global_failure_args[0] = i_106471; + global_failure_args[1] = x_106470; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + global_failure_args[4] = k2p2zq_75151; + ; } - - float defunc_2_lifted_gt_arg_33437 = ((__global - float *) mem_45303)[phys_tid_33188 + - index_primexp_33430 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - bool y_33438 = slt64(i_33428, iota32_arg_28233); - bool index_certs_33439; - - if (!y_33438) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 59) == - -1) { - global_failure_args[0] = i_33428; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } + local_failure = true; + goto error_1; + } + } + + bool index_certs_106490; + + if (!ok_or_empty_106487) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 80) == -1) { + global_failure_args[0] = x_106470; + global_failure_args[1] = k2p2zq_75151; + global_failure_args[2] = k2p2zq_75151; + ; } - - float defunc_1_lifted_gt_arg_33440 = ((__global - float *) mem_45303)[phys_tid_33188 + - i_33428 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - bool defunc_1_zlze_res_33441 = defunc_1_lifted_gt_arg_33440 <= - defunc_2_lifted_gt_arg_33437; - bool 
defunc_2_lifted_gt_res_33442 = !defunc_1_zlze_res_33441; - - loop_cond_33432 = defunc_2_lifted_gt_res_33442; + local_failure = true; + goto error_1; + } + } + + int64_t num_whole_tiles_115714 = squot64(j_m_i_106475, + tile_sizze_115656); + int64_t ltid_y_115717 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115715 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115716 = local_tid_126876; + + if (slt64(ltid_y_115717, tile_sizze_115656) && slt64(ltid_x_115715, + tile_sizze_115656)) { + mem_121395[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_121405[1]; + double mem_param_121396[1]; + + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_121396[i_3] = mem_121395[i_3]; + for (int64_t tile_id_115726 = 0; tile_id_115726 < + num_whole_tiles_115714; tile_id_115726++) { + int64_t binop_x_115802 = tile_sizze_115656 * tile_id_115726; + int64_t ltid_y_115729 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115727 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115728 = local_tid_126876; + int64_t j_115803 = ltid_x_115727 + binop_x_115802; + int64_t gtid_115805 = binop_x_115688 + ltid_y_115729; + bool binop_x_115811 = slt64(j_115803, j_m_i_106475); + bool binop_y_115812 = slt64(gtid_115805, m_75136); + bool cond_115813 = binop_x_115811 && binop_y_115812; + double pre_115814; + + if (cond_115813) { + int64_t slice_119565 = x_106470 + j_115803; + double x_115815 = ((__global + double *) mem_121359)[slice_119565 * + (k2p2zq_75151 * + m_75136) + + gtid_115805 * + k2p2zq_75151 + + i_106471]; + + pre_115814 = x_115815; } else { - loop_cond_33432 = 0; + pre_115814 = 0.0; } + ((__local double *) mem_121400)[ltid_y_115729 * tile_sizze_115656 + + ltid_x_115727] = pre_115814; + barrier(CLK_LOCAL_MEM_FENCE); - bool xszq_33443; - int64_t xszq_33444; - bool loop_while_33446; - int64_t j_33447; + int64_t slice_119566 = x_106470 + binop_x_115802; + double mem_121404[1]; + 
int64_t ltid_y_115762 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115760 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115761 = local_tid_126876; + int64_t gtid_115819 = binop_x_115688 + ltid_y_115762; + int64_t gtid_115821 = binop_x_115690 + ltid_x_115760; + double acc_115825 = mem_param_121396[(int64_t) 0]; + bool binop_x_115829 = slt64(gtid_115819, m_75136); + bool binop_y_115830 = slt64(gtid_115821, k2p2zq_75151); + bool cond_115831 = binop_x_115829 && binop_y_115830; + double acc_115832; - loop_while_33446 = loop_cond_33432; - j_33447 = index_primexp_33430; - while (loop_while_33446) { - int64_t loopres_33449 = sub64(j_33447, (int64_t) 1); - bool x_33450 = sle64((int64_t) 0, j_33447); - bool y_33451 = slt64(j_33447, iota32_arg_28233); - bool bounds_check_33452 = x_33450 && y_33451; - bool index_certs_33453; - - if (!bounds_check_33452) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 60) == - -1) { - global_failure_args[0] = j_33447; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } - } - - float copy_arg_33454 = ((__global - float *) mem_45303)[phys_tid_33188 + - j_33447 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - bool x_33455 = sle64((int64_t) 0, loopres_33449); - bool y_33456 = slt64(loopres_33449, iota32_arg_28233); - bool bounds_check_33457 = x_33455 && y_33456; - bool index_certs_33458; - - if (!bounds_check_33457) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 61) == - -1) { - global_failure_args[0] = loopres_33449; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } - } - - float copy_arg_33459 = ((__global - float *) mem_45303)[phys_tid_33188 + - loopres_33449 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - - ((__global float *) mem_45303)[phys_tid_33188 + j_33447 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)] = - copy_arg_33459; - ((__global float *) mem_45303)[phys_tid_33188 + loopres_33449 * - (segmap_usable_groups_33421 * - 
segmap_group_sizze_33420)] = - copy_arg_33454; - - bool cond_33462 = slt64((int64_t) 0, loopres_33449); - bool loop_cond_33463; - - if (cond_33462) { - bool index_certs_33464; + if (cond_115831) { + double x_115833; + double redout_119719 = acc_115825; + + for (int64_t i_119720 = 0; i_119720 < tile_sizze_115656; + i_119720++) { + int64_t slice_120008 = slice_119566 + i_119720; + double x_115838 = ((__local + double *) mem_121400)[ltid_y_115762 * + tile_sizze_115656 + + i_119720]; + bool isnan_res_115839; - if (!bounds_check_33457) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 62) == -1) { - global_failure_args[0] = loopres_33449; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } - } + isnan_res_115839 = futrts_isnan64(x_115838); - float defunc_2_lifted_gt_arg_33465 = ((__global - float *) mem_45303)[phys_tid_33188 + - loopres_33449 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - int64_t i_33466 = sub64(loopres_33449, (int64_t) 1); - bool x_33467 = sle64((int64_t) 0, i_33466); - bool y_33468 = slt64(i_33466, iota32_arg_28233); - bool bounds_check_33469 = x_33467 && y_33468; - bool index_certs_33470; + double defunc_1_f_res_115840; - if (!bounds_check_33469) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 63) == -1) { - global_failure_args[0] = i_33466; - global_failure_args[1] = iota32_arg_28233; - ; - } - return; - } + if (isnan_res_115839) { + defunc_1_f_res_115840 = 0.0; + } else { + double x_115837 = ((__local + double *) mem_param_121388)[ltid_y_115762 * + ctx_val_121390 + + ltid_x_115760 * + k2p2zq_75151 + + slice_120008]; + double defunc_1_f_res_f_res_115841 = x_115837 * + x_115838; + + defunc_1_f_res_115840 = defunc_1_f_res_f_res_115841; } - float defunc_1_lifted_gt_arg_33471 = ((__global - float *) mem_45303)[phys_tid_33188 + - i_33466 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - bool defunc_1_zlze_res_33472 = - defunc_1_lifted_gt_arg_33471 <= - defunc_2_lifted_gt_arg_33465; - 
bool defunc_2_lifted_gt_res_33473 = - !defunc_1_zlze_res_33472; + double defunc_1_op_res_115836 = defunc_1_f_res_115840 + + redout_119719; + double redout_tmp_126888 = defunc_1_op_res_115836; - loop_cond_33463 = defunc_2_lifted_gt_res_33473; - } else { - loop_cond_33463 = 0; + redout_119719 = redout_tmp_126888; } - - bool loop_while_tmp_46629 = loop_cond_33463; - int64_t j_tmp_46630 = loopres_33449; - - loop_while_33446 = loop_while_tmp_46629; - j_33447 = j_tmp_46630; + x_115833 = redout_119719; + acc_115832 = x_115833; + } else { + acc_115832 = acc_115825; } - xszq_33443 = loop_while_33446; - xszq_33444 = j_33447; + mem_121404[(int64_t) 0] = acc_115832; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_126886[1]; + + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_126886[i_4] = mem_121404[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_121396[i_5] = mem_param_tmp_126886[i_5]; } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_121405[i_6] = mem_param_121396[i_6]; - int32_t i_33474 = sdiv32(y_33425, 2); - int32_t j_33475 = sub32(i_33474, 1); - bool cond_33476 = x_33423 == x_33424; - float defunc_0_f_res_33477; + int64_t residual_input_115851 = srem64(j_m_i_106475, tile_sizze_115656); + bool cond_115852 = residual_input_115851 == (int64_t) 0; - if (cond_33476) { - defunc_0_f_res_33477 = 0.0F; + if (cond_115852) { + mem_125187[(int64_t) 0] = accs_mem_121405[(int64_t) 0]; } else { - int32_t x_33478 = smod32(y_33425, 2); - bool cond_33479 = x_33478 == 0; - float defunc_0_f_res_f_res_33480; + int64_t binop_x_115929 = tile_sizze_115656 * num_whole_tiles_115714; + int64_t ltid_y_115855 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115853 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115854 = local_tid_126876; + int64_t j_115930 = ltid_x_115853 + binop_x_115929; + int64_t gtid_115932 = binop_x_115688 + ltid_y_115855; + bool binop_x_115938 = slt64(j_115930, j_m_i_106475); + bool binop_y_115939 = slt64(gtid_115932, m_75136); + bool 
cond_115940 = binop_x_115938 && binop_y_115939; + double pre_115941; - if (cond_33479) { - int64_t j_33481 = sext_i32_i64(j_33475); - bool x_33482 = sle64((int64_t) 0, j_33481); - bool y_33483 = slt64(j_33481, iota32_arg_28203); - bool bounds_check_33484 = x_33482 && y_33483; - bool index_certs_33485; - - if (!bounds_check_33484) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 64) == - -1) { - global_failure_args[0] = j_33481; - global_failure_args[1] = iota32_arg_28203; - ; - } - return; + if (cond_115940) { + int64_t slice_119567 = x_106470 + j_115930; + double x_115942 = ((__global + double *) mem_121359)[slice_119567 * + (k2p2zq_75151 * + m_75136) + + gtid_115932 * + k2p2zq_75151 + + i_106471]; + + pre_115941 = x_115942; + } else { + pre_115941 = 0.0; + } + ((__local double *) mem_121409)[ltid_y_115855 * tile_sizze_115656 + + ltid_x_115853] = pre_115941; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119568 = x_106470 + binop_x_115929; + int64_t ltid_y_115889 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115887 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115888 = local_tid_126876; + int64_t gtid_115947 = binop_x_115688 + ltid_y_115889; + int64_t gtid_115949 = binop_x_115690 + ltid_x_115887; + double acc_115953 = accs_mem_121405[(int64_t) 0]; + bool binop_x_115957 = slt64(gtid_115947, m_75136); + bool binop_y_115958 = slt64(gtid_115949, k2p2zq_75151); + bool cond_115959 = binop_x_115957 && binop_y_115958; + double acc_115960; + + if (cond_115959) { + double x_115961; + double redout_119721 = acc_115953; + + for (int64_t i_119722 = 0; i_119722 < residual_input_115851; + i_119722++) { + int64_t slice_120009 = slice_119568 + i_119722; + double x_115966 = ((__local + double *) mem_121409)[ltid_y_115889 * + tile_sizze_115656 + + i_119722]; + bool isnan_res_115967; + + isnan_res_115967 = futrts_isnan64(x_115966); + + double defunc_1_f_res_115968; + + if (isnan_res_115967) { + defunc_1_f_res_115968 = 0.0; + } else { + double x_115965 = 
((__local + double *) mem_param_121388)[ltid_y_115889 * + ctx_val_121390 + + ltid_x_115887 * + k2p2zq_75151 + + slice_120009]; + double defunc_1_f_res_f_res_115969 = x_115965 * + x_115966; + + defunc_1_f_res_115968 = defunc_1_f_res_f_res_115969; } + + double defunc_1_op_res_115964 = defunc_1_f_res_115968 + + redout_119721; + double redout_tmp_126889 = defunc_1_op_res_115964; + + redout_119721 = redout_tmp_126889; } - - float x_33486 = ((__global float *) mem_45303)[phys_tid_33188 + - j_33481 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - int64_t i_33487 = sext_i32_i64(i_33474); - bool x_33488 = sle64((int64_t) 0, i_33487); - bool y_33489 = slt64(i_33487, iota32_arg_28203); - bool bounds_check_33490 = x_33488 && y_33489; - bool index_certs_33491; - - if (!bounds_check_33490) { + x_115961 = redout_119721; + acc_115960 = x_115961; + } else { + acc_115960 = acc_115953; + } + mem_121413[(int64_t) 0] = acc_115960; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125187[(int64_t) 0] = mem_121413[(int64_t) 0]; + } + + __local char *mem_121428; + + mem_121428 = (__local char *) mem_121428_backing_7; + + int64_t ltid_y_115973 = sext_i32_i64(ltid_pre_126880); + int64_t ltid_x_115971 = sext_i32_i64(ltid_pre_126881); + int32_t ltid_flat_115972 = local_tid_126876; + + if (slt64(ltid_y_115973, tile_sizze_115656) && slt64(ltid_x_115971, + tile_sizze_115656)) { + int64_t gtid_115982 = binop_x_115688 + ltid_y_115973; + int64_t gtid_115984 = binop_x_115690 + ltid_x_115971; + bool binop_x_115986 = slt64(gtid_115982, m_75136); + bool binop_y_115987 = slt64(gtid_115984, k2p2zq_75151); + bool cond_115988 = binop_x_115986 && binop_y_115987; + __local char *mem_125194; + + mem_125194 = (__local char *) mem_125194_backing_8; + if (cond_115988) { + double defunc_2_reduce_res_115985 = mem_125187[(int64_t) 0]; + bool index_ok_115993 = bounds_check_106474 && + bounds_check_106474; + bool index_certs_115994; + + if (!index_ok_115993) { { - if (atomic_cmpxchg_i32_global(global_failure, 
-1, 65) == + if (atomic_cmpxchg_i32_global(global_failure, -1, 81) == -1) { - global_failure_args[0] = i_33487; - global_failure_args[1] = iota32_arg_28203; + global_failure_args[0] = i_106471; + global_failure_args[1] = i_106471; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; ; } - return; + local_failure = true; + goto error_6; } } - float y_33492 = ((__global float *) mem_45303)[phys_tid_33188 + - i_33487 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; - float x_33493 = x_33486 + y_33492; - float defunc_0_f_res_f_res_t_res_33494 = x_33493 / 2.0F; - - defunc_0_f_res_f_res_33480 = defunc_0_f_res_f_res_t_res_33494; - } else { - int64_t i_33495 = sext_i32_i64(i_33474); - bool x_33496 = sle64((int64_t) 0, i_33495); - bool y_33497 = slt64(i_33495, iota32_arg_28203); - bool bounds_check_33498 = x_33496 && y_33497; - bool index_certs_33499; + double zs_arg_115995 = ((__global + double *) mem_121363)[i_106471 * + (k2p2zq_75151 * + m_75136) + + gtid_115982 * + k2p2zq_75151 + + i_106471]; + bool index_certs_115996; - if (!bounds_check_33498) { + if (!bounds_check_106474) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 66) == + if (atomic_cmpxchg_i32_global(global_failure, -1, 82) == -1) { - global_failure_args[0] = i_33495; - global_failure_args[1] = iota32_arg_28203; + global_failure_args[0] = i_106471; + global_failure_args[1] = k2p2zq_75151; ; } - return; + local_failure = true; + goto error_6; } } - float defunc_0_f_res_f_res_f_res_33500 = ((__global - float *) mem_45303)[phys_tid_33188 + - i_33495 * - (segmap_usable_groups_33421 * - segmap_group_sizze_33420)]; + double zm_arg_115997 = ((__global + double *) mem_121366)[i_106471 * + k2p2zq_75151 + + gtid_115984]; + double zm_res_115998 = zm_arg_115997 - + defunc_2_reduce_res_115985; + double zs_res_115999 = zm_res_115998 / zs_arg_115995; - defunc_0_f_res_f_res_33480 = defunc_0_f_res_f_res_f_res_33500; + ((__local double *) mem_param_121388)[ltid_y_115973 * + 
ctx_val_121390 + + ltid_x_115971 * + k2p2zq_75151 + i_106471] = + zs_res_115999; + for (int64_t i_126890 = 0; i_126890 < k2p2zq_75151; + i_126890++) { + ((__local double *) mem_125194)[i_126890] = ((__local + double *) mem_param_121388)[ltid_y_115973 * + ctx_val_121390 + + ltid_x_115971 * + k2p2zq_75151 + + i_126890]; + } } - defunc_0_f_res_33477 = defunc_0_f_res_f_res_33480; + for (int64_t i_126891 = 0; i_126891 < k2p2zq_75151; i_126891++) { + ((__local double *) mem_121428)[ltid_y_115973 * (k2p2zq_75151 * + tile_sizze_115656) + + ltid_x_115971 * k2p2zq_75151 + + i_126891] = ((__local + double *) mem_125194)[i_126891]; + } + } + + error_6: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_param_tmp_126884; + + mem_param_tmp_126884 = mem_121428; + mem_param_121388 = mem_param_tmp_126884; + } + tiled_inside_loop_mem_121442 = mem_param_121388; + + int64_t thread_out_index_126892 = gid_x_115653 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126880); + int64_t thread_out_index_126893 = gid_y_115654 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126881); + + if (slt64(thread_out_index_126892, m_75136) && + slt64(thread_out_index_126893, k2p2zq_75151)) { + for (int64_t i_126894 = 0; i_126894 < k2p2zq_75151; i_126894++) { + ((__global double *) mem_121446)[thread_out_index_126892 * + (k2p2zq_75151 * k2p2zq_75151) + + thread_out_index_126893 * + k2p2zq_75151 + i_126894] = + ((__local + double *) tiled_inside_loop_mem_121442)[sext_i32_i64(ltid_pre_126880) * + ctx_val_121390 + + sext_i32_i64(ltid_pre_126881) * + k2p2zq_75151 + + i_126894]; } - ((__global float *) mem_45323)[gtid_33187] = defunc_0_f_res_33477; } - error_0: + error_7: return; - #undef segmap_group_sizze_33420 + #undef tile_sizze_115656 } -__kernel void mainDetailedzisegmap_33309(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_27771, int64_t m_27772, - int64_t iota32_arg_28233, 
__global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45285, - __global unsigned char *mem_45289) +__kernel void mainzisegmap_intragroup_116023(__global int *global_failure, + __local volatile + int64_t *mem_121547_backing_aligned_0, + __local volatile + int64_t *mem_121531_backing_aligned_1, + __local volatile + int64_t *mem_121522_backing_aligned_2, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t x_106526, int64_t i_106527, + int64_t j_m_i_106531, + int64_t num_groups_y_116021, + int64_t num_whole_tiles_116039, + int64_t residual_input_116172, + unsigned char cond_116173, + int64_t num_threads_126157, + __global unsigned char *mem_120252, + __global unsigned char *mem_121351, + __global unsigned char *mem_121458, + __global unsigned char *mem_121508, + __global unsigned char *mem_121512, + __global unsigned char *mem_121551, + __global unsigned char *mem_125219) { - #define segmap_group_sizze_33391 (mainDetailedzisegmap_group_sizze_33312) + #define tile_sizze_116018 (mainzitile_sizze_116017) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_121547_backing_6 = (__local volatile + char *) mem_121547_backing_aligned_0; + __local volatile char *restrict mem_121531_backing_5 = (__local volatile + char *) mem_121531_backing_aligned_1; + __local volatile char *restrict mem_121522_backing_0 = (__local volatile + char *) mem_121522_backing_aligned_2; if (*global_failure >= 0) return; - int32_t global_tid_46617; - int32_t local_tid_46618; - int64_t group_sizze_46621; - int32_t wave_sizze_46620; - int32_t group_tid_46619; + int32_t global_tid_126912; + int32_t local_tid_126913; + int64_t group_sizze_126916; + int32_t wave_sizze_126915; + int32_t group_tid_126914; + + global_tid_126912 = get_global_id(0); + local_tid_126913 = get_local_id(0); + group_sizze_126916 = get_local_size(0); + wave_sizze_126915 = LOCKSTEP_WIDTH; 
+ group_tid_126914 = get_group_id(0); + + int32_t gid_flat_116023; - global_tid_46617 = get_global_id(0); - local_tid_46618 = get_local_id(0); - group_sizze_46621 = get_local_size(0); - wave_sizze_46620 = LOCKSTEP_WIDTH; - group_tid_46619 = get_group_id(0); + gid_flat_116023 = group_tid_126914; - int32_t phys_tid_33309; + int32_t ltid_pre_126917; - phys_tid_33309 = global_tid_46617; + ltid_pre_126917 = squot32(local_tid_126913, + sext_i64_i32(tile_sizze_116018)); - int64_t gtid_33307; + int32_t ltid_pre_126918; - gtid_33307 = squot64(sext_i32_i64(group_tid_46619) * - segmap_group_sizze_33391 + - sext_i32_i64(local_tid_46618), iota32_arg_28233); + ltid_pre_126918 = local_tid_126913 - squot32(local_tid_126913, + sext_i64_i32(tile_sizze_116018)) * + sext_i64_i32(tile_sizze_116018); - int64_t gtid_33308; + int64_t gid_x_116015; - gtid_33308 = sext_i32_i64(group_tid_46619) * segmap_group_sizze_33391 + - sext_i32_i64(local_tid_46618) - squot64(sext_i32_i64(group_tid_46619) * - segmap_group_sizze_33391 + - sext_i32_i64(local_tid_46618), - iota32_arg_28233) * - iota32_arg_28233; - if (slt64(gtid_33307, m_27772) && slt64(gtid_33308, iota32_arg_28233)) { - int32_t x_33394 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33307]; - int32_t y_33396 = ((__global int32_t *) mem_45285)[gtid_33307]; - int32_t index_primexp_42397 = sext_i64_i32(gtid_33308); - bool cond_33398 = slt32(index_primexp_42397, y_33396); - bool cond_33399; + gid_x_116015 = squot64(sext_i32_i64(group_tid_126914), num_groups_y_116021); + + int64_t gid_y_116016; + + gid_y_116016 = sext_i32_i64(group_tid_126914) - + squot64(sext_i32_i64(group_tid_126914), num_groups_y_116021) * + num_groups_y_116021; + + double mem_121517[1]; + int64_t ltid_y_116042 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116040 = sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116041 = local_tid_126913; + + if (slt64(ltid_y_116042, tile_sizze_116018) && slt64(ltid_x_116040, + tile_sizze_116018)) { + mem_121517[(int64_t) 
0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_116127 = gid_x_116015 * tile_sizze_116018; + int64_t binop_x_116142 = gid_y_116016 * tile_sizze_116018; + __local char *mem_121522; + + mem_121522 = (__local char *) mem_121522_backing_0; + + double accs_mem_121527[1]; + double mem_param_121518[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_121518[i_1] = mem_121517[i_1]; + for (int64_t tile_id_116051 = 0; tile_id_116051 < num_whole_tiles_116039; + tile_id_116051++) { + int64_t binop_x_116125 = tile_sizze_116018 * tile_id_116051; + int64_t ltid_y_116054 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116052 = sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116053 = local_tid_126913; + int64_t j_116126 = ltid_x_116052 + binop_x_116125; + int64_t gtid_116128 = ltid_y_116054 + binop_x_116127; + bool binop_x_116133 = slt64(j_116126, j_m_i_106531); + bool binop_y_116134 = slt64(gtid_116128, m_75136); + bool cond_116135 = binop_x_116133 && binop_y_116134; + double pre_116136; + + if (cond_116135) { + int64_t slice_119569 = x_106526 + j_116126; + double x_116137 = ((__global double *) mem_121458)[slice_119569 * + (k2p2zq_75151 * + m_75136) + + gtid_116128 * + k2p2zq_75151 + + i_106527]; + + pre_116136 = x_116137; + } else { + pre_116136 = 0.0; + } + ((__local double *) mem_121522)[ltid_y_116054 * tile_sizze_116018 + + ltid_x_116052] = pre_116136; + barrier(CLK_LOCAL_MEM_FENCE); - if (cond_33398) { - int32_t i_33400 = add32(x_33394, index_primexp_42397); - int64_t i_33401 = sext_i32_i64(i_33400); - bool x_33402 = sle64((int64_t) 0, i_33401); - bool y_33403 = slt64(i_33401, N_27771); - bool bounds_check_33404 = x_33402 && y_33403; - bool index_certs_33405; + int64_t slice_119570 = x_106526 + binop_x_116125; + double mem_121526[1]; + int64_t ltid_y_116086 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116084 = sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116085 = local_tid_126913; + int64_t gtid_116141 = ltid_y_116086 + binop_x_116127; 
+ int64_t gtid_116143 = ltid_x_116084 + binop_x_116142; + double acc_116146 = mem_param_121518[(int64_t) 0]; + bool binop_x_116150 = slt64(gtid_116141, m_75136); + bool binop_y_116151 = slt64(gtid_116143, k2p2zq_75151); + bool cond_116152 = binop_x_116150 && binop_y_116151; + double acc_116153; + + if (cond_116152) { + double x_116154; + double redout_119730 = acc_116146; - if (!bounds_check_33404) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 56) == - -1) { - global_failure_args[0] = i_33401; - global_failure_args[1] = N_27771; - ; - } - return; + for (int64_t i_119731 = 0; i_119731 < tile_sizze_116018; + i_119731++) { + int64_t slice_120012 = slice_119570 + i_119731; + double x_116159 = ((__local + double *) mem_121522)[ltid_y_116086 * + tile_sizze_116018 + + i_119731]; + bool isnan_res_116160; + + isnan_res_116160 = futrts_isnan64(x_116159); + + double defunc_1_f_res_116161; + + if (isnan_res_116160) { + defunc_1_f_res_116161 = 0.0; + } else { + double x_116158 = ((__global + double *) mem_121512)[slice_120012 * + (k2p2zq_75151 * + m_75136) + + gtid_116141 * + k2p2zq_75151 + + gtid_116143]; + double defunc_1_f_res_f_res_116162 = x_116158 * x_116159; + + defunc_1_f_res_116161 = defunc_1_f_res_f_res_116162; } + + double defunc_1_op_res_116157 = defunc_1_f_res_116161 + + redout_119730; + double redout_tmp_126921 = defunc_1_op_res_116157; + + redout_119730 = redout_tmp_126921; } - - float isnan_arg_33406 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33307 * - N_27771 + - i_33401]; - bool isnan_res_33407; - - isnan_res_33407 = futrts_isnan32(isnan_arg_33406); - - bool cond_t_res_33408 = !isnan_res_33407; - - cond_33399 = cond_t_res_33408; + x_116154 = redout_119730; + acc_116153 = x_116154; } else { - cond_33399 = 0; + acc_116153 = acc_116146; } + mem_121526[(int64_t) 0] = acc_116153; + barrier(CLK_LOCAL_MEM_FENCE); - float defunc_0_f_res_33409; + double mem_param_tmp_126919[1]; - if (cond_33399) { - int32_t i_33410 = add32(x_33394, 
index_primexp_42397); - int64_t i_33411 = sext_i32_i64(i_33410); - bool x_33412 = sle64((int64_t) 0, i_33411); - bool y_33413 = slt64(i_33411, N_27771); - bool bounds_check_33414 = x_33412 && y_33413; - bool index_certs_33415; + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_126919[i_2] = mem_121526[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_121518[i_3] = mem_param_tmp_126919[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_121527[i_4] = mem_param_121518[i_4]; + + __local char *mem_121531; + + mem_121531 = (__local char *) mem_121531_backing_5; + + double mem_121535[1]; + double mem_125212[1]; + + if (cond_116173) { + mem_125212[(int64_t) 0] = accs_mem_121527[(int64_t) 0]; + } else { + int64_t binop_x_116248 = tile_sizze_116018 * num_whole_tiles_116039; + int64_t ltid_y_116176 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116174 = sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116175 = local_tid_126913; + int64_t j_116249 = ltid_x_116174 + binop_x_116248; + int64_t gtid_116251 = binop_x_116127 + ltid_y_116176; + bool binop_x_116256 = slt64(j_116249, j_m_i_106531); + bool binop_y_116257 = slt64(gtid_116251, m_75136); + bool cond_116258 = binop_x_116256 && binop_y_116257; + double pre_116259; + + if (cond_116258) { + int64_t slice_119571 = x_106526 + j_116249; + double x_116260 = ((__global double *) mem_121458)[slice_119571 * + (k2p2zq_75151 * + m_75136) + + gtid_116251 * + k2p2zq_75151 + + i_106527]; - if (!bounds_check_33414) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 57) == - -1) { - global_failure_args[0] = i_33411; - global_failure_args[1] = N_27771; - ; - } - return; + pre_116259 = x_116260; + } else { + pre_116259 = 0.0; + } + ((__local double *) mem_121531)[ltid_y_116176 * tile_sizze_116018 + + ltid_x_116174] = pre_116259; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119572 = x_106526 + binop_x_116248; + int64_t ltid_y_116209 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116207 = 
sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116208 = local_tid_126913; + int64_t gtid_116265 = binop_x_116127 + ltid_y_116209; + int64_t gtid_116267 = binop_x_116142 + ltid_x_116207; + double acc_116270 = accs_mem_121527[(int64_t) 0]; + bool binop_x_116274 = slt64(gtid_116265, m_75136); + bool binop_y_116275 = slt64(gtid_116267, k2p2zq_75151); + bool cond_116276 = binop_x_116274 && binop_y_116275; + double acc_116277; + + if (cond_116276) { + double x_116278; + double redout_119732 = acc_116270; + + for (int64_t i_119733 = 0; i_119733 < residual_input_116172; + i_119733++) { + int64_t slice_120013 = slice_119572 + i_119733; + double x_116283 = ((__local + double *) mem_121531)[ltid_y_116209 * + tile_sizze_116018 + + i_119733]; + bool isnan_res_116284; + + isnan_res_116284 = futrts_isnan64(x_116283); + + double defunc_1_f_res_116285; + + if (isnan_res_116284) { + defunc_1_f_res_116285 = 0.0; + } else { + double x_116282 = ((__global + double *) mem_121512)[slice_120013 * + (k2p2zq_75151 * + m_75136) + + gtid_116265 * + k2p2zq_75151 + + gtid_116267]; + double defunc_1_f_res_f_res_116286 = x_116282 * x_116283; + + defunc_1_f_res_116285 = defunc_1_f_res_f_res_116286; } + + double defunc_1_op_res_116281 = defunc_1_f_res_116285 + + redout_119732; + double redout_tmp_126922 = defunc_1_op_res_116281; + + redout_119732 = redout_tmp_126922; } - - float defunc_0_f_res_t_res_33416 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33307 * - N_27771 + - i_33411]; - - defunc_0_f_res_33409 = defunc_0_f_res_t_res_33416; + x_116278 = redout_119732; + acc_116277 = x_116278; } else { - defunc_0_f_res_33409 = INFINITY; + acc_116277 = acc_116270; } - ((__global float *) mem_45289)[gtid_33307 * iota32_arg_28233 + - gtid_33308] = defunc_0_f_res_33409; + mem_121535[(int64_t) 0] = acc_116277; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125212[(int64_t) 0] = mem_121535[(int64_t) 0]; } - error_0: + __local char *mem_121547; + + mem_121547 = (__local char *) mem_121547_backing_6; + + 
int64_t ltid_y_116290 = sext_i32_i64(ltid_pre_126917); + int64_t ltid_x_116288 = sext_i32_i64(ltid_pre_126918); + int32_t ltid_flat_116289 = local_tid_126913; + + if (slt64(ltid_y_116290, tile_sizze_116018) && slt64(ltid_x_116288, + tile_sizze_116018)) { + int64_t gtid_116299 = binop_x_116127 + ltid_y_116290; + int64_t gtid_116301 = binop_x_116142 + ltid_x_116288; + bool binop_x_116303 = slt64(gtid_116299, m_75136); + bool binop_y_116304 = slt64(gtid_116301, k2p2zq_75151); + bool cond_116305 = binop_x_116303 && binop_y_116304; + + if (cond_116305) { + double defunc_2_reduce_res_116302 = mem_125212[(int64_t) 0]; + double defunc_3_map_res_r_transformed_row_116309 = ((__global + double *) mem_121351)[gtid_116299 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_106527 * + k2p2zq_75151 + + i_106527]; + double defunc_2_map_res_transformed_row_116311 = ((__global + double *) mem_120252)[gtid_116301 * + k2p2zq_75151 + + i_106527]; + double zm_res_116312 = defunc_2_map_res_transformed_row_116311 - + defunc_2_reduce_res_116302; + double zs_res_116313 = zm_res_116312 / + defunc_3_map_res_r_transformed_row_116309; + + ((__global double *) mem_121508)[gtid_116299 * k2p2zq_75151 + + gtid_116301 + i_106527 * + (k2p2zq_75151 * m_75136)] = + zs_res_116313; + for (int64_t i_126923 = 0; i_126923 < k2p2zq_75151; i_126923++) { + ((__global double *) mem_125219)[gid_flat_116023 + i_126923 * + num_threads_126157] = + ((__global double *) mem_121508)[gtid_116299 * + k2p2zq_75151 + + gtid_116301 + i_126923 * + (k2p2zq_75151 * m_75136)]; + } + } + for (int64_t i_126924 = 0; i_126924 < k2p2zq_75151; i_126924++) { + ((__local double *) mem_121547)[ltid_y_116290 * (k2p2zq_75151 * + tile_sizze_116018) + + ltid_x_116288 * k2p2zq_75151 + + i_126924] = ((__global + double *) mem_125219)[gid_flat_116023 + + i_126924 * + num_threads_126157]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t thread_out_index_126925 = gid_x_116015 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126917); + int64_t 
thread_out_index_126926 = gid_y_116016 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126918); + + if (slt64(thread_out_index_126925, m_75136) && + slt64(thread_out_index_126926, k2p2zq_75151)) { + for (int64_t i_126927 = 0; i_126927 < k2p2zq_75151; i_126927++) { + ((__global double *) mem_121551)[thread_out_index_126925 * + (k2p2zq_75151 * k2p2zq_75151) + + thread_out_index_126926 * + k2p2zq_75151 + i_126927] = + ((__local double *) mem_121547)[sext_i32_i64(ltid_pre_126917) * + (k2p2zq_75151 * + tile_sizze_116018) + + sext_i32_i64(ltid_pre_126918) * + k2p2zq_75151 + i_126927]; + } + } + + error_6: return; - #undef segmap_group_sizze_33391 + #undef tile_sizze_116018 } -__kernel void mainDetailedzisegmap_33367(__global int *global_failure, - int64_t m_27772, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45285) +__kernel void mainzisegmap_intragroup_116342(__global int *global_failure, + __local volatile + int64_t *mem_121654_backing_aligned_0, + __local volatile + int64_t *mem_121652_backing_aligned_1, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t gridDim_x_116335, + int64_t gridDim_y_116336, + int64_t full_tiles_116367, + int64_t kk_116570, + int64_t binop_x_120251, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned char *mem_121636, + __global unsigned char *mem_121827) { - #define segmap_group_sizze_33376 (mainDetailedzisegmap_group_sizze_33369) + #define Ty_116322 (mainziTy_116319) + #define Ry_116323 (mainziRy_116321) + #define Tx_116324 (mainziTx_116318) + #define Rx_116325 (mainziRx_116320) + #define Tk_116326 (mainziTk_116317) + #define tk_div_tx_116327 (sdiv_up64(mainziTk_116317, mainziTx_116318)) + #define tk_div_ty_116328 (sdiv_up64(mainziTk_116317, mainziTy_116319)) + #define TxRx_116329 (mainziTx_116318 * mainziRx_116320) + #define TyRy_116330 (mainziTy_116319 * mainziRy_116321) + #define a_loc_szz_116332 (mainziTk_116317 * 
(mainziTy_116319 * mainziRy_116321)) + #define b_loc_szz_116334 (mainziRx_116320 * (mainziTx_116318 * mainziTk_116317)) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict mem_121654_backing_1 = (__local volatile + char *) mem_121654_backing_aligned_0; + __local volatile char *restrict mem_121652_backing_0 = (__local volatile + char *) mem_121652_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46612; - int32_t local_tid_46613; - int64_t group_sizze_46616; - int32_t wave_sizze_46615; - int32_t group_tid_46614; + int32_t global_tid_127012; + int32_t local_tid_127013; + int64_t group_sizze_127016; + int32_t wave_sizze_127015; + int32_t group_tid_127014; - global_tid_46612 = get_global_id(0); - local_tid_46613 = get_local_id(0); - group_sizze_46616 = get_local_size(0); - wave_sizze_46615 = LOCKSTEP_WIDTH; - group_tid_46614 = get_group_id(0); + global_tid_127012 = get_global_id(0); + local_tid_127013 = get_local_id(0); + group_sizze_127016 = get_local_size(0); + wave_sizze_127015 = LOCKSTEP_WIDTH; + group_tid_127014 = get_group_id(0); - int32_t phys_tid_33367; + int32_t gid_flat_116342; - phys_tid_33367 = global_tid_46612; + gid_flat_116342 = group_tid_127014; - int64_t gtid_33366; + int32_t ltid_pre_127017; - gtid_33366 = sext_i32_i64(group_tid_46614) * segmap_group_sizze_33376 + - sext_i32_i64(local_tid_46613); - if (slt64(gtid_33366, m_27772)) { - int32_t x_33379 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_33366]; - int32_t x_33380 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33366]; - int32_t y_33381 = sub32(x_33379, x_33380); - - ((__global int32_t *) mem_45285)[gtid_33366] = y_33381; - } + ltid_pre_127017 = squot32(local_tid_127013, sext_i64_i32(Tx_116324)); - error_0: - return; - #undef segmap_group_sizze_33376 -} -__kernel void mainDetailedzisegmap_33545(__global int *global_failure, - int failure_is_an_option, __global - int64_t 
*global_failure_args, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, - int64_t num_groups_33660, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global unsigned char *mem_45282, - __global unsigned char *mem_45326, - __global unsigned char *mem_45340, - __global unsigned char *mem_45354, - __global unsigned char *mem_45369, - __global unsigned char *mem_45372, - __global unsigned char *mem_45374, - __global unsigned char *mem_45376) -{ - #define segmap_group_sizze_33659 (mainDetailedzisegmap_group_sizze_33547) + int32_t ltid_pre_127018; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - volatile __local bool local_failure; + ltid_pre_127018 = local_tid_127013 - squot32(local_tid_127013, + sext_i64_i32(Tx_116324)) * + sext_i64_i32(Tx_116324); - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; + int64_t gtid_104827; + + gtid_104827 = squot64(sext_i32_i64(group_tid_127014), gridDim_y_116336 * + gridDim_x_116335); + + int64_t gid_y_116341; + + gid_y_116341 = squot64(sext_i32_i64(group_tid_127014) - + squot64(sext_i32_i64(group_tid_127014), + gridDim_y_116336 * gridDim_x_116335) * + (gridDim_y_116336 * gridDim_x_116335), + gridDim_x_116335); + + int64_t gid_x_116340; + + gid_x_116340 = sext_i32_i64(group_tid_127014) - + squot64(sext_i32_i64(group_tid_127014), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * gridDim_x_116335) - + squot64(sext_i32_i64(group_tid_127014) - + squot64(sext_i32_i64(group_tid_127014), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * + 
gridDim_x_116335), + gridDim_x_116335) * gridDim_x_116335; + + int64_t iii_116343; + + iii_116343 = TyRy_116330 * gid_y_116341; + + int64_t jjj_116344 = TxRx_116329 * gid_x_116340; + double mem_121650[Ry_116323 * Rx_116325]; + int64_t ltid_y_116347 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_x_116345 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116346 = local_tid_127013; + double mem_121641[Ry_116323 * Rx_116325]; + + for (int64_t i_116358 = 0; i_116358 < Ry_116323; i_116358++) { + for (int64_t i_116361 = 0; i_116361 < Rx_116325; i_116361++) { + mem_121641[i_116358 * Rx_116325 + i_116361] = 0.0; + } + } + for (int64_t i_127021 = 0; i_127021 < Ry_116323; i_127021++) { + for (int64_t i_127022 = 0; i_127022 < Rx_116325; i_127022++) { + mem_121650[i_127021 * Rx_116325 + i_127022] = mem_121641[i_127021 * + Rx_116325 + + i_127022]; + } } - local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46636; - int32_t local_tid_46637; - int64_t group_sizze_46640; - int32_t wave_sizze_46639; - int32_t group_tid_46638; - - global_tid_46636 = get_global_id(0); - local_tid_46637 = get_local_id(0); - group_sizze_46640 = get_local_size(0); - wave_sizze_46639 = LOCKSTEP_WIDTH; - group_tid_46638 = get_group_id(0); - - int32_t phys_tid_33545; - - phys_tid_33545 = global_tid_46636; - - int32_t phys_group_id_46641; - - phys_group_id_46641 = get_group_id(0); - for (int32_t i_46642 = 0; i_46642 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, segmap_group_sizze_33659)) - - phys_group_id_46641, sext_i64_i32(num_groups_33660)); - i_46642++) { - int32_t virt_group_id_46643 = phys_group_id_46641 + i_46642 * - sext_i64_i32(num_groups_33660); - int64_t gtid_33544 = sext_i32_i64(virt_group_id_46643) * - segmap_group_sizze_33659 + sext_i32_i64(local_tid_46637); - - if (slt64(gtid_33544, m_27772)) { - int32_t x_33666 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_33544]; - int32_t x_33667 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33544]; - 
float x_33668 = ((__global - float *) defunc_3_map_res_mem_45246)[gtid_33544]; - int32_t x_33669 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_33544]; - float x_33670 = ((__global - float *) defunc_0_f_res_mem_45279)[gtid_33544]; - int32_t y_33673 = sub32(x_33666, x_33667); - float discard_44340; - float scanacc_44336 = 0.0F; - - for (int64_t i_44338 = 0; i_44338 < iota32_arg_28233; i_44338++) { - int32_t index_primexp_44371 = sext_i64_i32(i_44338); - bool cond_33679 = sle32(y_33673, index_primexp_44371); - float defunc_0_f_res_33680; - - if (cond_33679) { - defunc_0_f_res_33680 = 0.0F; - } else { - bool cond_33681 = index_primexp_44371 == 0; - float defunc_0_f_res_f_res_33682; + __local char *mem_121652; + + mem_121652 = (__local char *) mem_121652_backing_0; + + __local char *mem_121654; + + mem_121654 = (__local char *) mem_121654_backing_1; + + double mem_121725[Ry_116323]; + double mem_121729[Rx_116325]; + double loop_mem_121741[Ry_116323 * Rx_116325]; + double mem_param_121655[Ry_116323 * Rx_116325]; + + for (int32_t i_2 = 0; i_2 < Ry_116323 * Rx_116325; i_2++) + mem_param_121655[i_2] = mem_121650[i_2]; + for (int64_t i_116368 = 0; i_116368 < full_tiles_116367; i_116368++) { + int64_t kk_116372 = Tk_116326 * i_116368; + + for (int64_t i_116373 = 0; i_116373 < Ry_116323; i_116373++) { + int64_t binop_y_116396 = Ty_116322 * i_116373; + + for (int64_t i_116375 = 0; i_116375 < tk_div_tx_116327; + i_116375++) { + int64_t binop_y_116394 = Tx_116324 * i_116375; + int64_t ltid_x_116377 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_y_116378 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116379 = local_tid_127013; + int64_t k_116395 = ltid_y_116378 + binop_y_116394; + int64_t i_116397 = ltid_x_116377 + binop_y_116396; + int64_t gtid_116398 = iii_116343 + i_116397; + int64_t A_col_idx_116399 = kk_116372 + k_116395; + bool cond_116400 = slt64(gtid_116398, k2p2zq_75151); + double A_elem_116401; + + if (cond_116400) { + double A_elem_116403 = 
((__global + double *) mem_121636)[gtid_104827 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_116398 * + k2p2zq_75151 + + A_col_idx_116399]; - if (cond_33681) { - defunc_0_f_res_f_res_33682 = x_33670; - } else { - int32_t i_33683 = add32(x_33667, index_primexp_44371); - int64_t i_33684 = sext_i32_i64(i_33683); - bool x_33685 = sle64((int64_t) 0, i_33684); - bool y_33686 = slt64(i_33684, N_27771); - bool bounds_check_33687 = x_33685 && y_33686; - bool index_certs_33688; - - if (!bounds_check_33687) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 67) == -1) { - global_failure_args[0] = i_33684; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_33689 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33544 * - N_27771 + - i_33684]; - int32_t x_33690 = sub32(x_33667, x_33669); - int32_t i_33691 = add32(x_33690, index_primexp_44371); - int64_t i_33692 = sext_i32_i64(i_33691); - bool x_33693 = sle64((int64_t) 0, i_33692); - bool y_33694 = slt64(i_33692, N_27771); - bool bounds_check_33695 = x_33693 && y_33694; - bool index_certs_33696; - - if (!bounds_check_33695) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 68) == -1) { - global_failure_args[0] = i_33692; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float y_33697 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33544 * - N_27771 + - i_33692]; - float defunc_0_f_res_f_res_f_res_33698 = x_33689 - - y_33697; - - defunc_0_f_res_f_res_33682 = - defunc_0_f_res_f_res_f_res_33698; - } - defunc_0_f_res_33680 = defunc_0_f_res_f_res_33682; + A_elem_116401 = A_elem_116403; + } else { + A_elem_116401 = 0.0; } - float defunc_1_op_res_33677 = defunc_0_f_res_33680 + - scanacc_44336; - - ((__global float *) mem_45326)[phys_tid_33545 + i_44338 * - (num_groups_33660 * - segmap_group_sizze_33659)] = - defunc_1_op_res_33677; + bool cond_116405 = slt64(k_116395, Tk_116326); + int64_t 
a_loc_ind_116406; - float scanacc_tmp_46644 = defunc_1_op_res_33677; - - scanacc_44336 = scanacc_tmp_46644; + if (cond_116405) { + int64_t binop_y_116407 = Tk_116326 * i_116397; + int64_t loc_fi_116408 = k_116395 + binop_y_116407; + + a_loc_ind_116406 = loc_fi_116408; + } else { + a_loc_ind_116406 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_116406) && + slt64(a_loc_ind_116406, a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116406] = + A_elem_116401; + } + barrier(CLK_LOCAL_MEM_FENCE); } - discard_44340 = scanacc_44336; - - float i32_res_33699 = sitofp_i32_f32(x_33667); - float sqrt_res_33700; - - sqrt_res_33700 = futrts_sqrt32(i32_res_33699); - - float y_33701 = x_33668 * sqrt_res_33700; - bool defunc_0_f_res_33703; - int32_t defunc_0_f_res_33704; - float defunc_0_f_res_33705; - bool redout_44342; - int32_t redout_44343; - float redout_44344; + } + for (int64_t i_116413 = 0; i_116413 < tk_div_ty_116328; i_116413++) { + int64_t binop_y_116434 = Ty_116322 * i_116413; - redout_44342 = 0; - redout_44343 = -1; - redout_44344 = 0.0F; - for (int64_t i_44346 = 0; i_44346 < iota32_arg_28203; i_44346++) { - float x_33721 = ((__global float *) mem_45326)[phys_tid_33545 + - i_44346 * - (num_groups_33660 * - segmap_group_sizze_33659)]; - float x_33722 = ((__global float *) mem_45282)[i_44346]; - int32_t index_primexp_44372 = sext_i64_i32(i_44346); - int32_t x_33723 = index_primexp_44372; - float defunc_0_f_res_33724 = x_33721 / y_33701; - bool cond_33725 = slt32(index_primexp_44372, y_33673); - bool isnan_res_33726; - - isnan_res_33726 = futrts_isnan32(defunc_0_f_res_33724); - - bool cond_t_res_33727 = !isnan_res_33726; - bool x_33728 = cond_33725 && cond_t_res_33727; - float abs_res_33729 = (float) fabs(defunc_0_f_res_33724); - bool defunc_2_f_res_t_res_33730 = x_33722 < abs_res_33729; - bool x_33731 = x_33728 && defunc_2_f_res_t_res_33730; - float defunc_1_f_res_33732; - - if (cond_33725) { - defunc_1_f_res_33732 = defunc_0_f_res_33724; + for 
(int64_t i_116415 = 0; i_116415 < Rx_116325; i_116415++) { + int64_t binop_y_116436 = Tx_116324 * i_116415; + int64_t ltid_x_116417 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_y_116418 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116419 = local_tid_127013; + int64_t k_116435 = ltid_x_116417 + binop_y_116434; + int64_t j_116437 = ltid_y_116418 + binop_y_116436; + int64_t gtid_116438 = jjj_116344 + j_116437; + int64_t B_row_idx_116439 = kk_116372 + k_116435; + bool cond_116440 = slt64(gtid_116438, k2p2zq_75151); + double B_elem_116441; + + if (cond_116440) { + double B_elem_116443 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_104827 * + binop_x_120251 + + B_row_idx_116439 * + k2p2zq_75151 + + gtid_116438]; + + B_elem_116441 = B_elem_116443; } else { - defunc_1_f_res_33732 = 0.0F; + B_elem_116441 = 0.0; } - bool defunc_1_op_res_33711; - int32_t defunc_1_op_res_33712; + bool cond_116445 = slt64(k_116435, Tk_116326); + int64_t b_loc_ind_116446; - if (redout_44342) { - defunc_1_op_res_33711 = redout_44342; - defunc_1_op_res_33712 = redout_44343; + if (cond_116445) { + int64_t binop_y_116447 = TxRx_116329 * k_116435; + int64_t loc_fi_116448 = j_116437 + binop_y_116447; + + b_loc_ind_116446 = loc_fi_116448; } else { - bool x_33713 = x_33731 && x_33731; - bool x_33714 = !x_33731; - bool y_33715 = x_33714 && redout_44342; - bool defunc_1_op_res_f_res_33716 = x_33713 || y_33715; - int32_t defunc_1_op_res_f_res_33717; - - if (x_33731) { - defunc_1_op_res_f_res_33717 = x_33723; - } else { - defunc_1_op_res_f_res_33717 = redout_44343; - } - defunc_1_op_res_33711 = defunc_1_op_res_f_res_33716; - defunc_1_op_res_33712 = defunc_1_op_res_f_res_33717; + b_loc_ind_116446 = (int64_t) -1; } + if (sle64((int64_t) 0, b_loc_ind_116446) && + slt64(b_loc_ind_116446, b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116446] = + B_elem_116441; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_121740[Ry_116323 * Rx_116325]; + double 
mem_param_121712[Ry_116323 * Rx_116325]; + + for (int32_t i_3 = 0; i_3 < Ry_116323 * Rx_116325; i_3++) + mem_param_121712[i_3] = mem_param_121655[i_3]; + for (int64_t i_116453 = 0; i_116453 < Tk_116326; i_116453++) { + int64_t binop_y_116492 = TxRx_116329 * i_116453; + int64_t ltid_y_116457 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_x_116455 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116456 = local_tid_127013; + double mem_121715[Ry_116323]; + double mem_121717[Rx_116325]; + int64_t binop_x_116483 = Ry_116323 * ltid_y_116457; + + for (int64_t i_116481 = 0; i_116481 < Ry_116323; i_116481++) { + int64_t binop_x_116484 = i_116481 + binop_x_116483; + int64_t binop_y_116485 = Tk_116326 * binop_x_116484; + int64_t a_loc_ind_116486 = i_116453 + binop_y_116485; - float defunc_1_op_res_33720 = defunc_1_f_res_33732 + - redout_44344; - - ((__global float *) mem_45340)[phys_tid_33545 + i_44346 * - (num_groups_33660 * - segmap_group_sizze_33659)] = - defunc_0_f_res_33724; - - bool redout_tmp_46646 = defunc_1_op_res_33711; - int32_t redout_tmp_46647 = defunc_1_op_res_33712; - float redout_tmp_46648 = defunc_1_op_res_33720; - - redout_44342 = redout_tmp_46646; - redout_44343 = redout_tmp_46647; - redout_44344 = redout_tmp_46648; + for (int64_t i_127034 = 0; i_127034 < (int64_t) 1; i_127034++) { + mem_121715[i_116481 + i_127034] = ((__local + double *) mem_121652)[a_loc_ind_116486 + + i_127034]; + } } - defunc_0_f_res_33703 = redout_44342; - defunc_0_f_res_33704 = redout_44343; - defunc_0_f_res_33705 = redout_44344; - bool cond_33733 = y_33673 == 0; - float defunc_0_f_res_33734; + int64_t binop_y_116494 = Rx_116325 * ltid_x_116455; - if (cond_33733) { - defunc_0_f_res_33734 = 0.0F; - } else { - float i32_res_33735 = sitofp_i32_f32(y_33673); - float defunc_0_f_res_f_res_33736 = defunc_0_f_res_33705 / - i32_res_33735; + for (int64_t i_116490 = 0; i_116490 < Rx_116325; i_116490++) { + int64_t binop_x_116493 = i_116490 + binop_y_116492; + int64_t b_loc_ind_116495 = 
binop_x_116493 + binop_y_116494; - defunc_0_f_res_33734 = defunc_0_f_res_f_res_33736; + for (int64_t i_127036 = 0; i_127036 < (int64_t) 1; i_127036++) { + mem_121717[i_116490 + i_127036] = ((__local + double *) mem_121654)[b_loc_ind_116495 + + i_127036]; + } } + for (int64_t i_127037 = 0; i_127037 < Ry_116323; i_127037++) { + mem_121725[i_127037] = mem_121715[i_127037]; + } + for (int64_t i_127038 = 0; i_127038 < Rx_116325; i_127038++) { + mem_121729[i_127038] = mem_121717[i_127038]; + } + barrier(CLK_LOCAL_MEM_FENCE); - bool cond_33737 = !defunc_0_f_res_33703; - int32_t fst_breakzq_33738; + double mem_121739[Ry_116323 * Rx_116325]; + int64_t ltid_y_116502 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_x_116500 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116501 = local_tid_127013; + int64_t binop_y_116543 = Ry_116323 * ltid_y_116502; + int64_t binop_y_116547 = Rx_116325 * ltid_x_116500; - if (cond_33737) { - fst_breakzq_33738 = -1; - } else { - bool cond_33739 = slt32(defunc_0_f_res_33704, y_33673); - int32_t adjustValInds_res_33740; - - if (cond_33739) { - int32_t i_33741 = add32(x_33667, defunc_0_f_res_33704); - int64_t i_33742 = sext_i32_i64(i_33741); - bool x_33743 = sle64((int64_t) 0, i_33742); - bool y_33744 = slt64(i_33742, N_27771); - bool bounds_check_33745 = x_33743 && y_33744; - bool index_certs_33746; + for (int64_t i_116537 = 0; i_116537 < Ry_116323; i_116537++) { + int64_t binop_x_116542 = iii_116343 + i_116537; + int64_t cmpop_x_116544 = binop_x_116542 + binop_y_116543; + bool binop_x_116545 = slt64(cmpop_x_116544, k2p2zq_75151); + + for (int64_t i_116540 = 0; i_116540 < Rx_116325; i_116540++) { + int64_t binop_x_116546 = jjj_116344 + i_116540; + int64_t cmpop_x_116548 = binop_x_116546 + binop_y_116547; + bool binop_y_116549 = slt64(cmpop_x_116548, k2p2zq_75151); + bool cond_116550 = binop_x_116545 && binop_y_116549; - if (!bounds_check_33745) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 69) == -1) { - global_failure_args[0] 
= i_33742; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } + if (cond_116550) { + double a_116552 = mem_121725[i_116537]; + double b_116553 = mem_121729[i_116540]; + double c_116554 = mem_param_121712[i_116537 * + Rx_116325 + + i_116540]; + double defunc_1_f_res_116557 = a_116552 * b_116553; + double defunc_1_op_res_116561 = c_116554 + + defunc_1_f_res_116557; + + mem_param_121712[i_116537 * Rx_116325 + i_116540] = + defunc_1_op_res_116561; } - - int32_t x_33747 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33544 * - N_27771 + - i_33742]; - int32_t adjustValInds_res_t_res_33748 = sub32(x_33747, - n_27775); - - adjustValInds_res_33740 = adjustValInds_res_t_res_33748; - } else { - adjustValInds_res_33740 = -1; } - fst_breakzq_33738 = adjustValInds_res_33740; } + for (int64_t i_127041 = 0; i_127041 < Ry_116323; i_127041++) { + for (int64_t i_127042 = 0; i_127042 < Rx_116325; i_127042++) { + mem_121739[i_127041 * Rx_116325 + i_127042] = + mem_param_121712[i_127041 * Rx_116325 + i_127042]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); - bool cond_33749 = sle32(x_33667, 5); - bool cond_f_res_33750 = sle32(y_33673, 5); - bool x_33751 = !cond_33749; - bool y_33752 = cond_f_res_33750 && x_33751; - bool cond_33753 = cond_33749 || y_33752; - int32_t fst_breakzq_33754; + double mem_param_tmp_127031[Ry_116323 * Rx_116325]; - if (cond_33753) { - fst_breakzq_33754 = -2; - } else { - fst_breakzq_33754 = fst_breakzq_33738; - } - for (int64_t i_46650 = 0; i_46650 < iota32_arg_28233; i_46650++) { - ((__global float *) mem_45354)[phys_tid_33545 + i_46650 * - (num_groups_33660 * - segmap_group_sizze_33659)] = - NAN; - } - for (int64_t write_iter_44348 = 0; write_iter_44348 < - iota32_arg_28233; write_iter_44348++) { - int32_t index_primexp_44374 = sext_i64_i32(write_iter_44348); - bool cond_33760 = slt32(index_primexp_44374, y_33673); - int32_t defunc_0_f_res_33761; - - if (cond_33760) { - int32_t i_33762 = add32(x_33667, 
index_primexp_44374); - int64_t i_33763 = sext_i32_i64(i_33762); - bool x_33764 = sle64((int64_t) 0, i_33763); - bool y_33765 = slt64(i_33763, N_27771); - bool bounds_check_33766 = x_33764 && y_33765; - bool index_certs_33767; - - if (!bounds_check_33766) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 70) == -1) { - global_failure_args[0] = i_33763; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - int32_t x_33768 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33544 * - N_27771 + - i_33763]; - int32_t defunc_0_f_res_t_res_33769 = sub32(x_33768, - n_27775); - - defunc_0_f_res_33761 = defunc_0_f_res_t_res_33769; - } else { - defunc_0_f_res_33761 = -1; - } - - int64_t defunc_0_f_res_33770 = - sext_i32_i64(defunc_0_f_res_33761); - bool less_than_zzero_44352 = slt64(defunc_0_f_res_33770, - (int64_t) 0); - bool greater_than_sizze_44353 = sle64(iota32_arg_28233, - defunc_0_f_res_33770); - bool outside_bounds_dim_44354 = less_than_zzero_44352 || - greater_than_sizze_44353; - - if (!outside_bounds_dim_44354) { - for (int64_t i_46652 = 0; i_46652 < (int64_t) 1; - i_46652++) { - ((__global float *) mem_45354)[phys_tid_33545 + - (defunc_0_f_res_33770 + - i_46652) * - (num_groups_33660 * - segmap_group_sizze_33659)] = - ((__global float *) mem_45340)[phys_tid_33545 + - num_groups_33660 * - segmap_group_sizze_33659 * - write_iter_44348 + - i_46652 * - (num_groups_33660 * - segmap_group_sizze_33659)]; - } - } - } - for (int64_t i_46653 = 0; i_46653 < iota32_arg_28203; i_46653++) { - ((__global float *) mem_45369)[i_46653 * m_27772 + gtid_33544] = - ((__global float *) mem_45354)[phys_tid_33545 + i_46653 * - (num_groups_33660 * - segmap_group_sizze_33659)]; - } - for (int64_t i_46654 = 0; i_46654 < iota32_arg_28203; i_46654++) { - ((__global float *) mem_45372)[i_46654 * m_27772 + gtid_33544] = - ((__global float *) mem_45340)[phys_tid_33545 + i_46654 * - (num_groups_33660 * - segmap_group_sizze_33659)]; - } - 
((__global int32_t *) mem_45374)[gtid_33544] = fst_breakzq_33754; - ((__global float *) mem_45376)[gtid_33544] = defunc_0_f_res_33734; + for (int32_t i_4 = 0; i_4 < Ry_116323 * Rx_116325; i_4++) + mem_param_tmp_127031[i_4] = mem_121739[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_116323 * Rx_116325; i_5++) + mem_param_121712[i_5] = mem_param_tmp_127031[i_5]; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_0: - return; - #undef segmap_group_sizze_33659 -} -__kernel void mainDetailedzisegmap_33893(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45399, - __global unsigned char *mem_45416, - __global unsigned char *mem_45424) -{ - #define segmap_group_sizze_34224 (mainDetailedzisegmap_group_sizze_33896) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46853; - int32_t local_tid_46854; - int64_t group_sizze_46857; - int32_t wave_sizze_46856; - int32_t group_tid_46855; - - global_tid_46853 = get_global_id(0); - local_tid_46854 = get_local_id(0); - group_sizze_46857 = get_local_size(0); - wave_sizze_46856 = LOCKSTEP_WIDTH; - group_tid_46855 = get_group_id(0); - - int32_t phys_tid_33893; - - phys_tid_33893 = global_tid_46853; - - int64_t gtid_33891; - - gtid_33891 = squot64(sext_i32_i64(group_tid_46855) * - segmap_group_sizze_34224 + - sext_i32_i64(local_tid_46854), iota32_arg_28233); - - int64_t gtid_33892; - - gtid_33892 = sext_i32_i64(group_tid_46855) * segmap_group_sizze_34224 + - sext_i32_i64(local_tid_46854) - squot64(sext_i32_i64(group_tid_46855) * - segmap_group_sizze_34224 + - sext_i32_i64(local_tid_46854), - iota32_arg_28233) * - iota32_arg_28233; - if 
(slt64(gtid_33891, m_27772) && slt64(gtid_33892, iota32_arg_28233)) { - int32_t y_34229 = ((__global int32_t *) mem_45399)[gtid_33891]; - int32_t index_primexp_42427 = sext_i64_i32(gtid_33892); - int64_t binop_x_42415 = iota32_arg_28233 * gtid_33891; - int64_t binop_x_42416 = gtid_33892 + binop_x_42415; - int64_t new_index_42417 = squot64(binop_x_42416, iota32_arg_28203); - int64_t binop_y_42423 = iota32_arg_28203 * new_index_42417; - int64_t new_index_42424 = binop_x_42416 - binop_y_42423; - float write_value_34233 = ((__global - float *) mem_45416)[new_index_42417 * - iota32_arg_28203 + - new_index_42424]; - bool cond_34234 = slt32(index_primexp_42427, y_34229); - int32_t defunc_0_f_res_34235; + for (int32_t i_6 = 0; i_6 < Ry_116323 * Rx_116325; i_6++) + loop_mem_121740[i_6] = mem_param_121712[i_6]; - if (cond_34234) { - int32_t x_34227 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33891]; - int32_t i_34236 = add32(x_34227, index_primexp_42427); - int64_t i_34237 = sext_i32_i64(i_34236); - bool x_34238 = sle64((int64_t) 0, i_34237); - bool y_34239 = slt64(i_34237, N_27771); - bool bounds_check_34240 = x_34238 && y_34239; - bool index_certs_34241; + double mem_param_tmp_127023[Ry_116323 * Rx_116325]; + + for (int32_t i_7 = 0; i_7 < Ry_116323 * Rx_116325; i_7++) + mem_param_tmp_127023[i_7] = loop_mem_121740[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_116323 * Rx_116325; i_8++) + mem_param_121655[i_8] = mem_param_tmp_127023[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_116323 * Rx_116325; i_9++) + loop_mem_121741[i_9] = mem_param_121655[i_9]; + for (int64_t i_116571 = 0; i_116571 < Ry_116323; i_116571++) { + int64_t binop_y_116596 = Ty_116322 * i_116571; + + for (int64_t i_116573 = 0; i_116573 < tk_div_tx_116327; i_116573++) { + int64_t binop_y_116594 = Tx_116324 * i_116573; + int64_t ltid_x_116575 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_y_116576 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116577 = local_tid_127013; + int64_t k_116595 = 
ltid_y_116576 + binop_y_116594; + int64_t i_116597 = ltid_x_116575 + binop_y_116596; + int64_t gtid_116598 = iii_116343 + i_116597; + int64_t A_col_idx_116599 = kk_116570 + k_116595; + bool binop_x_116600 = slt64(gtid_116598, k2p2zq_75151); + bool binop_y_116601 = slt64(A_col_idx_116599, k2p2zq_75151); + bool cond_116602 = binop_x_116600 && binop_y_116601; + double A_elem_116603; - if (!bounds_check_34240) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 78) == - -1) { - global_failure_args[0] = i_34237; - global_failure_args[1] = N_27771; - ; - } - return; - } + if (cond_116602) { + double A_elem_116605 = ((__global + double *) mem_121636)[gtid_104827 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_116598 * + k2p2zq_75151 + + A_col_idx_116599]; + + A_elem_116603 = A_elem_116605; + } else { + A_elem_116603 = 0.0; } - int32_t x_34242 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33891 * - N_27771 + - i_34237]; - int32_t defunc_0_f_res_t_res_34243 = sub32(x_34242, n_27775); + bool cond_116607 = slt64(k_116595, Tk_116326); + int64_t a_loc_ind_116608; - defunc_0_f_res_34235 = defunc_0_f_res_t_res_34243; - } else { - defunc_0_f_res_34235 = -1; + if (cond_116607) { + int64_t binop_y_116609 = Tk_116326 * i_116597; + int64_t loc_fi_116610 = k_116595 + binop_y_116609; + + a_loc_ind_116608 = loc_fi_116610; + } else { + a_loc_ind_116608 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_116608) && slt64(a_loc_ind_116608, + a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116608] = + A_elem_116603; + } + barrier(CLK_LOCAL_MEM_FENCE); } - - int64_t defunc_0_f_res_34244 = sext_i32_i64(defunc_0_f_res_34235); - - if ((sle64((int64_t) 0, gtid_33891) && slt64(gtid_33891, m_27772)) && - (sle64((int64_t) 0, defunc_0_f_res_34244) && - slt64(defunc_0_f_res_34244, iota32_arg_28233))) { - ((__global float *) mem_45424)[gtid_33891 * iota32_arg_28233 + - defunc_0_f_res_34244] = - write_value_34233; + } + for (int64_t i_116615 = 0; i_116615 < 
tk_div_ty_116328; i_116615++) { + int64_t binop_y_116638 = Ty_116322 * i_116615; + + for (int64_t i_116617 = 0; i_116617 < Rx_116325; i_116617++) { + int64_t binop_y_116640 = Tx_116324 * i_116617; + int64_t ltid_x_116619 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_y_116620 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116621 = local_tid_127013; + int64_t k_116639 = ltid_x_116619 + binop_y_116638; + int64_t j_116641 = ltid_y_116620 + binop_y_116640; + int64_t gtid_116642 = jjj_116344 + j_116641; + int64_t B_row_idx_116643 = kk_116570 + k_116639; + bool binop_x_116644 = slt64(gtid_116642, k2p2zq_75151); + bool binop_y_116645 = slt64(B_row_idx_116643, k2p2zq_75151); + bool cond_116646 = binop_x_116644 && binop_y_116645; + double B_elem_116647; + + if (cond_116646) { + double B_elem_116649 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_104827 * + binop_x_120251 + + B_row_idx_116643 * + k2p2zq_75151 + + gtid_116642]; + + B_elem_116647 = B_elem_116649; + } else { + B_elem_116647 = 0.0; + } + + bool cond_116651 = slt64(k_116639, Tk_116326); + int64_t b_loc_ind_116652; + + if (cond_116651) { + int64_t binop_y_116653 = TxRx_116329 * k_116639; + int64_t loc_fi_116654 = j_116641 + binop_y_116653; + + b_loc_ind_116652 = loc_fi_116654; + } else { + b_loc_ind_116652 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_116652) && slt64(b_loc_ind_116652, + b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116652] = + B_elem_116647; + } + barrier(CLK_LOCAL_MEM_FENCE); } } - error_0: - return; - #undef segmap_group_sizze_34224 -} -__kernel void mainDetailedzisegmap_33950(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_27771, int64_t m_27772, - int32_t n_27775, __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45399, - __global unsigned char *mem_45409, - __global unsigned char *mem_45411, - 
__global unsigned char *mem_45413, - __global unsigned char *mem_45419, - __global unsigned char *mem_45421) -{ - #define segmap_group_sizze_34175 (mainDetailedzisegmap_group_sizze_33952) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46848; - int32_t local_tid_46849; - int64_t group_sizze_46852; - int32_t wave_sizze_46851; - int32_t group_tid_46850; - - global_tid_46848 = get_global_id(0); - local_tid_46849 = get_local_id(0); - group_sizze_46852 = get_local_size(0); - wave_sizze_46851 = LOCKSTEP_WIDTH; - group_tid_46850 = get_group_id(0); - - int32_t phys_tid_33950; - - phys_tid_33950 = global_tid_46848; - - int64_t gtid_33949; - - gtid_33949 = sext_i32_i64(group_tid_46850) * segmap_group_sizze_34175 + - sext_i32_i64(local_tid_46849); - if (slt64(gtid_33949, m_27772)) { - int32_t x_34179 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33949]; - int32_t y_34181 = ((__global int32_t *) mem_45399)[gtid_33949]; - bool defunc_0_f_res_34182 = ((__global bool *) mem_45409)[gtid_33949]; - bool cond_34185 = y_34181 == 0; - float defunc_0_f_res_34186; - - if (cond_34185) { - defunc_0_f_res_34186 = 0.0F; - } else { - float defunc_0_f_res_34184 = ((__global - float *) mem_45413)[gtid_33949]; - float i32_res_34187 = sitofp_i32_f32(y_34181); - float defunc_0_f_res_f_res_34188 = defunc_0_f_res_34184 / - i32_res_34187; + double mem_121807[Ry_116323]; + double mem_121811[Rx_116325]; + double mem_121821[Ry_116323 * Rx_116325]; + double loop_mem_121823[Ry_116323 * Rx_116325]; + double mem_param_121794[Ry_116323 * Rx_116325]; + + for (int32_t i_10 = 0; i_10 < Ry_116323 * Rx_116325; i_10++) + mem_param_121794[i_10] = loop_mem_121741[i_10]; + for (int64_t i_116659 = 0; i_116659 < Tk_116326; i_116659++) { + int64_t cmpop_x_116661 = kk_116570 + i_116659; + bool cond_116662 = slt64(cmpop_x_116661, k2p2zq_75151); + double mem_125235[Ry_116323 * Rx_116325]; + + if (cond_116662) { 
+ int64_t binop_y_116700 = TxRx_116329 * i_116659; + int64_t bytes_121796 = (int64_t) 8 * Ry_116323; + int64_t bytes_121798 = (int64_t) 8 * Rx_116325; + int64_t ltid_y_116665 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_x_116663 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116664 = local_tid_127013; + double mem_121797[Ry_116323]; + double mem_121799[Rx_116325]; + int64_t binop_x_116691 = Ry_116323 * ltid_y_116665; - defunc_0_f_res_34186 = defunc_0_f_res_f_res_34188; - } - - bool cond_34189 = !defunc_0_f_res_34182; - int32_t fst_breakzq_34190; - - if (cond_34189) { - fst_breakzq_34190 = -1; - } else { - int32_t defunc_0_f_res_34183 = ((__global - int32_t *) mem_45411)[gtid_33949]; - bool cond_34191 = slt32(defunc_0_f_res_34183, y_34181); - int32_t adjustValInds_res_34192; + for (int64_t i_116689 = 0; i_116689 < Ry_116323; i_116689++) { + int64_t binop_x_116692 = i_116689 + binop_x_116691; + int64_t binop_y_116693 = Tk_116326 * binop_x_116692; + int64_t a_loc_ind_116694 = i_116659 + binop_y_116693; + + for (int64_t i_127050 = 0; i_127050 < (int64_t) 1; i_127050++) { + mem_121797[i_116689 + i_127050] = ((__local + double *) mem_121652)[a_loc_ind_116694 + + i_127050]; + } + } + + int64_t binop_y_116702 = Rx_116325 * ltid_x_116663; - if (cond_34191) { - int32_t i_34193 = add32(x_34179, defunc_0_f_res_34183); - int64_t i_34194 = sext_i32_i64(i_34193); - bool x_34195 = sle64((int64_t) 0, i_34194); - bool y_34196 = slt64(i_34194, N_27771); - bool bounds_check_34197 = x_34195 && y_34196; - bool index_certs_34198; + for (int64_t i_116698 = 0; i_116698 < Rx_116325; i_116698++) { + int64_t binop_x_116701 = i_116698 + binop_y_116700; + int64_t b_loc_ind_116703 = binop_x_116701 + binop_y_116702; - if (!bounds_check_34197) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 77) == - -1) { - global_failure_args[0] = i_34194; - global_failure_args[1] = N_27771; - ; - } - return; + for (int64_t i_127052 = 0; i_127052 < (int64_t) 1; i_127052++) { + 
mem_121799[i_116698 + i_127052] = ((__local + double *) mem_121654)[b_loc_ind_116703 + + i_127052]; + } + } + for (int64_t i_127053 = 0; i_127053 < Ry_116323; i_127053++) { + mem_121807[i_127053] = mem_121797[i_127053]; + } + for (int64_t i_127054 = 0; i_127054 < Rx_116325; i_127054++) { + mem_121811[i_127054] = mem_121799[i_127054]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_116710 = sext_i32_i64(ltid_pre_127017); + int64_t ltid_x_116708 = sext_i32_i64(ltid_pre_127018); + int32_t ltid_flat_116709 = local_tid_127013; + int64_t binop_y_116751 = Ry_116323 * ltid_y_116710; + int64_t binop_y_116755 = Rx_116325 * ltid_x_116708; + + for (int64_t i_116745 = 0; i_116745 < Ry_116323; i_116745++) { + int64_t binop_x_116750 = iii_116343 + i_116745; + int64_t cmpop_x_116752 = binop_x_116750 + binop_y_116751; + bool binop_x_116753 = slt64(cmpop_x_116752, k2p2zq_75151); + + for (int64_t i_116748 = 0; i_116748 < Rx_116325; i_116748++) { + int64_t binop_x_116754 = jjj_116344 + i_116748; + int64_t cmpop_x_116756 = binop_x_116754 + binop_y_116755; + bool binop_y_116757 = slt64(cmpop_x_116756, k2p2zq_75151); + bool cond_116758 = binop_x_116753 && binop_y_116757; + + if (cond_116758) { + double a_116760 = mem_121807[i_116745]; + double b_116761 = mem_121811[i_116748]; + double c_116762 = mem_param_121794[i_116745 * + Rx_116325 + + i_116748]; + double defunc_1_f_res_116765 = a_116760 * b_116761; + double defunc_1_op_res_116769 = c_116762 + + defunc_1_f_res_116765; + + mem_param_121794[i_116745 * Rx_116325 + i_116748] = + defunc_1_op_res_116769; } } - - int32_t x_34199 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33949 * - N_27771 + - i_34194]; - int32_t adjustValInds_res_t_res_34200 = sub32(x_34199, n_27775); - - adjustValInds_res_34192 = adjustValInds_res_t_res_34200; - } else { - adjustValInds_res_34192 = -1; } - fst_breakzq_34190 = adjustValInds_res_34192; - } - - bool cond_34201 = sle32(x_34179, 5); - bool cond_f_res_34202 = sle32(y_34181, 5); - bool 
x_34203 = !cond_34201; - bool y_34204 = cond_f_res_34202 && x_34203; - bool cond_34205 = cond_34201 || y_34204; - int32_t fst_breakzq_34206; - - if (cond_34205) { - fst_breakzq_34206 = -2; + for (int64_t i_127057 = 0; i_127057 < Ry_116323; i_127057++) { + for (int64_t i_127058 = 0; i_127058 < Rx_116325; i_127058++) { + mem_121821[i_127057 * Rx_116325 + i_127058] = + mem_param_121794[i_127057 * Rx_116325 + i_127058]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127059 = 0; i_127059 < Ry_116323; i_127059++) { + for (int64_t i_127060 = 0; i_127060 < Rx_116325; i_127060++) { + mem_125235[i_127059 * Rx_116325 + i_127060] = + mem_121821[i_127059 * Rx_116325 + i_127060]; + } + } } else { - fst_breakzq_34206 = fst_breakzq_34190; + for (int64_t i_127061 = 0; i_127061 < Ry_116323; i_127061++) { + for (int64_t i_127062 = 0; i_127062 < Rx_116325; i_127062++) { + mem_125235[i_127061 * Rx_116325 + i_127062] = + mem_param_121794[i_127061 * Rx_116325 + i_127062]; + } + } } - ((__global int32_t *) mem_45419)[gtid_33949] = fst_breakzq_34206; - ((__global float *) mem_45421)[gtid_33949] = defunc_0_f_res_34186; - } - - error_0: - return; - #undef segmap_group_sizze_34175 -} -__kernel void mainDetailedzisegmap_34026(__global int *global_failure, - int64_t m_27772, __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global unsigned char *mem_45406) -{ - #define segmap_group_sizze_34127 (mainDetailedzisegmap_group_sizze_34028) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46745; - int32_t local_tid_46746; - int64_t group_sizze_46749; - int32_t wave_sizze_46748; - int32_t group_tid_46747; - - global_tid_46745 = get_global_id(0); - local_tid_46746 = get_local_id(0); - group_sizze_46749 = get_local_size(0); - wave_sizze_46748 = LOCKSTEP_WIDTH; - group_tid_46747 = get_group_id(0); - - int32_t phys_tid_34026; - - 
phys_tid_34026 = global_tid_46745; - - int64_t gtid_34025; - - gtid_34025 = sext_i32_i64(group_tid_46747) * segmap_group_sizze_34127 + - sext_i32_i64(local_tid_46746); - if (slt64(gtid_34025, m_27772)) { - int32_t x_34130 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_34025]; - float x_34131 = ((__global - float *) defunc_3_map_res_mem_45246)[gtid_34025]; - float i32_res_34132 = sitofp_i32_f32(x_34130); - float sqrt_res_34133; - - sqrt_res_34133 = futrts_sqrt32(i32_res_34132); - float y_34134 = x_34131 * sqrt_res_34133; + double mem_param_tmp_127047[Ry_116323 * Rx_116325]; - ((__global float *) mem_45406)[gtid_34025] = y_34134; + for (int32_t i_11 = 0; i_11 < Ry_116323 * Rx_116325; i_11++) + mem_param_tmp_127047[i_11] = mem_125235[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_116323 * Rx_116325; i_12++) + mem_param_121794[i_12] = mem_param_tmp_127047[i_12]; } - - error_0: - return; - #undef segmap_group_sizze_34127 -} -__kernel void mainDetailedzisegmap_34076(__global int *global_failure, - int64_t m_27772, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45399) -{ - #define segmap_group_sizze_34085 (mainDetailedzisegmap_group_sizze_34078) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46684; - int32_t local_tid_46685; - int64_t group_sizze_46688; - int32_t wave_sizze_46687; - int32_t group_tid_46686; - - global_tid_46684 = get_global_id(0); - local_tid_46685 = get_local_id(0); - group_sizze_46688 = get_local_size(0); - wave_sizze_46687 = LOCKSTEP_WIDTH; - group_tid_46686 = get_group_id(0); - - int32_t phys_tid_34076; - - phys_tid_34076 = global_tid_46684; - - int64_t gtid_34075; - - gtid_34075 = sext_i32_i64(group_tid_46686) * segmap_group_sizze_34085 + - sext_i32_i64(local_tid_46685); - if (slt64(gtid_34075, m_27772)) { - int32_t x_34088 = ((__global - int32_t *) 
defunc_4_map_res_mem_45177)[gtid_34075]; - int32_t x_34089 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_34075]; - int32_t y_34090 = sub32(x_34088, x_34089); - - ((__global int32_t *) mem_45399)[gtid_34075] = y_34090; + for (int32_t i_13 = 0; i_13 < Ry_116323 * Rx_116325; i_13++) + loop_mem_121823[i_13] = mem_param_121794[i_13]; + + int64_t reg_tile_i_127063 = squot64(sext_i32_i64(local_tid_127013), + Ty_116322 * Tx_116324); + int64_t reg_tile_i_127064 = squot64(sext_i32_i64(local_tid_127013) - + squot64(sext_i32_i64(local_tid_127013), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), Tx_116324); + int64_t reg_tile_i_127065 = sext_i32_i64(local_tid_127013) - + squot64(sext_i32_i64(local_tid_127013), Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324) - squot64(sext_i32_i64(local_tid_127013) - + squot64(sext_i32_i64(local_tid_127013), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), + Tx_116324) * Tx_116324; + int64_t tile_dim_start_127066 = gtid_104827 + reg_tile_i_127063; + int64_t tile_dim_start_127067 = Ry_116323 * (Ty_116322 * gid_y_116341 + + reg_tile_i_127064); + int64_t tile_dim_start_127068 = Rx_116325 * (Tx_116324 * gid_x_116340 + + reg_tile_i_127065); + + for (int64_t nest_i_127069 = 0; nest_i_127069 < (int64_t) 1; + nest_i_127069++) { + for (int64_t nest_i_127070 = 0; nest_i_127070 < Ry_116323; + nest_i_127070++) { + for (int64_t nest_i_127071 = 0; nest_i_127071 < Rx_116325; + nest_i_127071++) { + if ((slt64(tile_dim_start_127066 + nest_i_127069, m_75136) && + slt64(tile_dim_start_127067 + nest_i_127070, + k2p2zq_75151)) && slt64(tile_dim_start_127068 + + nest_i_127071, + k2p2zq_75151)) { + ((__global double *) mem_121827)[(tile_dim_start_127066 + + nest_i_127069) * + (k2p2zq_75151 * + k2p2zq_75151) + + (tile_dim_start_127067 + + nest_i_127070) * + k2p2zq_75151 + + (tile_dim_start_127068 + + nest_i_127071)] = + loop_mem_121823[squot64(nest_i_127070 * Rx_116325 + + nest_i_127071 - + squot64(nest_i_127070 * + Rx_116325 + + 
nest_i_127071, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071 - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325 + + (nest_i_127070 * Rx_116325 + + nest_i_127071 - squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * Rx_116325) - + squot64(nest_i_127070 * Rx_116325 + + nest_i_127071 - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * (Ry_116323 * + Rx_116325) - + squot64(nest_i_127070 * Rx_116325 + + nest_i_127071 - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071 - + squot64(nest_i_127070 * + Rx_116325 + + nest_i_127071, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * + Ry_116323 * + Rx_116325), + Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325)]; + } + } + } } - error_0: + error_9: return; - #undef segmap_group_sizze_34085 + #undef Ty_116322 + #undef Ry_116323 + #undef Tx_116324 + #undef Rx_116325 + #undef Tk_116326 + #undef tk_div_tx_116327 + #undef tk_div_ty_116328 + #undef TxRx_116329 + #undef TyRy_116330 + #undef a_loc_szz_116332 + #undef b_loc_szz_116334 } -__kernel void mainDetailedzisegmap_intragroup_30688(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_44563_backing_aligned_0, - __local volatile - int64_t *mem_44553_backing_aligned_1, - int32_t k2p2zq_27785, - int64_t i32_res_27787, - int32_t m_27918, - int64_t nm_27920, - int64_t 
i32_res_27935, - __global - unsigned char *defunc_3_map_res_mem_44549, - __global - unsigned char *mem_44573) +__kernel void mainzisegmap_intragroup_116784(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125292_backing_aligned_0, + __local volatile + int64_t *mem_123215_backing_aligned_1, + __local volatile + int64_t *mem_123196_backing_aligned_2, + __local volatile + int64_t *mem_123187_backing_aligned_3, + __local volatile + int64_t *mem_123164_backing_aligned_4, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_y_116782, + int64_t ctx_val_123177, + int64_t num_threads_126222, + __global unsigned char *mem_121944, + __global unsigned char *mem_121946, + __global unsigned char *mem_123151, + __global unsigned char *mem_123155, + __global unsigned char *mem_123233, + __global unsigned char *mem_125275) { + #define tile_sizze_116779 (mainzitile_sizze_116778) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44563_backing_1 = (__local volatile - char *) mem_44563_backing_aligned_0; - __local volatile char *restrict mem_44553_backing_0 = (__local volatile - char *) mem_44553_backing_aligned_1; + __local volatile char *restrict mem_125292_backing_8 = (__local volatile + char *) mem_125292_backing_aligned_0; + __local volatile char *restrict mem_123215_backing_7 = (__local volatile + char *) mem_123215_backing_aligned_1; + __local volatile char *restrict mem_123196_backing_2 = (__local volatile + char *) mem_123196_backing_aligned_2; + __local volatile char *restrict mem_123187_backing_1 = (__local volatile + char *) mem_123187_backing_aligned_3; + __local volatile char *restrict mem_123164_backing_0 = (__local volatile + char *) mem_123164_backing_aligned_4; volatile __local bool local_failure; if (failure_is_an_option) { @@ -22123,112 +27987,143 @@ def sync(self): local_failure = false; 
barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_45832; - int32_t local_tid_45833; - int64_t group_sizze_45836; - int32_t wave_sizze_45835; - int32_t group_tid_45834; + int32_t global_tid_127659; + int32_t local_tid_127660; + int64_t group_sizze_127663; + int32_t wave_sizze_127662; + int32_t group_tid_127661; - global_tid_45832 = get_global_id(0); - local_tid_45833 = get_local_id(0); - group_sizze_45836 = get_local_size(0); - wave_sizze_45835 = LOCKSTEP_WIDTH; - group_tid_45834 = get_group_id(0); + global_tid_127659 = get_global_id(0); + local_tid_127660 = get_local_id(0); + group_sizze_127663 = get_local_size(0); + wave_sizze_127662 = LOCKSTEP_WIDTH; + group_tid_127661 = get_group_id(0); - int32_t phys_tid_30688; + int32_t gid_flat_116784; - phys_tid_30688 = group_tid_45834; + gid_flat_116784 = group_tid_127661; - int32_t ltid_pre_45837; + int32_t ltid_pre_127664; - ltid_pre_45837 = local_tid_45833; + ltid_pre_127664 = squot32(local_tid_127660, + sext_i64_i32(tile_sizze_116779)); - int64_t gtid_30619; + int32_t ltid_pre_127665; - gtid_30619 = sext_i32_i64(group_tid_45834); + ltid_pre_127665 = local_tid_127660 - squot32(local_tid_127660, + sext_i64_i32(tile_sizze_116779)) * + sext_i64_i32(tile_sizze_116779); - __local char *mem_44553; + int64_t gid_x_116776; - mem_44553 = (__local char *) mem_44553_backing_0; + gid_x_116776 = squot64(sext_i32_i64(group_tid_127661), num_groups_y_116782); - int64_t gtid_30622 = sext_i32_i64(ltid_pre_45837); - int32_t phys_tid_30623 = local_tid_45833; - int32_t index_primexp_42354 = sext_i64_i32(gtid_30622); - int32_t defunc_0_f_res_30869 = sdiv32(index_primexp_42354, m_27918); - int32_t defunc_0_f_res_30870 = smod32(index_primexp_42354, m_27918); - bool cond_30871 = slt32(defunc_0_f_res_30870, k2p2zq_27785); - float defunc_0_f_res_30872; + int64_t gid_y_116777; - if (cond_30871) { - int64_t i_30873 = sext_i32_i64(defunc_0_f_res_30869); - bool x_30874 = sle64((int64_t) 0, i_30873); - bool y_30875 = slt64(i_30873, i32_res_27787); - 
bool bounds_check_30876 = x_30874 && y_30875; - int64_t j_30877 = sext_i32_i64(defunc_0_f_res_30870); - bool x_30878 = sle64((int64_t) 0, j_30877); - bool y_30879 = slt64(j_30877, i32_res_27787); - bool bounds_check_30880 = x_30878 && y_30879; - bool index_ok_30881 = bounds_check_30876 && bounds_check_30880; - bool index_certs_30882; + gid_y_116777 = sext_i32_i64(group_tid_127661) - + squot64(sext_i32_i64(group_tid_127661), num_groups_y_116782) * + num_groups_y_116782; + + int64_t binop_x_116811; + + binop_x_116811 = gid_x_116776 * tile_sizze_116779; + + int64_t binop_x_116813 = gid_y_116777 * tile_sizze_116779; + __local char *mem_123164; + + mem_123164 = (__local char *) mem_123164_backing_0; + + int64_t ltid_y_116803 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_116801 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_116802 = local_tid_127660; + + if (slt64(ltid_y_116803, tile_sizze_116779) && slt64(ltid_x_116801, + tile_sizze_116779)) { + int64_t gtid_116812 = ltid_y_116803 + binop_x_116811; + int64_t gtid_116814 = ltid_x_116801 + binop_x_116813; + bool binop_x_116815 = slt64(gtid_116812, m_75136); + bool binop_y_116816 = slt64(gtid_116814, k2p2zq_75151); + bool cond_116817 = binop_x_116815 && binop_y_116816; - if (!index_ok_30881) { + if (cond_116817) { + for (int64_t i_127666 = 0; i_127666 < k2p2zq_75151; i_127666++) { + ((__global double *) mem_125275)[gid_flat_116784 + i_127666 * + num_threads_126222] = + ((__global double *) mem_121946)[i_127666]; + } + } + for (int64_t i_127667 = 0; i_127667 < k2p2zq_75151; i_127667++) { + ((__local double *) mem_123164)[ltid_y_116803 * (k2p2zq_75151 * + tile_sizze_116779) + + ltid_x_116801 * k2p2zq_75151 + + i_127667] = ((__global + double *) mem_125275)[gid_flat_116784 + + i_127667 * + num_threads_126222]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_123182[1]; + __local char *mem_123187; + + mem_123187 = (__local char *) mem_123187_backing_1; + + __local char *mem_123196; + + mem_123196 = (__local 
char *) mem_123196_backing_2; + + double mem_123200[1]; + double mem_125285[1]; + __local char *tiled_inside_loop_mem_123229; + __local char *mem_param_123175; + + mem_param_123175 = mem_123164; + for (int64_t i_110368 = 0; i_110368 < k2p2zq_75151; i_110368++) { + int64_t x_110370 = sub64(k2p2zq_75151, i_110368); + int64_t i_110371 = sub64(x_110370, (int64_t) 1); + bool x_110372 = sle64((int64_t) 0, i_110371); + bool y_110373 = slt64(i_110371, k2p2zq_75151); + bool bounds_check_110374 = x_110372 && y_110373; + int64_t j_m_i_110375 = sub64(k2p2zq_75151, x_110370); + bool empty_slice_110376 = j_m_i_110375 == (int64_t) 0; + int64_t m_110377 = sub64(j_m_i_110375, (int64_t) 1); + int64_t i_p_m_t_s_110378 = add64(x_110370, m_110377); + bool zzero_leq_i_p_m_t_s_110379 = sle64((int64_t) 0, i_p_m_t_s_110378); + bool i_p_m_t_s_leq_w_110380 = slt64(i_p_m_t_s_110378, k2p2zq_75151); + bool zzero_lte_i_110381 = sle64((int64_t) 0, x_110370); + bool i_lte_j_110382 = sle64(x_110370, k2p2zq_75151); + bool y_110383 = i_p_m_t_s_leq_w_110380 && zzero_lte_i_110381; + bool y_110384 = zzero_leq_i_p_m_t_s_110379 && y_110383; + bool y_110385 = i_lte_j_110382 && y_110384; + bool forwards_ok_110386 = zzero_lte_i_110381 && y_110385; + bool ok_or_empty_110387 = empty_slice_110376 || forwards_ok_110386; + bool index_ok_110388 = bounds_check_110374 && ok_or_empty_110387; + bool index_certs_110389; + + if (!index_ok_110388) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 31) == -1) { - global_failure_args[0] = i_30873; - global_failure_args[1] = j_30877; - global_failure_args[2] = i32_res_27787; - global_failure_args[3] = i32_res_27787; + if (atomic_cmpxchg_i32_global(global_failure, -1, 167) == -1) { + global_failure_args[0] = i_110371; + global_failure_args[1] = x_110370; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; + global_failure_args[4] = k2p2zq_75151; ; } local_failure = true; - goto error_0; + goto error_1; } } - float defunc_0_f_res_t_res_30883 
= ((__global - float *) defunc_3_map_res_mem_44549)[gtid_30619 * - (i32_res_27787 * - i32_res_27787) + - i_30873 * - i32_res_27787 + - j_30877]; - - defunc_0_f_res_30872 = defunc_0_f_res_t_res_30883; - } else { - int32_t y_30884 = add32(k2p2zq_27785, defunc_0_f_res_30869); - bool cond_30885 = defunc_0_f_res_30870 == y_30884; - float defunc_0_f_res_f_res_30886; - - if (cond_30885) { - defunc_0_f_res_f_res_30886 = 1.0F; - } else { - defunc_0_f_res_f_res_30886 = 0.0F; - } - defunc_0_f_res_30872 = defunc_0_f_res_f_res_30886; - } - ((__local float *) mem_44553)[gtid_30622] = defunc_0_f_res_30872; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44563; - - mem_44563 = (__local char *) mem_44563_backing_1; - for (int32_t i_30888 = 0; i_30888 < k2p2zq_27785; i_30888++) { - int64_t i32_res_30890 = sext_i32_i64(i_30888); - bool x_30891 = sle64((int64_t) 0, i32_res_30890); - bool y_30892 = slt64(i32_res_30890, nm_27920); - bool bounds_check_30893 = x_30891 && y_30892; - bool index_certs_30894; + bool index_certs_110390; - if (!bounds_check_30893) { + if (!ok_or_empty_110387) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 32) == -1) { - global_failure_args[0] = i32_res_30890; - global_failure_args[1] = nm_27920; + if (atomic_cmpxchg_i32_global(global_failure, -1, 168) == -1) { + global_failure_args[0] = x_110370; + global_failure_args[1] = k2p2zq_75151; + global_failure_args[2] = k2p2zq_75151; ; } local_failure = true; @@ -22236,4794 +28131,4018 @@ def sync(self): } } - float v1_30895 = ((__local float *) mem_44553)[i32_res_30890]; - bool cond_30896 = v1_30895 == 0.0F; - int64_t gtid_30643 = sext_i32_i64(ltid_pre_45837); - int32_t phys_tid_30644 = local_tid_45833; - int32_t defunc_0_f_res_30899 = sext_i64_i32(gtid_30643); - int32_t defunc_0_f_res_30900 = sdiv32(defunc_0_f_res_30899, m_27918); - int32_t defunc_0_f_res_30901 = smod32(defunc_0_f_res_30899, m_27918); - float 
defunc_0_f_res_30902; + int64_t num_whole_tiles_116837 = squot64(j_m_i_110375, + tile_sizze_116779); + int64_t ltid_y_116840 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_116838 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_116839 = local_tid_127660; + + if (slt64(ltid_y_116840, tile_sizze_116779) && slt64(ltid_x_116838, + tile_sizze_116779)) { + mem_123182[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_123192[1]; + double mem_param_123183[1]; - if (cond_30896) { - int32_t x_30903 = mul32(m_27918, defunc_0_f_res_30900); - int32_t i32_arg_30904 = add32(defunc_0_f_res_30901, x_30903); - int64_t i32_res_30905 = sext_i32_i64(i32_arg_30904); - bool x_30906 = sle64((int64_t) 0, i32_res_30905); - bool y_30907 = slt64(i32_res_30905, nm_27920); - bool bounds_check_30908 = x_30906 && y_30907; - bool index_certs_30909; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_123183[i_3] = mem_123182[i_3]; + for (int64_t tile_id_116849 = 0; tile_id_116849 < + num_whole_tiles_116837; tile_id_116849++) { + int64_t binop_x_116925 = tile_sizze_116779 * tile_id_116849; + int64_t ltid_y_116852 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_116850 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_116851 = local_tid_127660; + int64_t j_116926 = ltid_x_116850 + binop_x_116925; + int64_t gtid_116928 = binop_x_116811 + ltid_y_116852; + bool binop_x_116934 = slt64(j_116926, j_m_i_110375); + bool binop_y_116935 = slt64(gtid_116928, m_75136); + bool cond_116936 = binop_x_116934 && binop_y_116935; + double pre_116937; - if (!bounds_check_30908) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 33) == - -1) { - global_failure_args[0] = i32_res_30905; - global_failure_args[1] = nm_27920; - ; + if (cond_116936) { + int64_t slice_119577 = x_110370 + j_116926; + double x_116938 = ((__global + double *) mem_123151)[slice_119577 * + (k2p2zq_75151 * + m_75136) + + gtid_116928 
* + k2p2zq_75151 + + i_110371]; + + pre_116937 = x_116938; + } else { + pre_116937 = 0.0; + } + ((__local double *) mem_123187)[ltid_y_116852 * tile_sizze_116779 + + ltid_x_116850] = pre_116937; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119578 = x_110370 + binop_x_116925; + double mem_123191[1]; + int64_t ltid_y_116885 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_116883 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_116884 = local_tid_127660; + int64_t gtid_116942 = binop_x_116811 + ltid_y_116885; + int64_t gtid_116944 = binop_x_116813 + ltid_x_116883; + double acc_116948 = mem_param_123183[(int64_t) 0]; + bool binop_x_116952 = slt64(gtid_116942, m_75136); + bool binop_y_116953 = slt64(gtid_116944, k2p2zq_75151); + bool cond_116954 = binop_x_116952 && binop_y_116953; + double acc_116955; + + if (cond_116954) { + double x_116956; + double redout_119837 = acc_116948; + + for (int64_t i_119838 = 0; i_119838 < tile_sizze_116779; + i_119838++) { + int64_t slice_120037 = slice_119578 + i_119838; + double x_116961 = ((__local + double *) mem_123187)[ltid_y_116885 * + tile_sizze_116779 + + i_119838]; + bool isnan_res_116962; + + isnan_res_116962 = futrts_isnan64(x_116961); + + double defunc_1_f_res_116963; + + if (isnan_res_116962) { + defunc_1_f_res_116963 = 0.0; + } else { + double x_116960 = ((__local + double *) mem_param_123175)[ltid_y_116885 * + ctx_val_123177 + + ltid_x_116883 * + k2p2zq_75151 + + slice_120037]; + double defunc_1_f_res_f_res_116964 = x_116960 * + x_116961; + + defunc_1_f_res_116963 = defunc_1_f_res_f_res_116964; } - local_failure = true; - goto error_1; + + double defunc_1_op_res_116959 = defunc_1_f_res_116963 + + redout_119837; + double redout_tmp_127672 = defunc_1_op_res_116959; + + redout_119837 = redout_tmp_127672; } + x_116956 = redout_119837; + acc_116955 = x_116956; + } else { + acc_116955 = acc_116948; } + mem_123191[(int64_t) 0] = acc_116955; + barrier(CLK_LOCAL_MEM_FENCE); - float defunc_0_f_res_t_res_30910 = ((__local - 
float *) mem_44553)[i32_res_30905]; + double mem_param_tmp_127670[1]; - defunc_0_f_res_30902 = defunc_0_f_res_t_res_30910; + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_127670[i_4] = mem_123191[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_123183[i_5] = mem_param_tmp_127670[i_5]; + } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_123192[i_6] = mem_param_123183[i_6]; + + int64_t residual_input_116974 = srem64(j_m_i_110375, tile_sizze_116779); + bool cond_116975 = residual_input_116974 == (int64_t) 0; + + if (cond_116975) { + mem_125285[(int64_t) 0] = accs_mem_123192[(int64_t) 0]; } else { - int64_t i32_res_30911 = sext_i32_i64(defunc_0_f_res_30901); - bool x_30912 = sle64((int64_t) 0, i32_res_30911); - bool y_30913 = slt64(i32_res_30911, nm_27920); - bool bounds_check_30914 = x_30912 && y_30913; - bool index_certs_30915; + int64_t binop_x_117052 = tile_sizze_116779 * num_whole_tiles_116837; + int64_t ltid_y_116978 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_116976 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_116977 = local_tid_127660; + int64_t j_117053 = ltid_x_116976 + binop_x_117052; + int64_t gtid_117055 = binop_x_116811 + ltid_y_116978; + bool binop_x_117061 = slt64(j_117053, j_m_i_110375); + bool binop_y_117062 = slt64(gtid_117055, m_75136); + bool cond_117063 = binop_x_117061 && binop_y_117062; + double pre_117064; - if (!bounds_check_30914) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 34) == - -1) { - global_failure_args[0] = i32_res_30911; - global_failure_args[1] = nm_27920; - ; + if (cond_117063) { + int64_t slice_119579 = x_110370 + j_117053; + double x_117065 = ((__global + double *) mem_123151)[slice_119579 * + (k2p2zq_75151 * + m_75136) + + gtid_117055 * + k2p2zq_75151 + + i_110371]; + + pre_117064 = x_117065; + } else { + pre_117064 = 0.0; + } + ((__local double *) mem_123196)[ltid_y_116978 * tile_sizze_116779 + + ltid_x_116976] = pre_117064; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119580 
= x_110370 + binop_x_117052; + int64_t ltid_y_117012 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_117010 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_117011 = local_tid_127660; + int64_t gtid_117070 = binop_x_116811 + ltid_y_117012; + int64_t gtid_117072 = binop_x_116813 + ltid_x_117010; + double acc_117076 = accs_mem_123192[(int64_t) 0]; + bool binop_x_117080 = slt64(gtid_117070, m_75136); + bool binop_y_117081 = slt64(gtid_117072, k2p2zq_75151); + bool cond_117082 = binop_x_117080 && binop_y_117081; + double acc_117083; + + if (cond_117082) { + double x_117084; + double redout_119839 = acc_117076; + + for (int64_t i_119840 = 0; i_119840 < residual_input_116974; + i_119840++) { + int64_t slice_120038 = slice_119580 + i_119840; + double x_117089 = ((__local + double *) mem_123196)[ltid_y_117012 * + tile_sizze_116779 + + i_119840]; + bool isnan_res_117090; + + isnan_res_117090 = futrts_isnan64(x_117089); + + double defunc_1_f_res_117091; + + if (isnan_res_117090) { + defunc_1_f_res_117091 = 0.0; + } else { + double x_117088 = ((__local + double *) mem_param_123175)[ltid_y_117012 * + ctx_val_123177 + + ltid_x_117010 * + k2p2zq_75151 + + slice_120038]; + double defunc_1_f_res_f_res_117092 = x_117088 * + x_117089; + + defunc_1_f_res_117091 = defunc_1_f_res_f_res_117092; } - local_failure = true; - goto error_1; + + double defunc_1_op_res_117087 = defunc_1_f_res_117091 + + redout_119839; + double redout_tmp_127673 = defunc_1_op_res_117087; + + redout_119839 = redout_tmp_127673; } + x_117084 = redout_119839; + acc_117083 = x_117084; + } else { + acc_117083 = acc_117076; } + mem_123200[(int64_t) 0] = acc_117083; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125285[(int64_t) 0] = mem_123200[(int64_t) 0]; + } + + __local char *mem_123215; + + mem_123215 = (__local char *) mem_123215_backing_7; + + int64_t ltid_y_117096 = sext_i32_i64(ltid_pre_127664); + int64_t ltid_x_117094 = sext_i32_i64(ltid_pre_127665); + int32_t ltid_flat_117095 = local_tid_127660; + + if 
(slt64(ltid_y_117096, tile_sizze_116779) && slt64(ltid_x_117094, + tile_sizze_116779)) { + int64_t gtid_117105 = binop_x_116811 + ltid_y_117096; + int64_t gtid_117107 = binop_x_116813 + ltid_x_117094; + bool binop_x_117109 = slt64(gtid_117105, m_75136); + bool binop_y_117110 = slt64(gtid_117107, k2p2zq_75151); + bool cond_117111 = binop_x_117109 && binop_y_117110; + __local char *mem_125292; - float x_30916 = ((__local float *) mem_44553)[i32_res_30911]; - float x_30917 = x_30916 / v1_30895; - int32_t y_30918 = sub32(k2p2zq_27785, 1); - bool cond_30919 = slt32(defunc_0_f_res_30900, y_30918); - float defunc_0_f_res_f_res_30920; - - if (cond_30919) { - int32_t x_30921 = add32(1, defunc_0_f_res_30900); - int32_t x_30922 = mul32(m_27918, x_30921); - int32_t i32_arg_30923 = add32(defunc_0_f_res_30901, x_30922); - int64_t i32_res_30924 = sext_i32_i64(i32_arg_30923); - bool x_30925 = sle64((int64_t) 0, i32_res_30924); - bool y_30926 = slt64(i32_res_30924, nm_27920); - bool bounds_check_30927 = x_30925 && y_30926; - bool index_certs_30928; - - if (!bounds_check_30927) { + mem_125292 = (__local char *) mem_125292_backing_8; + if (cond_117111) { + double defunc_2_reduce_res_117108 = mem_125285[(int64_t) 0]; + bool index_ok_117116 = bounds_check_110374 && + bounds_check_110374; + bool index_certs_117117; + + if (!index_ok_117116) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 35) == - -1) { - global_failure_args[0] = i32_res_30924; - global_failure_args[1] = nm_27920; + if (atomic_cmpxchg_i32_global(global_failure, -1, + 169) == -1) { + global_failure_args[0] = i_110371; + global_failure_args[1] = i_110371; + global_failure_args[2] = k2p2zq_75151; + global_failure_args[3] = k2p2zq_75151; ; } local_failure = true; - goto error_1; + goto error_6; } } - float x_30929 = ((__local float *) mem_44553)[i32_res_30924]; - int32_t i32_arg_30930 = add32(i_30888, x_30922); - int64_t i32_res_30931 = sext_i32_i64(i32_arg_30930); - bool x_30932 = sle64((int64_t) 0, i32_res_30931); 
- bool y_30933 = slt64(i32_res_30931, nm_27920); - bool bounds_check_30934 = x_30932 && y_30933; - bool index_certs_30935; + double zs_arg_117118 = ((__global + double *) mem_123155)[i_110371 * + (k2p2zq_75151 * + m_75136) + + gtid_117105 * + k2p2zq_75151 + + i_110371]; + bool index_certs_117119; - if (!bounds_check_30934) { + if (!bounds_check_110374) { { - if (atomic_cmpxchg_i32_global(global_failure, -1, 36) == - -1) { - global_failure_args[0] = i32_res_30931; - global_failure_args[1] = nm_27920; + if (atomic_cmpxchg_i32_global(global_failure, -1, + 170) == -1) { + global_failure_args[0] = i_110371; + global_failure_args[1] = k2p2zq_75151; ; } local_failure = true; - goto error_1; + goto error_6; } } - float x_30936 = ((__local float *) mem_44553)[i32_res_30931]; - float y_30937 = x_30917 * x_30936; - float defunc_0_f_res_f_res_t_res_30938 = x_30929 - y_30937; + double zm_arg_117120 = ((__global + double *) mem_121944)[i_110371 * + k2p2zq_75151 + + gtid_117107]; + double zm_res_117121 = zm_arg_117120 - + defunc_2_reduce_res_117108; + double zs_res_117122 = zm_res_117121 / zs_arg_117118; - defunc_0_f_res_f_res_30920 = defunc_0_f_res_f_res_t_res_30938; - } else { - defunc_0_f_res_f_res_30920 = x_30917; + ((__local double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_75151 + i_110371] = + zs_res_117122; + for (int64_t i_127674 = 0; i_127674 < k2p2zq_75151; + i_127674++) { + ((__local double *) mem_125292)[i_127674] = ((__local + double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_75151 + + i_127674]; + } + } + for (int64_t i_127675 = 0; i_127675 < k2p2zq_75151; i_127675++) { + ((__local double *) mem_123215)[ltid_y_117096 * (k2p2zq_75151 * + tile_sizze_116779) + + ltid_x_117094 * k2p2zq_75151 + + i_127675] = ((__local + double *) mem_125292)[i_127675]; } - defunc_0_f_res_30902 = defunc_0_f_res_f_res_30920; } - ((__local float *) mem_44563)[gtid_30643] = defunc_0_f_res_30902; - error_1: + 
error_6: barrier(CLK_LOCAL_MEM_FENCE); if (local_failure) return; barrier(CLK_LOCAL_MEM_FENCE); - int64_t write_i_30686 = sext_i32_i64(ltid_pre_45837); - int32_t phys_tid_30687 = local_tid_45833; - float write_value_30941 = ((__local float *) mem_44563)[write_i_30686]; - - if (sle64((int64_t) 0, write_i_30686) && slt64(write_i_30686, - nm_27920)) { - ((__local float *) mem_44553)[write_i_30686] = write_value_30941; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - for (int64_t i_45839 = 0; i_45839 < sdiv_up64(i32_res_27787 * - i32_res_27787 - - sext_i32_i64(local_tid_45833), - nm_27920); i_45839++) { - ((__global float *) mem_44573)[gtid_30619 * (i32_res_27787 * - i32_res_27787) + - squot64(i_45839 * nm_27920 + - sext_i32_i64(local_tid_45833), - i32_res_27787) * i32_res_27787 + - (i_45839 * nm_27920 + - sext_i32_i64(local_tid_45833) - - squot64(i_45839 * nm_27920 + - sext_i32_i64(local_tid_45833), - i32_res_27787) * - i32_res_27787)] = ((__local - float *) mem_44553)[i32_res_27787 + - (squot64(i_45839 * - nm_27920 + - sext_i32_i64(local_tid_45833), - i32_res_27787) * - i32_res_27935 + - (i_45839 * - nm_27920 + - sext_i32_i64(local_tid_45833) - - squot64(i_45839 * - nm_27920 + - sext_i32_i64(local_tid_45833), - i32_res_27787) * - i32_res_27787))]; + __local char *mem_param_tmp_127668; + + mem_param_tmp_127668 = mem_123215; + mem_param_123175 = mem_param_tmp_127668; + } + tiled_inside_loop_mem_123229 = mem_param_123175; + + int64_t thread_out_index_127676 = gid_x_116776 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127664); + int64_t thread_out_index_127677 = gid_y_116777 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127665); + + if (slt64(thread_out_index_127676, m_75136) && + slt64(thread_out_index_127677, k2p2zq_75151)) { + for (int64_t i_127678 = 0; i_127678 < k2p2zq_75151; i_127678++) { + ((__global double *) mem_123233)[thread_out_index_127676 * + (k2p2zq_75151 * k2p2zq_75151) + + thread_out_index_127677 * + k2p2zq_75151 + i_127678] = + ((__local + double *) 
tiled_inside_loop_mem_123229)[sext_i32_i64(ltid_pre_127664) * + ctx_val_123177 + + sext_i32_i64(ltid_pre_127665) * + k2p2zq_75151 + + i_127678]; + } } - barrier(CLK_LOCAL_MEM_FENCE); - error_3: + error_7: return; + #undef tile_sizze_116779 } -__kernel void mainDetailedzisegmap_intragroup_31015(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_44594_backing_aligned_0, - int64_t m_27772, - int32_t k2p2zq_27785, - int32_t m_27918, - int64_t nm_27920, - int32_t i_31554, - int64_t i32_res_31556, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, - __global - unsigned char *mem_param_44585, - __global - unsigned char *mem_44590, - __global - unsigned char *mem_44598) +__kernel void mainzisegmap_intragroup_117146(__global int *global_failure, + __local volatile + int64_t *mem_123330_backing_aligned_0, + __local volatile + int64_t *mem_123314_backing_aligned_1, + __local volatile + int64_t *mem_123305_backing_aligned_2, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t x_110426, int64_t i_110427, + int64_t j_m_i_110431, + int64_t num_groups_y_117144, + int64_t num_whole_tiles_117162, + int64_t residual_input_117295, + unsigned char cond_117296, + int64_t num_threads_126227, + __global unsigned char *mem_121938, + __global unsigned char *mem_123143, + __global unsigned char *mem_123241, + __global unsigned char *mem_123291, + __global unsigned char *mem_123295, + __global unsigned char *mem_123334, + __global unsigned char *mem_125317) { + #define tile_sizze_117141 (mainzitile_sizze_117140) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44594_backing_0 = (__local volatile - char *) mem_44594_backing_aligned_0; - volatile __local bool local_failure; + __local volatile char *restrict mem_123330_backing_6 = (__local volatile + char *) mem_123330_backing_aligned_0; + __local 
volatile char *restrict mem_123314_backing_5 = (__local volatile + char *) mem_123314_backing_aligned_1; + __local volatile char *restrict mem_123305_backing_0 = (__local volatile + char *) mem_123305_backing_aligned_2; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - int32_t global_tid_45866; - int32_t local_tid_45867; - int64_t group_sizze_45870; - int32_t wave_sizze_45869; - int32_t group_tid_45868; + int32_t global_tid_127696; + int32_t local_tid_127697; + int64_t group_sizze_127700; + int32_t wave_sizze_127699; + int32_t group_tid_127698; - global_tid_45866 = get_global_id(0); - local_tid_45867 = get_local_id(0); - group_sizze_45870 = get_local_size(0); - wave_sizze_45869 = LOCKSTEP_WIDTH; - group_tid_45868 = get_group_id(0); + global_tid_127696 = get_global_id(0); + local_tid_127697 = get_local_id(0); + group_sizze_127700 = get_local_size(0); + wave_sizze_127699 = LOCKSTEP_WIDTH; + group_tid_127698 = get_group_id(0); - int32_t phys_tid_31015; + int32_t gid_flat_117146; - phys_tid_31015 = group_tid_45868; + gid_flat_117146 = group_tid_127698; - int32_t ltid_pre_45871; + int32_t ltid_pre_127701; - ltid_pre_45871 = local_tid_45867; + ltid_pre_127701 = squot32(local_tid_127697, + sext_i64_i32(tile_sizze_117141)); - int64_t gtid_30967; + int32_t ltid_pre_127702; - gtid_30967 = sext_i32_i64(group_tid_45868); + ltid_pre_127702 = local_tid_127697 - squot32(local_tid_127697, + sext_i64_i32(tile_sizze_117141)) * + sext_i64_i32(tile_sizze_117141); - float v1_31573 = ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_30967 * - ctx_param_ext_44581 + - i32_res_31556 * - ctx_param_ext_44583)]; - bool cond_31574 = v1_31573 == 0.0F; - __local char *mem_44594; + int64_t gid_x_117138; - mem_44594 = (__local char *) mem_44594_backing_0; + gid_x_117138 = squot64(sext_i32_i64(group_tid_127698), num_groups_y_117144); - 
int64_t gtid_30970 = sext_i32_i64(ltid_pre_45871); - int32_t phys_tid_30971 = local_tid_45867; - int32_t defunc_0_f_res_31577 = sext_i64_i32(gtid_30970); - int32_t defunc_0_f_res_31578 = sdiv32(defunc_0_f_res_31577, m_27918); - int32_t defunc_0_f_res_31579 = smod32(defunc_0_f_res_31577, m_27918); - float defunc_0_f_res_31580; + int64_t gid_y_117139; - if (cond_31574) { - int32_t x_31581 = mul32(m_27918, defunc_0_f_res_31578); - int32_t i32_arg_31582 = add32(defunc_0_f_res_31579, x_31581); - int64_t i32_res_31583 = sext_i32_i64(i32_arg_31582); - bool x_31584 = sle64((int64_t) 0, i32_res_31583); - bool y_31585 = slt64(i32_res_31583, nm_27920); - bool bounds_check_31586 = x_31584 && y_31585; - bool index_certs_31587; - - if (!bounds_check_31586) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 38) == -1) { - global_failure_args[0] = i32_res_31583; - global_failure_args[1] = nm_27920; - ; - } - local_failure = true; - goto error_0; - } + gid_y_117139 = sext_i32_i64(group_tid_127698) - + squot64(sext_i32_i64(group_tid_127698), num_groups_y_117144) * + num_groups_y_117144; + + double mem_123300[1]; + int64_t ltid_y_117165 = sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117163 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117164 = local_tid_127697; + + if (slt64(ltid_y_117165, tile_sizze_117141) && slt64(ltid_x_117163, + tile_sizze_117141)) { + mem_123300[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_117250 = gid_x_117138 * tile_sizze_117141; + int64_t binop_x_117265 = gid_y_117139 * tile_sizze_117141; + __local char *mem_123305; + + mem_123305 = (__local char *) mem_123305_backing_0; + + double accs_mem_123310[1]; + double mem_param_123301[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_123301[i_1] = mem_123300[i_1]; + for (int64_t tile_id_117174 = 0; tile_id_117174 < num_whole_tiles_117162; + tile_id_117174++) { + int64_t binop_x_117248 = tile_sizze_117141 * tile_id_117174; + int64_t ltid_y_117177 = 
sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117175 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117176 = local_tid_127697; + int64_t j_117249 = ltid_x_117175 + binop_x_117248; + int64_t gtid_117251 = ltid_y_117177 + binop_x_117250; + bool binop_x_117256 = slt64(j_117249, j_m_i_110431); + bool binop_y_117257 = slt64(gtid_117251, m_75136); + bool cond_117258 = binop_x_117256 && binop_y_117257; + double pre_117259; + + if (cond_117258) { + int64_t slice_119581 = x_110426 + j_117249; + double x_117260 = ((__global double *) mem_123241)[slice_119581 * + (k2p2zq_75151 * + m_75136) + + gtid_117251 * + k2p2zq_75151 + + i_110427]; + + pre_117259 = x_117260; + } else { + pre_117259 = 0.0; } + ((__local double *) mem_123305)[ltid_y_117177 * tile_sizze_117141 + + ltid_x_117175] = pre_117259; + barrier(CLK_LOCAL_MEM_FENCE); - float defunc_0_f_res_t_res_31588 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_30967 * - ctx_param_ext_44581 + - i32_res_31583 * - ctx_param_ext_44583)]; - - defunc_0_f_res_31580 = defunc_0_f_res_t_res_31588; - } else { - int64_t i32_res_31589 = sext_i32_i64(defunc_0_f_res_31579); - bool x_31590 = sle64((int64_t) 0, i32_res_31589); - bool y_31591 = slt64(i32_res_31589, nm_27920); - bool bounds_check_31592 = x_31590 && y_31591; - bool index_certs_31593; - - if (!bounds_check_31592) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 39) == -1) { - global_failure_args[0] = i32_res_31589; - global_failure_args[1] = nm_27920; - ; + int64_t slice_119582 = x_110426 + binop_x_117248; + double mem_123309[1]; + int64_t ltid_y_117209 = sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117207 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117208 = local_tid_127697; + int64_t gtid_117264 = ltid_y_117209 + binop_x_117250; + int64_t gtid_117266 = ltid_x_117207 + binop_x_117265; + double acc_117269 = mem_param_123301[(int64_t) 0]; + bool binop_x_117273 = slt64(gtid_117264, m_75136); + bool binop_y_117274 = slt64(gtid_117266, 
k2p2zq_75151); + bool cond_117275 = binop_x_117273 && binop_y_117274; + double acc_117276; + + if (cond_117275) { + double x_117277; + double redout_119848 = acc_117269; + + for (int64_t i_119849 = 0; i_119849 < tile_sizze_117141; + i_119849++) { + int64_t slice_120041 = slice_119582 + i_119849; + double x_117282 = ((__local + double *) mem_123305)[ltid_y_117209 * + tile_sizze_117141 + + i_119849]; + bool isnan_res_117283; + + isnan_res_117283 = futrts_isnan64(x_117282); + + double defunc_1_f_res_117284; + + if (isnan_res_117283) { + defunc_1_f_res_117284 = 0.0; + } else { + double x_117281 = ((__global + double *) mem_123295)[slice_120041 * + (k2p2zq_75151 * + m_75136) + + gtid_117264 * + k2p2zq_75151 + + gtid_117266]; + double defunc_1_f_res_f_res_117285 = x_117281 * x_117282; + + defunc_1_f_res_117284 = defunc_1_f_res_f_res_117285; } - local_failure = true; - goto error_0; + + double defunc_1_op_res_117280 = defunc_1_f_res_117284 + + redout_119848; + double redout_tmp_127705 = defunc_1_op_res_117280; + + redout_119848 = redout_tmp_127705; } + x_117277 = redout_119848; + acc_117276 = x_117277; + } else { + acc_117276 = acc_117269; } + mem_123309[(int64_t) 0] = acc_117276; + barrier(CLK_LOCAL_MEM_FENCE); - float x_31594 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_30967 * - ctx_param_ext_44581 + - i32_res_31589 * - ctx_param_ext_44583)]; - float x_31595 = x_31594 / v1_31573; - int32_t y_31596 = sub32(k2p2zq_27785, 1); - bool cond_31597 = slt32(defunc_0_f_res_31578, y_31596); - float defunc_0_f_res_f_res_31598; + double mem_param_tmp_127703[1]; - if (cond_31597) { - int32_t x_31599 = add32(1, defunc_0_f_res_31578); - int32_t x_31600 = mul32(m_27918, x_31599); - int32_t i32_arg_31601 = add32(defunc_0_f_res_31579, x_31600); - int64_t i32_res_31602 = sext_i32_i64(i32_arg_31601); - bool x_31603 = sle64((int64_t) 0, i32_res_31602); - bool y_31604 = slt64(i32_res_31602, nm_27920); - bool bounds_check_31605 = x_31603 && y_31604; - bool 
index_certs_31606; - - if (!bounds_check_31605) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 40) == - -1) { - global_failure_args[0] = i32_res_31602; - global_failure_args[1] = nm_27920; - ; - } - local_failure = true; - goto error_0; - } - } + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_127703[i_2] = mem_123309[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_123301[i_3] = mem_param_tmp_127703[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_123310[i_4] = mem_param_123301[i_4]; + + __local char *mem_123314; + + mem_123314 = (__local char *) mem_123314_backing_5; + + double mem_123318[1]; + double mem_125310[1]; + + if (cond_117296) { + mem_125310[(int64_t) 0] = accs_mem_123310[(int64_t) 0]; + } else { + int64_t binop_x_117371 = tile_sizze_117141 * num_whole_tiles_117162; + int64_t ltid_y_117299 = sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117297 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117298 = local_tid_127697; + int64_t j_117372 = ltid_x_117297 + binop_x_117371; + int64_t gtid_117374 = binop_x_117250 + ltid_y_117299; + bool binop_x_117379 = slt64(j_117372, j_m_i_110431); + bool binop_y_117380 = slt64(gtid_117374, m_75136); + bool cond_117381 = binop_x_117379 && binop_y_117380; + double pre_117382; + + if (cond_117381) { + int64_t slice_119583 = x_110426 + j_117372; + double x_117383 = ((__global double *) mem_123241)[slice_119583 * + (k2p2zq_75151 * + m_75136) + + gtid_117374 * + k2p2zq_75151 + + i_110427]; - float x_31607 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_30967 * - ctx_param_ext_44581 + - i32_res_31602 * - ctx_param_ext_44583)]; - int32_t i32_arg_31608 = add32(i_31554, x_31600); - int64_t i32_res_31609 = sext_i32_i64(i32_arg_31608); - bool x_31610 = sle64((int64_t) 0, i32_res_31609); - bool y_31611 = slt64(i32_res_31609, nm_27920); - bool bounds_check_31612 = x_31610 && y_31611; - bool index_certs_31613; + pre_117382 = x_117383; + } else { + pre_117382 = 0.0; + } + 
((__local double *) mem_123314)[ltid_y_117299 * tile_sizze_117141 + + ltid_x_117297] = pre_117382; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119584 = x_110426 + binop_x_117371; + int64_t ltid_y_117332 = sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117330 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117331 = local_tid_127697; + int64_t gtid_117388 = binop_x_117250 + ltid_y_117332; + int64_t gtid_117390 = binop_x_117265 + ltid_x_117330; + double acc_117393 = accs_mem_123310[(int64_t) 0]; + bool binop_x_117397 = slt64(gtid_117388, m_75136); + bool binop_y_117398 = slt64(gtid_117390, k2p2zq_75151); + bool cond_117399 = binop_x_117397 && binop_y_117398; + double acc_117400; + + if (cond_117399) { + double x_117401; + double redout_119850 = acc_117393; - if (!bounds_check_31612) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 41) == - -1) { - global_failure_args[0] = i32_res_31609; - global_failure_args[1] = nm_27920; - ; - } - local_failure = true; - goto error_0; + for (int64_t i_119851 = 0; i_119851 < residual_input_117295; + i_119851++) { + int64_t slice_120042 = slice_119584 + i_119851; + double x_117406 = ((__local + double *) mem_123314)[ltid_y_117332 * + tile_sizze_117141 + + i_119851]; + bool isnan_res_117407; + + isnan_res_117407 = futrts_isnan64(x_117406); + + double defunc_1_f_res_117408; + + if (isnan_res_117407) { + defunc_1_f_res_117408 = 0.0; + } else { + double x_117405 = ((__global + double *) mem_123295)[slice_120042 * + (k2p2zq_75151 * + m_75136) + + gtid_117388 * + k2p2zq_75151 + + gtid_117390]; + double defunc_1_f_res_f_res_117409 = x_117405 * x_117406; + + defunc_1_f_res_117408 = defunc_1_f_res_f_res_117409; } + + double defunc_1_op_res_117404 = defunc_1_f_res_117408 + + redout_119850; + double redout_tmp_127706 = defunc_1_op_res_117404; + + redout_119850 = redout_tmp_127706; } - - float x_31614 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_30967 * - ctx_param_ext_44581 + - i32_res_31609 * - 
ctx_param_ext_44583)]; - float y_31615 = x_31595 * x_31614; - float defunc_0_f_res_f_res_t_res_31616 = x_31607 - y_31615; - - defunc_0_f_res_f_res_31598 = defunc_0_f_res_f_res_t_res_31616; + x_117401 = redout_119850; + acc_117400 = x_117401; } else { - defunc_0_f_res_f_res_31598 = x_31595; + acc_117400 = acc_117393; } - defunc_0_f_res_31580 = defunc_0_f_res_f_res_31598; + mem_123318[(int64_t) 0] = acc_117400; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125310[(int64_t) 0] = mem_123318[(int64_t) 0]; } - ((__local float *) mem_44594)[gtid_30970] = defunc_0_f_res_31580; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_31013 = sext_i32_i64(ltid_pre_45871); - int32_t phys_tid_31014 = local_tid_45867; - float write_value_31619 = ((__local float *) mem_44594)[write_i_31013]; - if (sle64((int64_t) 0, write_i_31013) && slt64(write_i_31013, nm_27920)) { - ((__global float *) mem_44590)[gtid_30967 + write_i_31013 * m_27772] = - write_value_31619; + __local char *mem_123330; + + mem_123330 = (__local char *) mem_123330_backing_6; + + int64_t ltid_y_117413 = sext_i32_i64(ltid_pre_127701); + int64_t ltid_x_117411 = sext_i32_i64(ltid_pre_127702); + int32_t ltid_flat_117412 = local_tid_127697; + + if (slt64(ltid_y_117413, tile_sizze_117141) && slt64(ltid_x_117411, + tile_sizze_117141)) { + int64_t gtid_117422 = binop_x_117250 + ltid_y_117413; + int64_t gtid_117424 = binop_x_117265 + ltid_x_117411; + bool binop_x_117426 = slt64(gtid_117422, m_75136); + bool binop_y_117427 = slt64(gtid_117424, k2p2zq_75151); + bool cond_117428 = binop_x_117426 && binop_y_117427; + + if (cond_117428) { + double defunc_2_reduce_res_117425 = mem_125310[(int64_t) 0]; + double defunc_3_map_res_r_transformed_row_117432 = ((__global + double *) mem_123143)[gtid_117422 * + (k2p2zq_75151 * + k2p2zq_75151) + + i_110427 * + k2p2zq_75151 + + i_110427]; + double defunc_2_map_res_transformed_row_117434 = ((__global + double *) 
mem_121938)[gtid_117424 * + k2p2zq_75151 + + i_110427]; + double zm_res_117435 = defunc_2_map_res_transformed_row_117434 - + defunc_2_reduce_res_117425; + double zs_res_117436 = zm_res_117435 / + defunc_3_map_res_r_transformed_row_117432; + + ((__global double *) mem_123291)[gtid_117422 * k2p2zq_75151 + + gtid_117424 + i_110427 * + (k2p2zq_75151 * m_75136)] = + zs_res_117436; + for (int64_t i_127707 = 0; i_127707 < k2p2zq_75151; i_127707++) { + ((__global double *) mem_125317)[gid_flat_117146 + i_127707 * + num_threads_126227] = + ((__global double *) mem_123291)[gtid_117422 * + k2p2zq_75151 + + gtid_117424 + i_127707 * + (k2p2zq_75151 * m_75136)]; + } + } + for (int64_t i_127708 = 0; i_127708 < k2p2zq_75151; i_127708++) { + ((__local double *) mem_123330)[ltid_y_117413 * (k2p2zq_75151 * + tile_sizze_117141) + + ltid_x_117411 * k2p2zq_75151 + + i_127708] = ((__global + double *) mem_125317)[gid_flat_117146 + + i_127708 * + num_threads_126227]; + } } barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_45867 == 0) { - for (int64_t i_45872 = 0; i_45872 < nm_27920; i_45872++) { - ((__global float *) mem_44598)[gtid_30967 * nm_27920 + i_45872] = - ((__global float *) mem_44590)[gtid_30967 + i_45872 * m_27772]; + + int64_t thread_out_index_127709 = gid_x_117138 * tile_sizze_117141 + + sext_i32_i64(ltid_pre_127701); + int64_t thread_out_index_127710 = gid_y_117139 * tile_sizze_117141 + + sext_i32_i64(ltid_pre_127702); + + if (slt64(thread_out_index_127709, m_75136) && + slt64(thread_out_index_127710, k2p2zq_75151)) { + for (int64_t i_127711 = 0; i_127711 < k2p2zq_75151; i_127711++) { + ((__global double *) mem_123334)[thread_out_index_127709 * + (k2p2zq_75151 * k2p2zq_75151) + + thread_out_index_127710 * + k2p2zq_75151 + i_127711] = + ((__local double *) mem_123330)[sext_i32_i64(ltid_pre_127701) * + (k2p2zq_75151 * + tile_sizze_117141) + + sext_i32_i64(ltid_pre_127702) * + k2p2zq_75151 + i_127711]; } } - error_2: + error_6: return; + #undef tile_sizze_117141 } -__kernel void 
mainDetailedzisegmap_intragroup_32146(__global - int *global_failure, - __local volatile - int64_t *mem_45150_backing_aligned_0, - __local volatile - int64_t *mem_45148_backing_aligned_1, - __local volatile - int64_t *mem_45146_backing_aligned_2, - __local volatile - int64_t *mem_45144_backing_aligned_3, - int64_t N_27771, - int64_t i_28075, __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_3_map_res_mem_45140, - __global - unsigned char *mem_45153, - __global - unsigned char *mem_45156, - __global - unsigned char *mem_45159) +__kernel void mainzisegmap_intragroup_117465(__global int *global_failure, + __local volatile + int64_t *mem_123437_backing_aligned_0, + __local volatile + int64_t *mem_123435_backing_aligned_1, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t gridDim_x_117458, + int64_t gridDim_y_117459, + int64_t full_tiles_117490, + int64_t kk_117693, + int64_t binop_x_120251, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123419, + __global unsigned char *mem_123610) { + #define Ty_117445 (mainziTy_117442) + #define Ry_117446 (mainziRy_117444) + #define Tx_117447 (mainziTx_117441) + #define Rx_117448 (mainziRx_117443) + #define Tk_117449 (mainziTk_117440) + #define tk_div_tx_117450 (sdiv_up_safe64(mainziTk_117440, mainziTx_117441)) + #define tk_div_ty_117451 (sdiv_up_safe64(mainziTk_117440, mainziTy_117442)) + #define TxRx_117452 (mainziTx_117441 * mainziRx_117443) + #define TyRy_117453 (mainziTy_117442 * mainziRy_117444) + #define a_loc_szz_117455 (mainziTk_117440 * (mainziTy_117442 * mainziRy_117444)) + #define b_loc_szz_117457 (mainziRx_117443 * (mainziTx_117441 * mainziTk_117440)) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_45150_backing_3 = (__local volatile - char *) mem_45150_backing_aligned_0; - __local volatile char *restrict mem_45148_backing_2 = (__local volatile - char *) 
mem_45148_backing_aligned_1; - __local volatile char *restrict mem_45146_backing_1 = (__local volatile - char *) mem_45146_backing_aligned_2; - __local volatile char *restrict mem_45144_backing_0 = (__local volatile - char *) mem_45144_backing_aligned_3; + __local volatile char *restrict mem_123437_backing_1 = (__local volatile + char *) mem_123437_backing_aligned_0; + __local volatile char *restrict mem_123435_backing_0 = (__local volatile + char *) mem_123435_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46248; - int32_t local_tid_46249; - int64_t group_sizze_46252; - int32_t wave_sizze_46251; - int32_t group_tid_46250; - - global_tid_46248 = get_global_id(0); - local_tid_46249 = get_local_id(0); - group_sizze_46252 = get_local_size(0); - wave_sizze_46251 = LOCKSTEP_WIDTH; - group_tid_46250 = get_group_id(0); + int32_t global_tid_127796; + int32_t local_tid_127797; + int64_t group_sizze_127800; + int32_t wave_sizze_127799; + int32_t group_tid_127798; - int32_t phys_tid_32146; + global_tid_127796 = get_global_id(0); + local_tid_127797 = get_local_id(0); + group_sizze_127800 = get_local_size(0); + wave_sizze_127799 = LOCKSTEP_WIDTH; + group_tid_127798 = get_group_id(0); - phys_tid_32146 = group_tid_46250; + int32_t gid_flat_117465; - int32_t ltid_pre_46253; + gid_flat_117465 = group_tid_127798; - ltid_pre_46253 = local_tid_46249; + int32_t ltid_pre_127801; - int64_t gtid_32139; + ltid_pre_127801 = squot32(local_tid_127797, sext_i64_i32(Tx_117447)); - gtid_32139 = sext_i32_i64(group_tid_46250); + int32_t ltid_pre_127802; - __local char *mem_45144; + ltid_pre_127802 = local_tid_127797 - squot32(local_tid_127797, + sext_i64_i32(Tx_117447)) * + sext_i64_i32(Tx_117447); - mem_45144 = (__local char *) mem_45144_backing_0; + int64_t gtid_108577; - __local char *mem_45146; + gtid_108577 = squot64(sext_i32_i64(group_tid_127798), gridDim_y_117459 * + gridDim_x_117458); - mem_45146 = (__local char *) mem_45146_backing_1; + int64_t gid_y_117464; - 
int64_t gtid_32142 = sext_i32_i64(ltid_pre_46253); - int32_t phys_tid_32143 = local_tid_46249; - float x_32235 = ((__global float *) images_mem_44381)[gtid_32139 * N_27771 + - gtid_32142]; - bool isnan_res_32237; + gid_y_117464 = squot64(sext_i32_i64(group_tid_127798) - + squot64(sext_i32_i64(group_tid_127798), + gridDim_y_117459 * gridDim_x_117458) * + (gridDim_y_117459 * gridDim_x_117458), + gridDim_x_117458); - isnan_res_32237 = futrts_isnan32(x_32235); + int64_t gid_x_117463; - bool cond_32238 = !isnan_res_32237; - float defunc_1_f_res_32239; - - if (cond_32238) { - float x_32236 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_32139 * - N_27771 + - gtid_32142]; - float defunc_1_f_res_t_res_32240 = x_32235 - x_32236; - - defunc_1_f_res_32239 = defunc_1_f_res_t_res_32240; - } else { - defunc_1_f_res_32239 = NAN; - } + gid_x_117463 = sext_i32_i64(group_tid_127798) - + squot64(sext_i32_i64(group_tid_127798), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * gridDim_x_117458) - + squot64(sext_i32_i64(group_tid_127798) - + squot64(sext_i32_i64(group_tid_127798), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * + gridDim_x_117458), + gridDim_x_117458) * gridDim_x_117458; - bool isnan_res_32241; + int64_t iii_117466; - isnan_res_32241 = futrts_isnan32(defunc_1_f_res_32239); + iii_117466 = TyRy_117453 * gid_y_117464; - bool defunc_0_p_res_32242 = !isnan_res_32241; - int64_t defunc_0_f_res_32243 = btoi_bool_i64(defunc_0_p_res_32242); + int64_t jjj_117467 = TxRx_117452 * gid_x_117463; + double mem_123433[Ry_117446 * Rx_117448]; + int64_t ltid_y_117470 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_x_117468 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117469 = local_tid_127797; + double mem_123424[Ry_117446 * Rx_117448]; - ((__local int64_t *) mem_45144)[gtid_32142] = defunc_0_f_res_32243; - ((__local float *) mem_45146)[gtid_32142] = defunc_1_f_res_32239; + for (int64_t i_117481 = 0; i_117481 < Ry_117446; i_117481++) { + for 
(int64_t i_117484 = 0; i_117484 < Rx_117448; i_117484++) { + mem_123424[i_117481 * Rx_117448 + i_117484] = 0.0; + } + } + for (int64_t i_127805 = 0; i_127805 < Ry_117446; i_127805++) { + for (int64_t i_127806 = 0; i_127806 < Rx_117448; i_127806++) { + mem_123433[i_127805 * Rx_117448 + i_127806] = mem_123424[i_127805 * + Rx_117448 + + i_127806]; + } + } barrier(CLK_LOCAL_MEM_FENCE); - int64_t dims_flat_46254; + __local char *mem_123435; - dims_flat_46254 = N_27771; + mem_123435 = (__local char *) mem_123435_backing_0; - int64_t x_32232; - int64_t x_32233; - int64_t x_46256; - int64_t x_46257; - bool ltid_in_bounds_46259; + __local char *mem_123437; - ltid_in_bounds_46259 = slt64(sext_i32_i64(local_tid_46249), N_27771); + mem_123437 = (__local char *) mem_123437_backing_1; - int32_t skip_threads_46260; + double mem_123508[Ry_117446]; + double mem_123512[Rx_117448]; + double loop_mem_123524[Ry_117446 * Rx_117448]; + double mem_param_123438[Ry_117446 * Rx_117448]; - // read input for in-block scan - { - if (ltid_in_bounds_46259) { - x_32233 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)]; - if ((local_tid_46249 - squot32(local_tid_46249, 32) * 32) == 0) { - x_32232 = x_32233; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46260 = 1; - while (slt32(skip_threads_46260, 32)) { - if (sle32(skip_threads_46260, local_tid_46249 - - squot32(local_tid_46249, 32) * 32) && - ltid_in_bounds_46259) { - // read operands - { - x_32232 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249) - - sext_i32_i64(skip_threads_46260)]; + for (int32_t i_2 = 0; i_2 < Ry_117446 * Rx_117448; i_2++) + mem_param_123438[i_2] = mem_123433[i_2]; + for (int64_t i_117491 = 0; i_117491 < full_tiles_117490; i_117491++) { + int64_t kk_117495 = Tk_117449 * i_117491; + + for (int64_t i_117496 = 0; i_117496 < Ry_117446; i_117496++) { + int64_t binop_y_117519 = Ty_117445 * i_117496; + + for (int64_t i_117498 = 0; i_117498 < 
tk_div_tx_117450; + i_117498++) { + int64_t binop_y_117517 = Tx_117447 * i_117498; + int64_t ltid_x_117500 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_y_117501 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117502 = local_tid_127797; + int64_t k_117518 = ltid_y_117501 + binop_y_117517; + int64_t i_117520 = ltid_x_117500 + binop_y_117519; + int64_t gtid_117521 = iii_117466 + i_117520; + int64_t A_col_idx_117522 = kk_117495 + k_117518; + bool cond_117523 = slt64(gtid_117521, k2p2zq_75151); + double A_elem_117524; + + if (cond_117523) { + double A_elem_117526 = ((__global + double *) mem_123419)[gtid_108577 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_117521 * + k2p2zq_75151 + + A_col_idx_117522]; + + A_elem_117524 = A_elem_117526; + } else { + A_elem_117524 = 0.0; } - // perform operation - { - bool inactive_46261 = - slt64(srem64(sext_i32_i64(local_tid_46249), N_27771), - sext_i32_i64(local_tid_46249) - - sext_i32_i64(local_tid_46249 - - skip_threads_46260)); + + bool cond_117528 = slt64(k_117518, Tk_117449); + int64_t a_loc_ind_117529; + + if (cond_117528) { + int64_t binop_y_117530 = Tk_117449 * i_117520; + int64_t loc_fi_117531 = k_117518 + binop_y_117530; - if (inactive_46261) { - x_32232 = x_32233; - } - if (!inactive_46261) { - int64_t defunc_1_op_res_32234 = add64(x_32232, x_32233); - - x_32232 = defunc_1_op_res_32234; - } + a_loc_ind_117529 = loc_fi_117531; + } else { + a_loc_ind_117529 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117529) && + slt64(a_loc_ind_117529, a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117529] = + A_elem_117524; } - } - if (sle32(wave_sizze_46251, skip_threads_46260)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46260, local_tid_46249 - - squot32(local_tid_46249, 32) * 32) && - ltid_in_bounds_46259) { - // write result - { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)] = - x_32232; - x_32233 = x_32232; + } + for (int64_t i_117536 = 0; i_117536 < 
tk_div_ty_117451; i_117536++) { + int64_t binop_y_117557 = Ty_117445 * i_117536; + + for (int64_t i_117538 = 0; i_117538 < Rx_117448; i_117538++) { + int64_t binop_y_117559 = Tx_117447 * i_117538; + int64_t ltid_x_117540 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_y_117541 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117542 = local_tid_127797; + int64_t k_117558 = ltid_x_117540 + binop_y_117557; + int64_t j_117560 = ltid_y_117541 + binop_y_117559; + int64_t gtid_117561 = jjj_117467 + j_117560; + int64_t B_row_idx_117562 = kk_117495 + k_117558; + bool cond_117563 = slt64(gtid_117561, k2p2zq_75151); + double B_elem_117564; + + if (cond_117563) { + double B_elem_117566 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_108577 * + binop_x_120251 + + B_row_idx_117562 * + k2p2zq_75151 + + gtid_117561]; + + B_elem_117564 = B_elem_117566; + } else { + B_elem_117564 = 0.0; + } + + bool cond_117568 = slt64(k_117558, Tk_117449); + int64_t b_loc_ind_117569; + + if (cond_117568) { + int64_t binop_y_117570 = TxRx_117452 * k_117558; + int64_t loc_fi_117571 = j_117560 + binop_y_117570; + + b_loc_ind_117569 = loc_fi_117571; + } else { + b_loc_ind_117569 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117569) && + slt64(b_loc_ind_117569, b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117569] = + B_elem_117564; } - } - if (sle32(wave_sizze_46251, skip_threads_46260)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46260 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46249 - squot32(local_tid_46249, 32) * 32) == 31 && - ltid_in_bounds_46259) { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46249, - 32))] = x_32232; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46262; - // read input for in-block scan - { - if 
(squot32(local_tid_46249, 32) == 0 && ltid_in_bounds_46259) { - x_46257 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)]; - if ((local_tid_46249 - squot32(local_tid_46249, 32) * 32) == - 0) { - x_46256 = x_46257; + double loop_mem_123523[Ry_117446 * Rx_117448]; + double mem_param_123495[Ry_117446 * Rx_117448]; + + for (int32_t i_3 = 0; i_3 < Ry_117446 * Rx_117448; i_3++) + mem_param_123495[i_3] = mem_param_123438[i_3]; + for (int64_t i_117576 = 0; i_117576 < Tk_117449; i_117576++) { + int64_t binop_y_117615 = TxRx_117452 * i_117576; + int64_t ltid_y_117580 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_x_117578 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117579 = local_tid_127797; + double mem_123498[Ry_117446]; + double mem_123500[Rx_117448]; + int64_t binop_x_117606 = Ry_117446 * ltid_y_117580; + + for (int64_t i_117604 = 0; i_117604 < Ry_117446; i_117604++) { + int64_t binop_x_117607 = i_117604 + binop_x_117606; + int64_t binop_y_117608 = Tk_117449 * binop_x_117607; + int64_t a_loc_ind_117609 = i_117576 + binop_y_117608; + + for (int64_t i_127818 = 0; i_127818 < (int64_t) 1; i_127818++) { + mem_123498[i_117604 + i_127818] = ((__local + double *) mem_123435)[a_loc_ind_117609 + + i_127818]; } } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46262 = 1; - while (slt32(skip_threads_46262, 32)) { - if (sle32(skip_threads_46262, local_tid_46249 - - squot32(local_tid_46249, 32) * 32) && - (squot32(local_tid_46249, 32) == 0 && - ltid_in_bounds_46259)) { - // read operands - { - x_46256 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249) - - sext_i32_i64(skip_threads_46262)]; - } - // perform operation - { - bool inactive_46263 = - slt64(srem64(sext_i32_i64(local_tid_46249 * 32 + - 32 - 1), N_27771), - sext_i32_i64(local_tid_46249 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46249 - - skip_threads_46262) * 32 + 32 - - 1)); - - if (inactive_46263) { - x_46256 = x_46257; - } - if 
(!inactive_46263) { - int64_t defunc_1_op_res_46258 = add64(x_46256, - x_46257); - - x_46256 = defunc_1_op_res_46258; - } - } - } - if (sle32(wave_sizze_46251, skip_threads_46262)) { - barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_y_117617 = Rx_117448 * ltid_x_117578; + + for (int64_t i_117613 = 0; i_117613 < Rx_117448; i_117613++) { + int64_t binop_x_117616 = i_117613 + binop_y_117615; + int64_t b_loc_ind_117618 = binop_x_117616 + binop_y_117617; + + for (int64_t i_127820 = 0; i_127820 < (int64_t) 1; i_127820++) { + mem_123500[i_117613 + i_127820] = ((__local + double *) mem_123437)[b_loc_ind_117618 + + i_127820]; } - if (sle32(skip_threads_46262, local_tid_46249 - - squot32(local_tid_46249, 32) * 32) && - (squot32(local_tid_46249, 32) == 0 && - ltid_in_bounds_46259)) { - // write result - { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)] = - x_46256; - x_46257 = x_46256; + } + for (int64_t i_127821 = 0; i_127821 < Ry_117446; i_127821++) { + mem_123508[i_127821] = mem_123498[i_127821]; + } + for (int64_t i_127822 = 0; i_127822 < Rx_117448; i_127822++) { + mem_123512[i_127822] = mem_123500[i_127822]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_123522[Ry_117446 * Rx_117448]; + int64_t ltid_y_117625 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_x_117623 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117624 = local_tid_127797; + int64_t binop_y_117666 = Ry_117446 * ltid_y_117625; + int64_t binop_y_117670 = Rx_117448 * ltid_x_117623; + + for (int64_t i_117660 = 0; i_117660 < Ry_117446; i_117660++) { + int64_t binop_x_117665 = iii_117466 + i_117660; + int64_t cmpop_x_117667 = binop_x_117665 + binop_y_117666; + bool binop_x_117668 = slt64(cmpop_x_117667, k2p2zq_75151); + + for (int64_t i_117663 = 0; i_117663 < Rx_117448; i_117663++) { + int64_t binop_x_117669 = jjj_117467 + i_117663; + int64_t cmpop_x_117671 = binop_x_117669 + binop_y_117670; + bool binop_y_117672 = slt64(cmpop_x_117671, k2p2zq_75151); + bool cond_117673 = 
binop_x_117668 && binop_y_117672; + + if (cond_117673) { + double a_117675 = mem_123508[i_117660]; + double b_117676 = mem_123512[i_117663]; + double c_117677 = mem_param_123495[i_117660 * + Rx_117448 + + i_117663]; + double defunc_1_f_res_117680 = a_117675 * b_117676; + double defunc_1_op_res_117684 = c_117677 + + defunc_1_f_res_117680; + + mem_param_123495[i_117660 * Rx_117448 + i_117663] = + defunc_1_op_res_117684; } } - if (sle32(wave_sizze_46251, skip_threads_46262)) { - barrier(CLK_LOCAL_MEM_FENCE); + } + for (int64_t i_127825 = 0; i_127825 < Ry_117446; i_127825++) { + for (int64_t i_127826 = 0; i_127826 < Rx_117448; i_127826++) { + mem_123522[i_127825 * Rx_117448 + i_127826] = + mem_param_123495[i_127825 * Rx_117448 + i_127826]; } - skip_threads_46262 *= 2; } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127815[Ry_117446 * Rx_117448]; + + for (int32_t i_4 = 0; i_4 < Ry_117446 * Rx_117448; i_4++) + mem_param_tmp_127815[i_4] = mem_123522[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_117446 * Rx_117448; i_5++) + mem_param_123495[i_5] = mem_param_tmp_127815[i_5]; } + for (int32_t i_6 = 0; i_6 < Ry_117446 * Rx_117448; i_6++) + loop_mem_123523[i_6] = mem_param_123495[i_6]; + + double mem_param_tmp_127807[Ry_117446 * Rx_117448]; + + for (int32_t i_7 = 0; i_7 < Ry_117446 * Rx_117448; i_7++) + mem_param_tmp_127807[i_7] = loop_mem_123523[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_117446 * Rx_117448; i_8++) + mem_param_123438[i_8] = mem_param_tmp_127807[i_8]; } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46249, 32) == 0 || !ltid_in_bounds_46259)) { - // read operands - { - x_32233 = x_32232; - x_32232 = ((__local - int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46249, - 32)) - - (int64_t) 1]; + for (int32_t i_9 = 0; i_9 < Ry_117446 * Rx_117448; i_9++) + loop_mem_123524[i_9] = mem_param_123438[i_9]; + for (int64_t i_117694 = 0; i_117694 < Ry_117446; i_117694++) { + int64_t binop_y_117719 = Ty_117445 
* i_117694; + + for (int64_t i_117696 = 0; i_117696 < tk_div_tx_117450; i_117696++) { + int64_t binop_y_117717 = Tx_117447 * i_117696; + int64_t ltid_x_117698 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_y_117699 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117700 = local_tid_127797; + int64_t k_117718 = ltid_y_117699 + binop_y_117717; + int64_t i_117720 = ltid_x_117698 + binop_y_117719; + int64_t gtid_117721 = iii_117466 + i_117720; + int64_t A_col_idx_117722 = kk_117693 + k_117718; + bool binop_x_117723 = slt64(gtid_117721, k2p2zq_75151); + bool binop_y_117724 = slt64(A_col_idx_117722, k2p2zq_75151); + bool cond_117725 = binop_x_117723 && binop_y_117724; + double A_elem_117726; + + if (cond_117725) { + double A_elem_117728 = ((__global + double *) mem_123419)[gtid_108577 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_117721 * + k2p2zq_75151 + + A_col_idx_117722]; + + A_elem_117726 = A_elem_117728; + } else { + A_elem_117726 = 0.0; } - // perform operation - { - bool inactive_46264 = - slt64(srem64(sext_i32_i64(local_tid_46249), N_27771), - sext_i32_i64(local_tid_46249) - - sext_i32_i64(squot32(local_tid_46249, 32) * 32 - 1)); + + bool cond_117730 = slt64(k_117718, Tk_117449); + int64_t a_loc_ind_117731; + + if (cond_117730) { + int64_t binop_y_117732 = Tk_117449 * i_117720; + int64_t loc_fi_117733 = k_117718 + binop_y_117732; + + a_loc_ind_117731 = loc_fi_117733; + } else { + a_loc_ind_117731 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117731) && slt64(a_loc_ind_117731, + a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117731] = + A_elem_117726; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_117738 = 0; i_117738 < tk_div_ty_117451; i_117738++) { + int64_t binop_y_117761 = Ty_117445 * i_117738; + + for (int64_t i_117740 = 0; i_117740 < Rx_117448; i_117740++) { + int64_t binop_y_117763 = Tx_117447 * i_117740; + int64_t ltid_x_117742 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_y_117743 = 
sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117744 = local_tid_127797; + int64_t k_117762 = ltid_x_117742 + binop_y_117761; + int64_t j_117764 = ltid_y_117743 + binop_y_117763; + int64_t gtid_117765 = jjj_117467 + j_117764; + int64_t B_row_idx_117766 = kk_117693 + k_117762; + bool binop_x_117767 = slt64(gtid_117765, k2p2zq_75151); + bool binop_y_117768 = slt64(B_row_idx_117766, k2p2zq_75151); + bool cond_117769 = binop_x_117767 && binop_y_117768; + double B_elem_117770; + + if (cond_117769) { + double B_elem_117772 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_108577 * + binop_x_120251 + + B_row_idx_117766 * + k2p2zq_75151 + + gtid_117765]; + + B_elem_117770 = B_elem_117772; + } else { + B_elem_117770 = 0.0; + } + + bool cond_117774 = slt64(k_117762, Tk_117449); + int64_t b_loc_ind_117775; + + if (cond_117774) { + int64_t binop_y_117776 = TxRx_117452 * k_117762; + int64_t loc_fi_117777 = j_117764 + binop_y_117776; + + b_loc_ind_117775 = loc_fi_117777; + } else { + b_loc_ind_117775 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117775) && slt64(b_loc_ind_117775, + b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117775] = + B_elem_117770; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_123590[Ry_117446]; + double mem_123594[Rx_117448]; + double mem_123604[Ry_117446 * Rx_117448]; + double loop_mem_123606[Ry_117446 * Rx_117448]; + double mem_param_123577[Ry_117446 * Rx_117448]; + + for (int32_t i_10 = 0; i_10 < Ry_117446 * Rx_117448; i_10++) + mem_param_123577[i_10] = loop_mem_123524[i_10]; + for (int64_t i_117782 = 0; i_117782 < Tk_117449; i_117782++) { + int64_t cmpop_x_117784 = kk_117693 + i_117782; + bool cond_117785 = slt64(cmpop_x_117784, k2p2zq_75151); + double mem_125333[Ry_117446 * Rx_117448]; + + if (cond_117785) { + int64_t binop_y_117823 = TxRx_117452 * i_117782; + int64_t bytes_123579 = (int64_t) 8 * Ry_117446; + int64_t bytes_123581 = (int64_t) 8 * Rx_117448; + int64_t ltid_y_117788 = 
sext_i32_i64(ltid_pre_127801); + int64_t ltid_x_117786 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117787 = local_tid_127797; + double mem_123580[Ry_117446]; + double mem_123582[Rx_117448]; + int64_t binop_x_117814 = Ry_117446 * ltid_y_117788; + + for (int64_t i_117812 = 0; i_117812 < Ry_117446; i_117812++) { + int64_t binop_x_117815 = i_117812 + binop_x_117814; + int64_t binop_y_117816 = Tk_117449 * binop_x_117815; + int64_t a_loc_ind_117817 = i_117782 + binop_y_117816; + + for (int64_t i_127834 = 0; i_127834 < (int64_t) 1; i_127834++) { + mem_123580[i_117812 + i_127834] = ((__local + double *) mem_123435)[a_loc_ind_117817 + + i_127834]; + } + } + + int64_t binop_y_117825 = Rx_117448 * ltid_x_117786; + + for (int64_t i_117821 = 0; i_117821 < Rx_117448; i_117821++) { + int64_t binop_x_117824 = i_117821 + binop_y_117823; + int64_t b_loc_ind_117826 = binop_x_117824 + binop_y_117825; - if (inactive_46264) { - x_32232 = x_32233; + for (int64_t i_127836 = 0; i_127836 < (int64_t) 1; i_127836++) { + mem_123582[i_117821 + i_127836] = ((__local + double *) mem_123437)[b_loc_ind_117826 + + i_127836]; } - if (!inactive_46264) { - int64_t defunc_1_op_res_32234 = add64(x_32232, x_32233); + } + for (int64_t i_127837 = 0; i_127837 < Ry_117446; i_127837++) { + mem_123590[i_127837] = mem_123580[i_127837]; + } + for (int64_t i_127838 = 0; i_127838 < Rx_117448; i_127838++) { + mem_123594[i_127838] = mem_123582[i_127838]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_117833 = sext_i32_i64(ltid_pre_127801); + int64_t ltid_x_117831 = sext_i32_i64(ltid_pre_127802); + int32_t ltid_flat_117832 = local_tid_127797; + int64_t binop_y_117874 = Ry_117446 * ltid_y_117833; + int64_t binop_y_117878 = Rx_117448 * ltid_x_117831; + + for (int64_t i_117868 = 0; i_117868 < Ry_117446; i_117868++) { + int64_t binop_x_117873 = iii_117466 + i_117868; + int64_t cmpop_x_117875 = binop_x_117873 + binop_y_117874; + bool binop_x_117876 = slt64(cmpop_x_117875, k2p2zq_75151); + + for (int64_t 
i_117871 = 0; i_117871 < Rx_117448; i_117871++) { + int64_t binop_x_117877 = jjj_117467 + i_117871; + int64_t cmpop_x_117879 = binop_x_117877 + binop_y_117878; + bool binop_y_117880 = slt64(cmpop_x_117879, k2p2zq_75151); + bool cond_117881 = binop_x_117876 && binop_y_117880; - x_32232 = defunc_1_op_res_32234; + if (cond_117881) { + double a_117883 = mem_123590[i_117868]; + double b_117884 = mem_123594[i_117871]; + double c_117885 = mem_param_123577[i_117868 * + Rx_117448 + + i_117871]; + double defunc_1_f_res_117888 = a_117883 * b_117884; + double defunc_1_op_res_117892 = c_117885 + + defunc_1_f_res_117888; + + mem_param_123577[i_117868 * Rx_117448 + i_117871] = + defunc_1_op_res_117892; + } } } - // write final result - { - ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46249)] = - x_32232; + for (int64_t i_127841 = 0; i_127841 < Ry_117446; i_127841++) { + for (int64_t i_127842 = 0; i_127842 < Rx_117448; i_127842++) { + mem_123604[i_127841 * Rx_117448 + i_127842] = + mem_param_123577[i_127841 * Rx_117448 + i_127842]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127843 = 0; i_127843 < Ry_117446; i_127843++) { + for (int64_t i_127844 = 0; i_127844 < Rx_117448; i_127844++) { + mem_125333[i_127843 * Rx_117448 + i_127844] = + mem_123604[i_127843 * Rx_117448 + i_127844]; + } + } + } else { + for (int64_t i_127845 = 0; i_127845 < Ry_117446; i_127845++) { + for (int64_t i_127846 = 0; i_127846 < Rx_117448; i_127846++) { + mem_125333[i_127845 * Rx_117448 + i_127846] = + mem_param_123577[i_127845 * Rx_117448 + i_127846]; + } } } + + double mem_param_tmp_127831[Ry_117446 * Rx_117448]; + + for (int32_t i_11 = 0; i_11 < Ry_117446 * Rx_117448; i_11++) + mem_param_tmp_127831[i_11] = mem_125333[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_117446 * Rx_117448; i_12++) + mem_param_123577[i_12] = mem_param_tmp_127831[i_12]; } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46249, 32) == 0) { - ((__local int64_t 
*) mem_45144)[sext_i32_i64(local_tid_46249)] = - x_32233; + for (int32_t i_13 = 0; i_13 < Ry_117446 * Rx_117448; i_13++) + loop_mem_123606[i_13] = mem_param_123577[i_13]; + + int64_t reg_tile_i_127847 = squot64(sext_i32_i64(local_tid_127797), + Ty_117445 * Tx_117447); + int64_t reg_tile_i_127848 = squot64(sext_i32_i64(local_tid_127797) - + squot64(sext_i32_i64(local_tid_127797), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), Tx_117447); + int64_t reg_tile_i_127849 = sext_i32_i64(local_tid_127797) - + squot64(sext_i32_i64(local_tid_127797), Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447) - squot64(sext_i32_i64(local_tid_127797) - + squot64(sext_i32_i64(local_tid_127797), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), + Tx_117447) * Tx_117447; + int64_t tile_dim_start_127850 = gtid_108577 + reg_tile_i_127847; + int64_t tile_dim_start_127851 = Ry_117446 * (Ty_117445 * gid_y_117464 + + reg_tile_i_127848); + int64_t tile_dim_start_127852 = Rx_117448 * (Tx_117447 * gid_x_117463 + + reg_tile_i_127849); + + for (int64_t nest_i_127853 = 0; nest_i_127853 < (int64_t) 1; + nest_i_127853++) { + for (int64_t nest_i_127854 = 0; nest_i_127854 < Ry_117446; + nest_i_127854++) { + for (int64_t nest_i_127855 = 0; nest_i_127855 < Rx_117448; + nest_i_127855++) { + if ((slt64(tile_dim_start_127850 + nest_i_127853, m_75136) && + slt64(tile_dim_start_127851 + nest_i_127854, + k2p2zq_75151)) && slt64(tile_dim_start_127852 + + nest_i_127855, + k2p2zq_75151)) { + ((__global double *) mem_123610)[(tile_dim_start_127850 + + nest_i_127853) * + (k2p2zq_75151 * + k2p2zq_75151) + + (tile_dim_start_127851 + + nest_i_127854) * + k2p2zq_75151 + + (tile_dim_start_127852 + + nest_i_127855)] = + loop_mem_123606[squot64(nest_i_127854 * Rx_117448 + + nest_i_127855 - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855 - + squot64(nest_i_127854 * + 
Rx_117448 + + nest_i_127855, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448 + + (nest_i_127854 * Rx_117448 + + nest_i_127855 - squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * Rx_117448) - + squot64(nest_i_127854 * Rx_117448 + + nest_i_127855 - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * (Ry_117446 * + Rx_117448) - + squot64(nest_i_127854 * Rx_117448 + + nest_i_127855 - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855 - + squot64(nest_i_127854 * + Rx_117448 + + nest_i_127855, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * + Ry_117446 * + Rx_117448), + Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448)]; + } + } } } - barrier(CLK_LOCAL_MEM_FENCE); - int64_t last_res_32244 = ((__local int64_t *) mem_45144)[i_28075]; - int32_t defunc_0_f_res_32245 = sext_i64_i32(last_res_32244); - __local char *mem_45148; + error_9: + return; + #undef Ty_117445 + #undef Ry_117446 + #undef Tx_117447 + #undef Rx_117448 + #undef Tk_117449 + #undef tk_div_tx_117450 + #undef tk_div_ty_117451 + #undef TxRx_117452 + #undef TyRy_117453 + #undef a_loc_szz_117455 + #undef b_loc_szz_117457 +} +__kernel void mainzisegmap_intragroup_117900(__global int *global_failure, + __local volatile + int64_t *mem_124104_backing_aligned_0, + __local volatile + int64_t *mem_124097_backing_aligned_1, + int64_t m_75136, + double level_75142, + int64_t num_recresids_padded_75809, + int64_t num_whole_tiles_117920, + int64_t residual_input_118032, + unsigned char cond_118033, __global + unsigned char 
*defunc_3_map_res_mem_124069, + __global unsigned char *mem_124081, + __global unsigned char *mem_124084, + __global unsigned char *mem_124113) +{ + #define segmap_group_sizze_111966 (mainzisegmap_group_sizze_111831) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124104_backing_5 = (__local volatile + char *) mem_124104_backing_aligned_0; + __local volatile char *restrict mem_124097_backing_0 = (__local volatile + char *) mem_124097_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128537; + int32_t local_tid_128538; + int64_t group_sizze_128541; + int32_t wave_sizze_128540; + int32_t group_tid_128539; + + global_tid_128537 = get_global_id(0); + local_tid_128538 = get_local_id(0); + group_sizze_128541 = get_local_size(0); + wave_sizze_128540 = LOCKSTEP_WIDTH; + group_tid_128539 = get_group_id(0); + + int32_t gid_flat_117900; + + gid_flat_117900 = group_tid_128539; + + int32_t ltid_pre_128542; + + ltid_pre_128542 = local_tid_128538; + + int64_t gid_117899; + + gid_117899 = sext_i32_i64(group_tid_128539); - mem_45148 = (__local char *) mem_45148_backing_2; - ((__local float *) mem_45148)[sext_i32_i64(local_tid_46249)] = NAN; + int64_t binop_x_117909; + + binop_x_117909 = segmap_group_sizze_111966 * gid_117899; + + int64_t mem_124088[1]; + double mem_124090[1]; + int64_t ltid_117901 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_117902 = local_tid_128538; + int64_t gtid_117910 = ltid_117901 + binop_x_117909; + bool cond_117911 = slt64(gtid_117910, m_75136); + int64_t pre_117912; + double pre_117913; + + if (cond_117911) { + int64_t x_117914 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_117910]; + double i64_res_117915 = sitofp_i64_f64(x_117914); + + pre_117912 = x_117914; + pre_117913 = i64_res_117915; + } else { + pre_117912 = (int64_t) 0; + pre_117913 = 0.0; + } + mem_124088[(int64_t) 0] = pre_117912; + mem_124090[(int64_t) 0] = 
pre_117913; barrier(CLK_LOCAL_MEM_FENCE); - __local char *mem_45150; + double mem_124093[1]; + int64_t ltid_117921 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_117922 = local_tid_128538; - mem_45150 = (__local char *) mem_45150_backing_3; - ((__local int32_t *) mem_45150)[sext_i32_i64(local_tid_46249)] = 0; + mem_124093[(int64_t) 0] = -INFINITY; barrier(CLK_LOCAL_MEM_FENCE); - int64_t write_i_32144 = sext_i32_i64(ltid_pre_46253); - int32_t phys_tid_32145 = local_tid_46249; - float x_32250 = ((__local float *) mem_45146)[write_i_32144]; - int32_t index_primexp_42374 = sext_i64_i32(write_i_32144); - bool isnan_res_32253; + __local char *mem_124097; - isnan_res_32253 = futrts_isnan32(x_32250); + mem_124097 = (__local char *) mem_124097_backing_0; - bool defunc_0_p_res_32254 = !isnan_res_32253; - int64_t defunc_1_f_res_32255; + double accs_mem_124101[1]; + double mem_param_124094[1]; - if (defunc_0_p_res_32254) { - int64_t x_32251 = ((__local int64_t *) mem_45144)[write_i_32144]; - int64_t defunc_1_f_res_t_res_32256 = sub64(x_32251, (int64_t) 1); + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_124094[i_1] = mem_124093[i_1]; + for (int64_t tile_id_117928 = 0; tile_id_117928 < num_whole_tiles_117920; + tile_id_117928++) { + int64_t binop_x_117984 = segmap_group_sizze_111966 * tile_id_117928; + int64_t ltid_117929 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_117930 = local_tid_128538; + int64_t j_117985 = ltid_117929 + binop_x_117984; + bool cond_117991 = slt64(j_117985, num_recresids_padded_75809); + int64_t pre_117992; + + if (cond_117991) { + pre_117992 = j_117985; + } else { + pre_117992 = (int64_t) 0; + } + ((__local int64_t *) mem_124097)[ltid_117929] = pre_117992; + barrier(CLK_LOCAL_MEM_FENCE); - defunc_1_f_res_32255 = defunc_1_f_res_t_res_32256; - } else { - defunc_1_f_res_32255 = (int64_t) -1; + int64_t slice_119591 = (int64_t) 1 + binop_x_117984; + double mem_124100[1]; + int64_t ltid_117950 = sext_i32_i64(ltid_pre_128542); + int32_t 
ltid_flat_117951 = local_tid_128538; + int64_t gtid_117997 = binop_x_117909 + ltid_117950; + double acc_118001 = mem_param_124094[(int64_t) 0]; + bool cond_118004 = slt64(gtid_117997, m_75136); + double acc_118005; + + if (cond_118004) { + double i64_res_117999 = mem_124090[(int64_t) 0]; + double x_118006; + double redout_119906 = acc_118001; + + for (int64_t i_119907 = 0; i_119907 < segmap_group_sizze_111966; + i_119907++) { + int64_t slice_120045 = slice_119591 + i_119907; + double x_118010 = ((__global + double *) mem_124081)[slice_120045 * + m_75136 + + gtid_117997]; + int64_t x_118011 = ((__local int64_t *) mem_124097)[i_119907]; + int64_t x_118012 = mul64((int64_t) 2, x_118011); + int64_t i64_arg_118013 = add64((int64_t) 2, x_118012); + double i64_res_118014 = sitofp_i64_f64(i64_arg_118013); + double y_118015 = i64_res_118014 / i64_res_117999; + double lifted_div_res_118016 = 1.0 + y_118015; + double abs_arg_118017 = x_118010 / lifted_div_res_118016; + double abs_res_118018 = fabs(abs_arg_118017); + double defunc_1_op_res_118009 = fmax64(abs_res_118018, + redout_119906); + double redout_tmp_128545 = defunc_1_op_res_118009; + + redout_119906 = redout_tmp_128545; + } + x_118006 = redout_119906; + acc_118005 = x_118006; + } else { + acc_118005 = acc_118001; + } + mem_124100[(int64_t) 0] = acc_118005; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128543[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_128543[i_2] = mem_124100[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_124094[i_3] = mem_param_tmp_128543[i_3]; } - if (sle64((int64_t) 0, defunc_1_f_res_32255) && slt64(defunc_1_f_res_32255, - N_27771)) { - ((__local int32_t *) mem_45150)[defunc_1_f_res_32255] = - index_primexp_42374; + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_124101[i_4] = mem_param_124094[i_4]; + + __local char *mem_124104; + + mem_124104 = (__local char *) mem_124104_backing_5; + + double mem_124107[1]; + double mem_125358[1]; + + if (cond_118033) { + 
mem_125358[(int64_t) 0] = accs_mem_124101[(int64_t) 0]; + } else { + int64_t binop_x_118043 = segmap_group_sizze_111966 * + num_whole_tiles_117920; + int64_t ltid_118034 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_118035 = local_tid_128538; + int64_t j_118044 = ltid_118034 + binop_x_118043; + bool cond_118050 = slt64(j_118044, num_recresids_padded_75809); + int64_t pre_118051; + + if (cond_118050) { + pre_118051 = j_118044; + } else { + pre_118051 = (int64_t) 0; + } + ((__local int64_t *) mem_124104)[ltid_118034] = pre_118051; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_offset_118071 = num_whole_tiles_117920 * + residual_input_118032; + int64_t slice_119594 = (int64_t) 1 + slice_offset_118071; + int64_t ltid_118056 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_118057 = local_tid_128538; + int64_t gtid_118066 = binop_x_117909 + ltid_118056; + double acc_118070 = accs_mem_124101[(int64_t) 0]; + bool cond_118073 = slt64(gtid_118066, m_75136); + double acc_118074; + + if (cond_118073) { + double i64_res_118068 = mem_124090[(int64_t) 0]; + double x_118075; + double redout_119908 = acc_118070; + + for (int64_t i_119909 = 0; i_119909 < residual_input_118032; + i_119909++) { + int64_t slice_120046 = slice_119594 + i_119909; + double x_118079 = ((__global + double *) mem_124081)[slice_120046 * + m_75136 + + gtid_118066]; + int64_t x_118080 = ((__local int64_t *) mem_124104)[i_119909]; + int64_t x_118081 = mul64((int64_t) 2, x_118080); + int64_t i64_arg_118082 = add64((int64_t) 2, x_118081); + double i64_res_118083 = sitofp_i64_f64(i64_arg_118082); + double y_118084 = i64_res_118083 / i64_res_118068; + double lifted_div_res_118085 = 1.0 + y_118084; + double abs_arg_118086 = x_118079 / lifted_div_res_118085; + double abs_res_118087 = fabs(abs_arg_118086); + double defunc_1_op_res_118078 = fmax64(abs_res_118087, + redout_119908); + double redout_tmp_128546 = defunc_1_op_res_118078; + + redout_119908 = redout_tmp_128546; + } + x_118075 = redout_119908; + 
acc_118074 = x_118075; + } else { + acc_118074 = acc_118070; + } + mem_124107[(int64_t) 0] = acc_118074; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125358[(int64_t) 0] = mem_124107[(int64_t) 0]; } - if (sle64((int64_t) 0, defunc_1_f_res_32255) && slt64(defunc_1_f_res_32255, - N_27771)) { - ((__local float *) mem_45148)[defunc_1_f_res_32255] = x_32250; + + int64_t mem_124111[1]; + int64_t ltid_118090 = sext_i32_i64(ltid_pre_128542); + int32_t ltid_flat_118091 = local_tid_128538; + int64_t gtid_118097 = binop_x_117909 + ltid_118090; + bool cond_118099 = slt64(gtid_118097, m_75136); + int64_t postlude_118100; + + if (cond_118099) { + double defunc_2_reduce_res_118098 = mem_125358[(int64_t) 0]; + double defunc_0_Q_arg_118105 = 3.0 * defunc_2_reduce_res_118098; + double zs_res_118106 = defunc_0_Q_arg_118105 / 1.4142135623730951; + double abs_res_118107 = fabs(zs_res_118106); + double zs_res_118108 = abs_res_118107 / 2.0; + double zp_res_118109 = 1.0 + zs_res_118108; + double zs_res_118110 = 1.0 / zp_res_118109; + double zt_res_118111 = zs_res_118110 * zs_res_118110; + double zt_res_118112 = zs_res_118110 * zt_res_118111; + double zt_res_118113 = zt_res_118111 * zt_res_118111; + double zt_res_118114 = zt_res_118111 * zt_res_118112; + double zt_res_118115 = zt_res_118112 * zt_res_118112; + double zt_res_118116 = zt_res_118112 * zt_res_118113; + double zt_res_118117 = zt_res_118113 * zt_res_118113; + double zt_res_118118 = zt_res_118113 * zt_res_118114; + double zt_res_118119 = 0.17087277 * zt_res_118118; + double zt_res_118120 = 0.82215223 * zt_res_118117; + double zt_res_118121 = 1.48851587 * zt_res_118116; + double zt_res_118122 = 1.13520398 * zt_res_118115; + double zt_res_118123 = 0.27886807 * zt_res_118114; + double zt_res_118124 = 0.18628806 * zt_res_118113; + double zt_res_118125 = 9.678418e-2 * zt_res_118112; + double zt_res_118126 = 0.37409196 * zt_res_118111; + double zt_res_118127 = 1.00002368 * zs_res_118110; + double zt_res_118128 = zs_res_118106 * zs_res_118106; 
+ double zm_res_118129 = 0.0 - zt_res_118128; + double zm_res_118130 = zm_res_118129 - 1.26551223; + double zp_res_118131 = zt_res_118127 + zm_res_118130; + double zp_res_118132 = zt_res_118126 + zp_res_118131; + double zp_res_118133 = zt_res_118125 + zp_res_118132; + double zm_res_118134 = zp_res_118133 - zt_res_118124; + double zp_res_118135 = zt_res_118123 + zm_res_118134; + double zm_res_118136 = zp_res_118135 - zt_res_118122; + double zp_res_118137 = zt_res_118121 + zm_res_118136; + double zm_res_118138 = zp_res_118137 - zt_res_118120; + double zp_res_118139 = zt_res_118119 + zm_res_118138; + double exp_res_118140; + + exp_res_118140 = futrts_exp64(zp_res_118139); + + double zt_res_118141 = zs_res_118110 * exp_res_118140; + bool zgze_res_118142 = 0.0 <= zs_res_118106; + double erf_res_118143; + + if (zgze_res_118142) { + double zm_res_118144 = 1.0 - zt_res_118141; + + erf_res_118143 = zm_res_118144; + } else { + double zm_res_118145 = zt_res_118141 - 1.0; + + erf_res_118143 = zm_res_118145; + } + + double zp_res_118146 = 1.0 + erf_res_118143; + double zs_res_118147 = zp_res_118146 / 2.0; + double defunc_0_Q_res_118148 = 1.0 - zs_res_118147; + double y_118149 = fpow64(defunc_2_reduce_res_118098, 2.0); + double negate_arg_118150 = 4.0 * y_118149; + double defunc_0_exp_arg_118151 = 0.0 - negate_arg_118150; + double defunc_0_exp_res_118152 = fpow64(2.718281828459045, + defunc_0_exp_arg_118151); + double x_118153 = defunc_0_Q_res_118148 + defunc_0_exp_res_118152; + double zs_res_118154 = defunc_2_reduce_res_118098 / 1.4142135623730951; + double abs_res_118155 = fabs(zs_res_118154); + double zs_res_118156 = abs_res_118155 / 2.0; + double zp_res_118157 = 1.0 + zs_res_118156; + double zs_res_118158 = 1.0 / zp_res_118157; + double zt_res_118159 = zs_res_118158 * zs_res_118158; + double zt_res_118160 = zs_res_118158 * zt_res_118159; + double zt_res_118161 = zt_res_118159 * zt_res_118159; + double zt_res_118162 = zt_res_118159 * zt_res_118160; + double zt_res_118163 = 
zt_res_118160 * zt_res_118160; + double zt_res_118164 = zt_res_118160 * zt_res_118161; + double zt_res_118165 = zt_res_118161 * zt_res_118161; + double zt_res_118166 = zt_res_118161 * zt_res_118162; + double zt_res_118167 = 0.17087277 * zt_res_118166; + double zt_res_118168 = 0.82215223 * zt_res_118165; + double zt_res_118169 = 1.48851587 * zt_res_118164; + double zt_res_118170 = 1.13520398 * zt_res_118163; + double zt_res_118171 = 0.27886807 * zt_res_118162; + double zt_res_118172 = 0.18628806 * zt_res_118161; + double zt_res_118173 = 9.678418e-2 * zt_res_118160; + double zt_res_118174 = 0.37409196 * zt_res_118159; + double zt_res_118175 = 1.00002368 * zs_res_118158; + double zt_res_118176 = zs_res_118154 * zs_res_118154; + double zm_res_118177 = 0.0 - zt_res_118176; + double zm_res_118178 = zm_res_118177 - 1.26551223; + double zp_res_118179 = zt_res_118175 + zm_res_118178; + double zp_res_118180 = zt_res_118174 + zp_res_118179; + double zp_res_118181 = zt_res_118173 + zp_res_118180; + double zm_res_118182 = zp_res_118181 - zt_res_118172; + double zp_res_118183 = zt_res_118171 + zm_res_118182; + double zm_res_118184 = zp_res_118183 - zt_res_118170; + double zp_res_118185 = zt_res_118169 + zm_res_118184; + double zm_res_118186 = zp_res_118185 - zt_res_118168; + double zp_res_118187 = zt_res_118167 + zm_res_118186; + double exp_res_118188; + + exp_res_118188 = futrts_exp64(zp_res_118187); + + double zt_res_118189 = zs_res_118158 * exp_res_118188; + bool zgze_res_118190 = 0.0 <= zs_res_118154; + double erf_res_118191; + + if (zgze_res_118190) { + double zm_res_118192 = 1.0 - zt_res_118189; + + erf_res_118191 = zm_res_118192; + } else { + double zm_res_118193 = zt_res_118189 - 1.0; + + erf_res_118191 = zm_res_118193; + } + + double zp_res_118194 = 1.0 + erf_res_118191; + double zs_res_118195 = zp_res_118194 / 2.0; + double defunc_0_Q_res_118196 = 1.0 - zs_res_118195; + double y_118197 = defunc_0_exp_res_118152 * defunc_0_Q_res_118196; + double y_118198 = x_118153 - 
y_118197; + double pval_brownian_motion_max_res_118199 = 2.0 * y_118198; + int64_t defunc_0_f_res_118200; + int64_t redout_119910 = (int64_t) 9223372036854775807; + + for (int64_t i_119911 = 0; i_119911 < num_recresids_padded_75809; + i_119911++) { + int64_t slice_120048 = (int64_t) 1 + i_119911; + double x_118205 = ((__global double *) mem_124081)[slice_120048 * + m_75136 + + gtid_118097]; + double x_118206 = ((__global double *) mem_124084)[slice_120048 * + m_75136 + + gtid_118097]; + double abs_res_118207 = fabs(x_118205); + bool cond_118208 = x_118206 < abs_res_118207; + int64_t defunc_2_f_res_118209; + + if (cond_118208) { + defunc_2_f_res_118209 = i_119911; + } else { + defunc_2_f_res_118209 = (int64_t) 9223372036854775807; + } + + int64_t defunc_1_op_res_118203 = smin64(defunc_2_f_res_118209, + redout_119910); + int64_t redout_tmp_128547 = defunc_1_op_res_118203; + + redout_119910 = redout_tmp_128547; + } + defunc_0_f_res_118200 = redout_119910; + + bool isnan_res_118210; + + isnan_res_118210 = futrts_isnan64(pval_brownian_motion_max_res_118199); + + bool cond_118211 = !isnan_res_118210; + bool cond_t_res_118212 = pval_brownian_motion_max_res_118199 < + level_75142; + bool x_118213 = cond_118211 && cond_t_res_118212; + bool chk_t_res_118214 = defunc_0_f_res_118200 == + (int64_t) 9223372036854775807; + bool chk_t_res_118215 = !chk_t_res_118214; + bool x_118216 = x_118213 && chk_t_res_118215; + int64_t y_start_118217; + + if (x_118216) { + int64_t x_118101 = mem_124088[(int64_t) 0]; + int64_t y_start_t_res_118218 = sub64(x_118101, + defunc_0_f_res_118200); + + y_start_118217 = y_start_t_res_118218; + } else { + y_start_118217 = (int64_t) 0; + } + postlude_118100 = y_start_118217; + } else { + postlude_118100 = (int64_t) 0; } + mem_124111[(int64_t) 0] = postlude_118100; barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_46249 == 0) { - ((__global int32_t *) mem_45153)[gtid_32139] = defunc_0_f_res_32245; + if (slt64(sext_i32_i64(local_tid_128538) + 
segmap_group_sizze_111966 * + sext_i32_i64(group_tid_128539), m_75136)) { + ((__global int64_t *) mem_124113)[sext_i32_i64(local_tid_128538) + + segmap_group_sizze_111966 * + sext_i32_i64(group_tid_128539)] = + mem_124111[(int64_t) 0]; } - ((__global float *) mem_45156)[gtid_32139 * N_27771 + - sext_i32_i64(local_tid_46249)] = ((__local - float *) mem_45148)[sext_i32_i64(local_tid_46249)]; - barrier(CLK_LOCAL_MEM_FENCE); - ((__global int32_t *) mem_45159)[gtid_32139 * N_27771 + - sext_i32_i64(local_tid_46249)] = ((__local - int32_t *) mem_45150)[sext_i32_i64(local_tid_46249)]; - barrier(CLK_LOCAL_MEM_FENCE); - error_2: + error_7: return; + #undef segmap_group_sizze_111966 } -__kernel void mainDetailedzisegmap_intragroup_32486(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46373_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46369_backing_aligned_1, - int64_t N_27771, - float hfrac_27777, - int64_t i32_res_27781, - int32_t k2p2_27783, __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45225, - __global - unsigned char *mem_45227, - __global - unsigned char *mem_45229) +__kernel void mainzisegmap_intragroup_118238(__global int *global_failure, + __local volatile + int64_t *mem_124225_backing_aligned_0, + int64_t m_75136, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t Ty_118226, + int64_t Tx_118227, + int64_t gridDim_x_118228, + int64_t gridDim_y_118229, + int64_t group_sizze_tile3d_118233, + int64_t count_shmem_118234, + __global unsigned char *mem_120120, + __global unsigned char *mem_120124, + __global unsigned char *mem_124213, + __global unsigned char *mem_124273) { const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46373_backing_1 = - (__local volatile - char *) 
red_arr_mem_46373_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46369_backing_0 = - (__local volatile - char *) red_arr_mem_46369_backing_aligned_1; - volatile __local bool local_failure; + __local volatile char *restrict mem_124225_backing_0 = (__local volatile + char *) mem_124225_backing_aligned_0; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - int32_t global_tid_46363; - int32_t local_tid_46364; - int64_t group_sizze_46367; - int32_t wave_sizze_46366; - int32_t group_tid_46365; + int32_t global_tid_128731; + int32_t local_tid_128732; + int64_t group_sizze_128735; + int32_t wave_sizze_128734; + int32_t group_tid_128733; - global_tid_46363 = get_global_id(0); - local_tid_46364 = get_local_id(0); - group_sizze_46367 = get_local_size(0); - wave_sizze_46366 = LOCKSTEP_WIDTH; - group_tid_46365 = get_group_id(0); + global_tid_128731 = get_global_id(0); + local_tid_128732 = get_local_id(0); + group_sizze_128735 = get_local_size(0); + wave_sizze_128734 = LOCKSTEP_WIDTH; + group_tid_128733 = get_group_id(0); - int32_t phys_tid_32486; + int32_t gid_flat_118238; - phys_tid_32486 = group_tid_46365; + gid_flat_118238 = group_tid_128733; - int32_t ltid_pre_46368; + int32_t ltid_pre_128736; - ltid_pre_46368 = local_tid_46364; + ltid_pre_128736 = squot32(local_tid_128732, sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)); - int64_t gtid_32479; + int32_t ltid_pre_128737; - gtid_32479 = sext_i32_i64(group_tid_46365); + ltid_pre_128737 = squot32(local_tid_128732 - squot32(local_tid_128732, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)); - int32_t defunc_0_f_res_32571; - int64_t gtid_32482 = sext_i32_i64(ltid_pre_46368); - int32_t phys_tid_32483 = local_tid_46364; - __local char *red_arr_mem_46369; + int32_t 
ltid_pre_128738; - red_arr_mem_46369 = (__local char *) red_arr_mem_46369_backing_0; + ltid_pre_128738 = local_tid_128732 - squot32(local_tid_128732, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)) - + squot32(local_tid_128732 - squot32(local_tid_128732, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)) * sext_i64_i32(Tx_118227); - float x_32575; + int32_t ltid_pre_128739; - x_32575 = ((__global float *) images_mem_44381)[gtid_32479 * N_27771 + - gtid_32482]; + ltid_pre_128739 = squot32(local_tid_128732, sext_i64_i32(Tx_118227)); - bool isnan_res_32576; + int32_t ltid_pre_128740; - isnan_res_32576 = futrts_isnan32(x_32575); + ltid_pre_128740 = local_tid_128732 - squot32(local_tid_128732, + sext_i64_i32(Tx_118227)) * + sext_i64_i32(Tx_118227); - bool cond_32577 = !isnan_res_32576; - int32_t defunc_0_f_res_32578 = btoi_bool_i32(cond_32577); + int32_t ltid_pre_128741; - ((__local int32_t *) red_arr_mem_46369)[gtid_32482] = defunc_0_f_res_32578; - barrier(CLK_LOCAL_MEM_FENCE); + ltid_pre_128741 = local_tid_128732; - int32_t offset_46371; - int32_t skip_waves_46372; + int64_t gid_zz_118237; - skip_waves_46372 = 1; + gid_zz_118237 = squot64(sext_i32_i64(group_tid_128733), gridDim_y_118229 * + gridDim_x_118228); - int32_t x_32572; - int32_t x_32573; + int64_t gid_y_118236; - offset_46371 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46364, sext_i64_i32(i32_res_27781))) { - x_32572 = ((__local - int32_t *) red_arr_mem_46369)[sext_i32_i64(local_tid_46364 + - offset_46371)]; - } - } - offset_46371 = 1; - while (slt32(offset_46371, wave_sizze_46366)) { - if (slt32(local_tid_46364 + offset_46371, - sext_i64_i32(i32_res_27781)) && ((local_tid_46364 - - squot32(local_tid_46364, - wave_sizze_46366) * - wave_sizze_46366) & (2 * - offset_46371 - - 1)) == - 0) { - // read array element - { - 
x_32573 = ((volatile __local - int32_t *) red_arr_mem_46369)[sext_i32_i64(local_tid_46364 + - offset_46371)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_32574 = add32(x_32572, x_32573); - - x_32572 = defunc_1_op_res_32574; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46369)[sext_i32_i64(local_tid_46364)] = - x_32572; - } - } - offset_46371 *= 2; - } - while (slt32(skip_waves_46372, squot32(sext_i64_i32(i32_res_27781) + - wave_sizze_46366 - 1, - wave_sizze_46366))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46371 = skip_waves_46372 * wave_sizze_46366; - if (slt32(local_tid_46364 + offset_46371, - sext_i64_i32(i32_res_27781)) && ((local_tid_46364 - - squot32(local_tid_46364, - wave_sizze_46366) * - wave_sizze_46366) == 0 && - (squot32(local_tid_46364, - wave_sizze_46366) & - (2 * skip_waves_46372 - - 1)) == 0)) { - // read array element - { - x_32573 = ((__local - int32_t *) red_arr_mem_46369)[sext_i32_i64(local_tid_46364 + - offset_46371)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_32574 = add32(x_32572, x_32573); - - x_32572 = defunc_1_op_res_32574; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46369)[sext_i32_i64(local_tid_46364)] = - x_32572; - } - } - skip_waves_46372 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_32571 = ((__local int32_t *) red_arr_mem_46369)[(int64_t) 0]; + gid_y_118236 = squot64(sext_i32_i64(group_tid_128733) - + squot64(sext_i32_i64(group_tid_128733), + gridDim_y_118229 * gridDim_x_118228) * + (gridDim_y_118229 * gridDim_x_118228), + gridDim_x_118228); - float defunc_0_f_res_32579; - int64_t gtid_32484 = sext_i32_i64(ltid_pre_46368); - int32_t phys_tid_32485 = local_tid_46364; - __local char *red_arr_mem_46373; + int64_t gid_x_118235; - red_arr_mem_46373 = (__local char *) red_arr_mem_46373_backing_1; + gid_x_118235 = sext_i32_i64(group_tid_128733) - + squot64(sext_i32_i64(group_tid_128733), gridDim_y_118229 * + 
gridDim_x_118228) * (gridDim_y_118229 * gridDim_x_118228) - + squot64(sext_i32_i64(group_tid_128733) - + squot64(sext_i32_i64(group_tid_128733), gridDim_y_118229 * + gridDim_x_118228) * (gridDim_y_118229 * + gridDim_x_118228), + gridDim_x_118228) * gridDim_x_118228; - int32_t index_primexp_42382; + int64_t ii_118239; - index_primexp_42382 = sext_i64_i32(gtid_32484); + ii_118239 = (int64_t) 30 * gid_zz_118237; - bool cond_32584 = slt32(index_primexp_42382, defunc_0_f_res_32571); - float defunc_0_f_res_32585; + int64_t jj1_118240 = Ty_118226 * gid_y_118236; + int64_t jj2_118241 = Tx_118227 * gid_x_118235; + double mem_124223[30]; + int64_t ltid_y_118244 = sext_i32_i64(ltid_pre_128739); + int64_t ltid_x_118242 = sext_i32_i64(ltid_pre_128740); + int32_t ltid_flat_118243 = local_tid_128732; + double mem_124217[30]; - if (cond_32584) { - int64_t i_32586 = sext_i32_i64(index_primexp_42382); - bool x_32587 = sle64((int64_t) 0, i_32586); - bool y_32588 = slt64(i_32586, N_27771); - bool bounds_check_32589 = x_32587 && y_32588; - bool index_certs_32590; + for (int32_t i_119595 = 0; i_119595 < 30; i_119595++) { + int64_t i_118252 = sext_i32_i64(i_119595); - if (!bounds_check_32589) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 48) == -1) { - global_failure_args[0] = i_32586; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_2; - } - } - - float defunc_0_f_res_t_res_32591 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_32479 * - N_27771 + - i_32586]; - - defunc_0_f_res_32585 = defunc_0_f_res_t_res_32591; - } else { - defunc_0_f_res_32585 = 0.0F; + mem_124217[i_118252] = 0.0; + } + for (int64_t i_128743 = 0; i_128743 < (int64_t) 30; i_128743++) { + mem_124223[i_128743] = mem_124217[i_128743]; } - - float defunc_0_f_res_32592 = defunc_0_f_res_32585 * defunc_0_f_res_32585; - - ((__local float *) red_arr_mem_46373)[gtid_32484] = defunc_0_f_res_32592; - - error_2: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; 
barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46375; - int32_t skip_waves_46376; + __local char *mem_124225; - skip_waves_46376 = 1; + mem_124225 = (__local char *) mem_124225_backing_0; - float x_32580; - float x_32581; + double loop_mem_124255[30]; + double mem_param_124226[30]; - offset_46375 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46364, sext_i64_i32(i32_res_27781))) { - x_32580 = ((__local - float *) red_arr_mem_46373)[sext_i32_i64(local_tid_46364 + - offset_46375)]; - } - } - offset_46375 = 1; - while (slt32(offset_46375, wave_sizze_46366)) { - if (slt32(local_tid_46364 + offset_46375, - sext_i64_i32(i32_res_27781)) && ((local_tid_46364 - - squot32(local_tid_46364, - wave_sizze_46366) * - wave_sizze_46366) & (2 * - offset_46375 - - 1)) == - 0) { - // read array element - { - x_32581 = ((volatile __local - float *) red_arr_mem_46373)[sext_i32_i64(local_tid_46364 + - offset_46375)]; - } - // apply reduction operation - { - float defunc_1_op_res_32582 = x_32580 + x_32581; + for (int32_t i_1 = 0; i_1 < 30; i_1++) + mem_param_124226[i_1] = mem_124223[i_1]; + for (int64_t i_118257 = 0; i_118257 < n_75139; i_118257++) { + for (int64_t i_118260 = 0; i_118260 < count_shmem_118234; i_118260++) { + int64_t offs_118273 = group_sizze_tile3d_118233 * i_118260; + int64_t ltid_118263 = sext_i32_i64(ltid_pre_128741); + int32_t ltid_flat_118262 = local_tid_128732; + int64_t loc_ind_118274 = ltid_118263 + offs_118273; + int64_t gtid_118275 = ii_118239 + loc_ind_118274; + bool cond_118276 = slt64(gtid_118275, m_75136); + double y_elem_118277; + + if (cond_118276) { + double Y_elem_118279 = ((__global + double *) mem_124213)[i_118257 * + m_75136 + + gtid_118275]; - x_32580 = defunc_1_op_res_32582; + y_elem_118277 = Y_elem_118279; + } else { + y_elem_118277 = 0.0; } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46373)[sext_i32_i64(local_tid_46364)] = - x_32580; - } - } - offset_46375 *= 2; - } - while 
(slt32(skip_waves_46376, squot32(sext_i64_i32(i32_res_27781) + - wave_sizze_46366 - 1, - wave_sizze_46366))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46375 = skip_waves_46376 * wave_sizze_46366; - if (slt32(local_tid_46364 + offset_46375, - sext_i64_i32(i32_res_27781)) && ((local_tid_46364 - - squot32(local_tid_46364, - wave_sizze_46366) * - wave_sizze_46366) == 0 && - (squot32(local_tid_46364, - wave_sizze_46366) & - (2 * skip_waves_46376 - - 1)) == 0)) { - // read array element - { - x_32581 = ((__local - float *) red_arr_mem_46373)[sext_i32_i64(local_tid_46364 + - offset_46375)]; + + bool cond_118281 = slt64(loc_ind_118274, (int64_t) 30); + int64_t y_loc_ind_118282; + + if (cond_118281) { + y_loc_ind_118282 = loc_ind_118274; + } else { + y_loc_ind_118282 = (int64_t) -1; } - // apply reduction operation - { - float defunc_1_op_res_32582 = x_32580 + x_32581; - - x_32580 = defunc_1_op_res_32582; + if (sle64((int64_t) 0, y_loc_ind_118282) && slt64(y_loc_ind_118282, + (int64_t) 30)) { + ((__local double *) mem_124225)[y_loc_ind_118282] = + y_elem_118277; } - // write result of operation - { - ((__local - float *) red_arr_mem_46373)[sext_i32_i64(local_tid_46364)] = - x_32580; + barrier(CLK_LOCAL_MEM_FENCE); + } + + double mem_124254[30]; + int64_t ltid_y_118288 = sext_i32_i64(ltid_pre_128739); + int64_t ltid_x_118286 = sext_i32_i64(ltid_pre_128740); + int32_t ltid_flat_118287 = local_tid_128732; + int64_t gtid_118315 = jj1_118240 + ltid_y_118288; + int64_t gtid_118316 = jj2_118241 + ltid_x_118286; + bool binop_x_118318 = slt64(gtid_118315, k2p2zq_75151); + bool binop_y_118319 = slt64(gtid_118316, k2p2zq_75151); + bool cond_118320 = binop_x_118318 && binop_y_118319; + double mem_125364[30]; + + if (cond_118320) { + double x_118323 = ((__global double *) mem_120120)[i_118257 * + k2p2zq_75151 + + gtid_118315]; + double x_118325 = ((__global double *) mem_120124)[i_118257 * + k2p2zq_75151 + + gtid_118316]; + + for (int32_t i_119596 = 0; i_119596 < 30; i_119596++) { + 
int64_t i_118327 = sext_i32_i64(i_119596); + int64_t gtid_118329 = ii_118239 + i_118327; + bool cond_118330 = slt64(gtid_118329, m_75136); + + if (cond_118330) { + double inp_reg_var2zz_118332 = ((__local + double *) mem_124225)[i_118327]; + double res_reg_var2zz_118333 = mem_param_124226[i_118327]; + double x_118337 = x_118323 * x_118325; + bool isnan_res_118338; + + isnan_res_118338 = futrts_isnan64(inp_reg_var2zz_118332); + + double y_118339; + + if (isnan_res_118338) { + y_118339 = 0.0; + } else { + y_118339 = 1.0; + } + + double defunc_2_f_res_118340 = x_118337 * y_118339; + double defunc_1_op_res_118344 = res_reg_var2zz_118333 + + defunc_2_f_res_118340; + + mem_param_124226[i_118327] = defunc_1_op_res_118344; + } + } + for (int64_t i_128749 = 0; i_128749 < (int64_t) 30; i_128749++) { + mem_125364[i_128749] = mem_param_124226[i_128749]; + } + } else { + for (int64_t i_128750 = 0; i_128750 < (int64_t) 30; i_128750++) { + mem_125364[i_128750] = mem_param_124226[i_128750]; } } - skip_waves_46376 *= 2; + for (int64_t i_128751 = 0; i_128751 < (int64_t) 30; i_128751++) { + mem_124254[i_128751] = mem_125364[i_128751]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128744[30]; + + for (int32_t i_2 = 0; i_2 < 30; i_2++) + mem_param_tmp_128744[i_2] = mem_124254[i_2]; + for (int32_t i_3 = 0; i_3 < 30; i_3++) + mem_param_124226[i_3] = mem_param_tmp_128744[i_3]; } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_32579 = ((__local float *) red_arr_mem_46373)[(int64_t) 0]; - - int32_t r32_arg_32593 = sub32(defunc_0_f_res_32571, k2p2_27783); - float i32_res_32594 = sitofp_i32_f32(r32_arg_32593); - float sqrt_arg_32595 = defunc_0_f_res_32579 / i32_res_32594; - float sqrt_res_32596; - - sqrt_res_32596 = futrts_sqrt32(sqrt_arg_32595); + for (int32_t i_4 = 0; i_4 < 30; i_4++) + loop_mem_124255[i_4] = mem_param_124226[i_4]; - float i32_res_32597 = sitofp_i32_f32(defunc_0_f_res_32571); - float t32_arg_32598 = hfrac_27777 * i32_res_32597; - int32_t f32_res_32599 = 
fptosi_f32_i32(t32_arg_32598); + double mem_124269[30 * 1 * 1]; + int64_t ltid_zz_118353 = sext_i32_i64(ltid_pre_128736); + int64_t ltid_y_118352 = sext_i32_i64(ltid_pre_128737); + int64_t ltid_x_118350 = sext_i32_i64(ltid_pre_128738); + int32_t ltid_flat_118351 = local_tid_128732; + double mem_124263[30 * 1 * 1]; - if (local_tid_46364 == 0) { - ((__global int32_t *) mem_45225)[gtid_32479] = f32_res_32599; + for (int32_t i_119598 = 0; i_119598 < 30; i_119598++) { + int64_t i_118362 = sext_i32_i64(i_119598); + + for (int64_t i_128753 = 0; i_128753 < (int64_t) 1; i_128753++) { + mem_124263[i_118362 + i_128753] = loop_mem_124255[i_118362 + + i_128753]; + } } - if (local_tid_46364 == 0) { - ((__global int32_t *) mem_45227)[gtid_32479] = defunc_0_f_res_32571; + for (int64_t i_128754 = 0; i_128754 < (int64_t) 30; i_128754++) { + for (int64_t i_128755 = 0; i_128755 < (int64_t) 1; i_128755++) { + for (int64_t i_128756 = 0; i_128756 < (int64_t) 1; i_128756++) { + mem_124269[i_128754 + i_128755 + i_128756] = + mem_124263[i_128754 + i_128755 + i_128756]; + } + } } - if (local_tid_46364 == 0) { - ((__global float *) mem_45229)[gtid_32479] = sqrt_res_32596; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t reg_tile_i_128757 = squot64(sext_i32_i64(local_tid_128732), + Ty_118226 * Tx_118227); + int64_t reg_tile_i_128758 = squot64(sext_i32_i64(local_tid_128732) - + squot64(sext_i32_i64(local_tid_128732), + Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227), Tx_118227); + int64_t reg_tile_i_128759 = sext_i32_i64(local_tid_128732) - + squot64(sext_i32_i64(local_tid_128732), Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227) - squot64(sext_i32_i64(local_tid_128732) - + squot64(sext_i32_i64(local_tid_128732), + Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227), + Tx_118227) * Tx_118227; + int64_t tile_dim_start_128760 = (int64_t) 30 * (gid_zz_118237 + + reg_tile_i_128757); + int64_t tile_dim_start_128761 = Ty_118226 * gid_y_118236 + + reg_tile_i_128758; + int64_t tile_dim_start_128762 = 
Tx_118227 * gid_x_118235 + + reg_tile_i_128759; + + for (int64_t nest_i_128763 = 0; nest_i_128763 < (int64_t) 30; + nest_i_128763++) { + for (int64_t nest_i_128764 = 0; nest_i_128764 < (int64_t) 1; + nest_i_128764++) { + for (int64_t nest_i_128765 = 0; nest_i_128765 < (int64_t) 1; + nest_i_128765++) { + if ((slt64(tile_dim_start_128760 + nest_i_128763, m_75136) && + slt64(tile_dim_start_128761 + nest_i_128764, + k2p2zq_75151)) && slt64(tile_dim_start_128762 + + nest_i_128765, + k2p2zq_75151)) { + ((__global double *) mem_124273)[(tile_dim_start_128760 + + nest_i_128763) * + (k2p2zq_75151 * + k2p2zq_75151) + + (tile_dim_start_128761 + + nest_i_128764) * + k2p2zq_75151 + + (tile_dim_start_128762 + + nest_i_128765)] = + mem_124269[nest_i_128763 + nest_i_128764 + + nest_i_128765]; + } + } + } } error_4: return; } -__kernel void mainDetailedzisegmap_intragroup_33543(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45385_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46677_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46675_backing_aligned_2, - __local volatile - int64_t *red_arr_mem_46673_backing_aligned_3, - __local volatile - int64_t *mem_45383_backing_aligned_4, - __local volatile - int64_t *mem_45380_backing_aligned_5, - int64_t N_27771, - int32_t n_27775, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, - int64_t computed_group_sizze_33535, - __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global - unsigned char *mem_45282, - __global - unsigned char *mem_45389, - __global - unsigned char *mem_45392, - 
__global - unsigned char *mem_45394, - __global - unsigned char *mem_45396) +__kernel void mainzisegmap_intragroup_118391(__global int *global_failure, + __local volatile + int64_t *mem_124411_backing_aligned_0, + __local volatile + int64_t *mem_124409_backing_aligned_1, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, + int64_t k2p2zq_75151, + int64_t gridDim_x_118385, + int64_t full_tiles_118416, + int64_t kk_118623, __global + unsigned char *mem_120120, __global + unsigned char *mem_124142, __global + unsigned char *mem_124583) { + #define Ty_118372 (mainziTy_118369) + #define Ry_118373 (mainziRy_118371) + #define Tx_118374 (mainziTx_118368) + #define Rx_118375 (mainziRx_118370) + #define Tk_118376 (mainziTk_118367) + #define tk_div_tx_118377 (sdiv_up64(mainziTk_118367, mainziTx_118368)) + #define tk_div_ty_118378 (sdiv_up64(mainziTk_118367, mainziTy_118369)) + #define TxRx_118379 (mainziTx_118368 * mainziRx_118370) + #define TyRy_118380 (mainziTy_118369 * mainziRy_118371) + #define a_loc_szz_118382 (mainziTk_118367 * (mainziTy_118369 * mainziRy_118371)) + #define b_loc_szz_118384 (mainziRx_118370 * (mainziTx_118368 * mainziTk_118367)) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_45385_backing_5 = (__local volatile - char *) mem_45385_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46677_backing_4 = - (__local volatile - char *) red_arr_mem_46677_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46675_backing_3 = - (__local volatile - char *) red_arr_mem_46675_backing_aligned_2; - __local volatile char *restrict red_arr_mem_46673_backing_2 = - (__local volatile - char *) red_arr_mem_46673_backing_aligned_3; - __local volatile char *restrict mem_45383_backing_1 = (__local volatile - char *) mem_45383_backing_aligned_4; - __local volatile char *restrict mem_45380_backing_0 = (__local volatile - char *) mem_45380_backing_aligned_5; - volatile __local bool 
local_failure; + __local volatile char *restrict mem_124411_backing_1 = (__local volatile + char *) mem_124411_backing_aligned_0; + __local volatile char *restrict mem_124409_backing_0 = (__local volatile + char *) mem_124409_backing_aligned_1; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - int32_t global_tid_46655; - int32_t local_tid_46656; - int64_t group_sizze_46659; - int32_t wave_sizze_46658; - int32_t group_tid_46657; + int32_t global_tid_128899; + int32_t local_tid_128900; + int64_t group_sizze_128903; + int32_t wave_sizze_128902; + int32_t group_tid_128901; - global_tid_46655 = get_global_id(0); - local_tid_46656 = get_local_id(0); - group_sizze_46659 = get_local_size(0); - wave_sizze_46658 = LOCKSTEP_WIDTH; - group_tid_46657 = get_group_id(0); + global_tid_128899 = get_global_id(0); + local_tid_128900 = get_local_id(0); + group_sizze_128903 = get_local_size(0); + wave_sizze_128902 = LOCKSTEP_WIDTH; + group_tid_128901 = get_group_id(0); - int32_t phys_tid_33543; + int32_t gid_flat_118391; - phys_tid_33543 = group_tid_46657; + gid_flat_118391 = group_tid_128901; - int32_t ltid_pre_46660; + int32_t ltid_pre_128904; - ltid_pre_46660 = local_tid_46656; + ltid_pre_128904 = squot32(local_tid_128900, sext_i64_i32(Tx_118374)); - int32_t ltid_pre_46661; + int32_t ltid_pre_128905; - ltid_pre_46661 = local_tid_46656; + ltid_pre_128905 = local_tid_128900 - squot32(local_tid_128900, + sext_i64_i32(Tx_118374)) * + sext_i64_i32(Tx_118374); - int64_t gtid_33533; + int64_t gid_y_118390; - gtid_33533 = sext_i32_i64(group_tid_46657); + gid_y_118390 = squot64(sext_i32_i64(group_tid_128901), gridDim_x_118385); - int32_t x_33781; + int64_t gid_x_118389; - x_33781 = ((__global int32_t *) defunc_4_map_res_mem_45177)[gtid_33533]; + gid_x_118389 = sext_i32_i64(group_tid_128901) - + squot64(sext_i32_i64(group_tid_128901), 
gridDim_x_118385) * + gridDim_x_118385; - int32_t x_33782 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_33533]; - float x_33783 = ((__global float *) defunc_3_map_res_mem_45246)[gtid_33533]; - int32_t x_33784 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_33533]; - float x_33785 = ((__global float *) defunc_0_f_res_mem_45279)[gtid_33533]; - int32_t y_33788 = sub32(x_33781, x_33782); - __local char *mem_45380; + int64_t iii_118392; - mem_45380 = (__local char *) mem_45380_backing_0; + iii_118392 = TyRy_118380 * gid_y_118390; - int64_t gtid_33536 = sext_i32_i64(ltid_pre_46661); - int32_t phys_tid_33537 = local_tid_46656; + int64_t jjj_118393 = TxRx_118379 * gid_x_118389; + double mem_124407[Ry_118373 * Rx_118375]; + int64_t ltid_y_118396 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_x_118394 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118395 = local_tid_128900; + double mem_124398[Ry_118373 * Rx_118375]; - if (slt64(gtid_33536, iota32_arg_28233)) { - int32_t index_primexp_42402 = sext_i64_i32(gtid_33536); - bool cond_33794 = sle32(y_33788, index_primexp_42402); - float defunc_0_f_res_33795; - - if (cond_33794) { - defunc_0_f_res_33795 = 0.0F; - } else { - bool cond_33796 = index_primexp_42402 == 0; - float defunc_0_f_res_f_res_33797; - - if (cond_33796) { - defunc_0_f_res_f_res_33797 = x_33785; - } else { - int32_t i_33798 = add32(x_33782, index_primexp_42402); - int64_t i_33799 = sext_i32_i64(i_33798); - bool x_33800 = sle64((int64_t) 0, i_33799); - bool y_33801 = slt64(i_33799, N_27771); - bool bounds_check_33802 = x_33800 && y_33801; - bool index_certs_33803; - - if (!bounds_check_33802) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 71) == - -1) { - global_failure_args[0] = i_33799; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_33804 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33533 * - N_27771 + - i_33799]; - int32_t x_33805 = sub32(x_33782, 
x_33784); - int32_t i_33806 = add32(x_33805, index_primexp_42402); - int64_t i_33807 = sext_i32_i64(i_33806); - bool x_33808 = sle64((int64_t) 0, i_33807); - bool y_33809 = slt64(i_33807, N_27771); - bool bounds_check_33810 = x_33808 && y_33809; - bool index_certs_33811; - - if (!bounds_check_33810) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 72) == - -1) { - global_failure_args[0] = i_33807; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float y_33812 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_33533 * - N_27771 + - i_33807]; - float defunc_0_f_res_f_res_f_res_33813 = x_33804 - y_33812; - - defunc_0_f_res_f_res_33797 = defunc_0_f_res_f_res_f_res_33813; - } - defunc_0_f_res_33795 = defunc_0_f_res_f_res_33797; + for (int64_t i_118407 = 0; i_118407 < Ry_118373; i_118407++) { + for (int64_t i_118410 = 0; i_118410 < Rx_118375; i_118410++) { + mem_124398[i_118407 * Rx_118375 + i_118410] = 0.0; + } + } + for (int64_t i_128908 = 0; i_128908 < Ry_118373; i_128908++) { + for (int64_t i_128909 = 0; i_128909 < Rx_118375; i_128909++) { + mem_124407[i_128908 * Rx_118375 + i_128909] = mem_124398[i_128908 * + Rx_118375 + + i_128909]; } - ((__local float *) mem_45380)[gtid_33536] = defunc_0_f_res_33795; } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; barrier(CLK_LOCAL_MEM_FENCE); - int64_t dims_flat_46662; + __local char *mem_124409; - dims_flat_46662 = iota32_arg_28233; + mem_124409 = (__local char *) mem_124409_backing_0; - float x_33790; - float x_33791; - float x_46664; - float x_46665; - bool ltid_in_bounds_46667; + __local char *mem_124411; - ltid_in_bounds_46667 = slt64(sext_i32_i64(local_tid_46656), - iota32_arg_28233); + mem_124411 = (__local char *) mem_124411_backing_1; - int32_t skip_threads_46668; + double mem_124482[Ry_118373]; + double mem_124486[Rx_118375]; + double loop_mem_124498[Ry_118373 * Rx_118375]; + double mem_param_124412[Ry_118373 * Rx_118375]; - // 
read input for in-block scan - { - if (ltid_in_bounds_46667) { - x_33791 = ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656)]; - if ((local_tid_46656 - squot32(local_tid_46656, 32) * 32) == 0) { - x_33790 = x_33791; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46668 = 1; - while (slt32(skip_threads_46668, 32)) { - if (sle32(skip_threads_46668, local_tid_46656 - - squot32(local_tid_46656, 32) * 32) && - ltid_in_bounds_46667) { - // read operands - { - x_33790 = ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656) - - sext_i32_i64(skip_threads_46668)]; - } - // perform operation - { - bool inactive_46669 = - slt64(srem64(sext_i32_i64(local_tid_46656), - iota32_arg_28233), - sext_i32_i64(local_tid_46656) - - sext_i32_i64(local_tid_46656 - - skip_threads_46668)); - - if (inactive_46669) { - x_33790 = x_33791; - } - if (!inactive_46669) { - float defunc_1_op_res_33792 = x_33790 + x_33791; - - x_33790 = defunc_1_op_res_33792; - } - } - } - if (sle32(wave_sizze_46658, skip_threads_46668)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46668, local_tid_46656 - - squot32(local_tid_46656, 32) * 32) && - ltid_in_bounds_46667) { - // write result - { - ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656)] = - x_33790; - x_33791 = x_33790; - } - } - if (sle32(wave_sizze_46658, skip_threads_46668)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46668 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46656 - squot32(local_tid_46656, 32) * 32) == 31 && - ltid_in_bounds_46667) { - ((volatile __local - float *) mem_45380)[sext_i32_i64(squot32(local_tid_46656, 32))] = - x_33790; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46670; - - // read input for in-block scan - { - if 
(squot32(local_tid_46656, 32) == 0 && ltid_in_bounds_46667) { - x_46665 = ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656)]; - if ((local_tid_46656 - squot32(local_tid_46656, 32) * 32) == - 0) { - x_46664 = x_46665; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46670 = 1; - while (slt32(skip_threads_46670, 32)) { - if (sle32(skip_threads_46670, local_tid_46656 - - squot32(local_tid_46656, 32) * 32) && - (squot32(local_tid_46656, 32) == 0 && - ltid_in_bounds_46667)) { - // read operands - { - x_46664 = ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656) - - sext_i32_i64(skip_threads_46670)]; - } - // perform operation - { - bool inactive_46671 = - slt64(srem64(sext_i32_i64(local_tid_46656 * 32 + - 32 - 1), iota32_arg_28233), - sext_i32_i64(local_tid_46656 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46656 - - skip_threads_46670) * 32 + 32 - - 1)); - - if (inactive_46671) { - x_46664 = x_46665; - } - if (!inactive_46671) { - float defunc_1_op_res_46666 = x_46664 + x_46665; - - x_46664 = defunc_1_op_res_46666; - } - } - } - if (sle32(wave_sizze_46658, skip_threads_46670)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46670, local_tid_46656 - - squot32(local_tid_46656, 32) * 32) && - (squot32(local_tid_46656, 32) == 0 && - ltid_in_bounds_46667)) { - // write result - { - ((volatile __local - float *) mem_45380)[sext_i32_i64(local_tid_46656)] = - x_46664; - x_46665 = x_46664; - } - } - if (sle32(wave_sizze_46658, skip_threads_46670)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46670 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46656, 32) == 0 || !ltid_in_bounds_46667)) { - // read operands - { - x_33791 = x_33790; - x_33790 = ((__local - float *) mem_45380)[sext_i32_i64(squot32(local_tid_46656, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46672 = - 
slt64(srem64(sext_i32_i64(local_tid_46656), - iota32_arg_28233), - sext_i32_i64(local_tid_46656) - - sext_i32_i64(squot32(local_tid_46656, 32) * 32 - 1)); - - if (inactive_46672) { - x_33790 = x_33791; - } - if (!inactive_46672) { - float defunc_1_op_res_33792 = x_33790 + x_33791; - - x_33790 = defunc_1_op_res_33792; - } - } - // write final result - { - ((__local float *) mem_45380)[sext_i32_i64(local_tid_46656)] = - x_33790; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46656, 32) == 0) { - ((__local float *) mem_45380)[sext_i32_i64(local_tid_46656)] = - x_33791; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - float i32_res_33814 = sitofp_i32_f32(x_33782); - float sqrt_res_33815; - - sqrt_res_33815 = futrts_sqrt32(i32_res_33814); - - float y_33816 = x_33783 * sqrt_res_33815; - __local char *mem_45383; - - mem_45383 = (__local char *) mem_45383_backing_1; - - bool defunc_0_f_res_33818; - int32_t defunc_0_f_res_33819; - float defunc_0_f_res_33820; - int64_t gtid_33538 = sext_i32_i64(ltid_pre_46660); - int32_t phys_tid_33539 = local_tid_46656; - __local char *red_arr_mem_46673; - - red_arr_mem_46673 = (__local char *) red_arr_mem_46673_backing_2; - - __local char *red_arr_mem_46675; - - red_arr_mem_46675 = (__local char *) red_arr_mem_46675_backing_3; - - __local char *red_arr_mem_46677; - - red_arr_mem_46677 = (__local char *) red_arr_mem_46677_backing_4; - if (slt64(gtid_33538, iota32_arg_28203)) { - float x_33836 = ((__local float *) mem_45380)[gtid_33538]; - float x_33837 = ((__global float *) mem_45282)[gtid_33538]; - int32_t index_primexp_42405 = sext_i64_i32(gtid_33538); - float defunc_0_f_res_33839 = x_33836 / y_33816; - bool cond_33840 = slt32(index_primexp_42405, y_33788); - bool isnan_res_33841; - - isnan_res_33841 = futrts_isnan32(defunc_0_f_res_33839); - - bool cond_t_res_33842 = !isnan_res_33841; - bool x_33843 = cond_33840 && cond_t_res_33842; - float abs_res_33844 = (float) 
fabs(defunc_0_f_res_33839); - bool defunc_2_f_res_t_res_33845 = x_33837 < abs_res_33844; - bool x_33846 = x_33843 && defunc_2_f_res_t_res_33845; - float defunc_1_f_res_33847; - - if (cond_33840) { - defunc_1_f_res_33847 = defunc_0_f_res_33839; - } else { - defunc_1_f_res_33847 = 0.0F; - } - ((__local bool *) red_arr_mem_46673)[gtid_33538] = x_33846; - ((__local int32_t *) red_arr_mem_46675)[gtid_33538] = - index_primexp_42405; - ((__local float *) red_arr_mem_46677)[gtid_33538] = - defunc_1_f_res_33847; - ((__local float *) mem_45383)[gtid_33538] = defunc_0_f_res_33839; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46679; - int32_t skip_waves_46680; - - skip_waves_46680 = 1; - - bool x_33822; - int32_t x_33823; - float x_33824; - bool x_33825; - int32_t x_33826; - float x_33827; - - offset_46679 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46656, sext_i64_i32(iota32_arg_28203))) { - x_33822 = ((__local - bool *) red_arr_mem_46673)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33823 = ((__local - int32_t *) red_arr_mem_46675)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33824 = ((__local - float *) red_arr_mem_46677)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - } - } - offset_46679 = 1; - while (slt32(offset_46679, wave_sizze_46658)) { - if (slt32(local_tid_46656 + offset_46679, - sext_i64_i32(iota32_arg_28203)) && ((local_tid_46656 - - squot32(local_tid_46656, - wave_sizze_46658) * - wave_sizze_46658) & (2 * - offset_46679 - - 1)) == - 0) { - // read array element - { - x_33825 = ((volatile __local - bool *) red_arr_mem_46673)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33826 = ((volatile __local - int32_t *) red_arr_mem_46675)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33827 = ((volatile __local - float *) red_arr_mem_46677)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - } - // apply reduction operation - { - bool defunc_1_op_res_33828; - int32_t defunc_1_op_res_33829; - - 
if (x_33822) { - defunc_1_op_res_33828 = x_33822; - defunc_1_op_res_33829 = x_33823; - } else { - bool x_33830 = x_33825 && x_33825; - bool x_33831 = !x_33825; - bool y_33832 = x_33822 && x_33831; - bool defunc_1_op_res_f_res_33833 = x_33830 || y_33832; - int32_t defunc_1_op_res_f_res_33834; - - if (x_33825) { - defunc_1_op_res_f_res_33834 = x_33826; - } else { - defunc_1_op_res_f_res_33834 = x_33823; - } - defunc_1_op_res_33828 = defunc_1_op_res_f_res_33833; - defunc_1_op_res_33829 = defunc_1_op_res_f_res_33834; - } - - float defunc_1_op_res_33835 = x_33824 + x_33827; - - x_33822 = defunc_1_op_res_33828; - x_33823 = defunc_1_op_res_33829; - x_33824 = defunc_1_op_res_33835; - } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46673)[sext_i32_i64(local_tid_46656)] = - x_33822; - ((volatile __local - int32_t *) red_arr_mem_46675)[sext_i32_i64(local_tid_46656)] = - x_33823; - ((volatile __local - float *) red_arr_mem_46677)[sext_i32_i64(local_tid_46656)] = - x_33824; - } - } - offset_46679 *= 2; - } - while (slt32(skip_waves_46680, - squot32(sext_i64_i32(computed_group_sizze_33535) + - wave_sizze_46658 - 1, wave_sizze_46658))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46679 = skip_waves_46680 * wave_sizze_46658; - if (slt32(local_tid_46656 + offset_46679, - sext_i64_i32(iota32_arg_28203)) && ((local_tid_46656 - - squot32(local_tid_46656, - wave_sizze_46658) * - wave_sizze_46658) == 0 && - (squot32(local_tid_46656, - wave_sizze_46658) & - (2 * skip_waves_46680 - - 1)) == 0)) { - // read array element - { - x_33825 = ((__local - bool *) red_arr_mem_46673)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33826 = ((__local - int32_t *) red_arr_mem_46675)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - x_33827 = ((__local - float *) red_arr_mem_46677)[sext_i32_i64(local_tid_46656 + - offset_46679)]; - } - // apply reduction operation - { - bool defunc_1_op_res_33828; - int32_t defunc_1_op_res_33829; - - if (x_33822) { - 
defunc_1_op_res_33828 = x_33822; - defunc_1_op_res_33829 = x_33823; - } else { - bool x_33830 = x_33825 && x_33825; - bool x_33831 = !x_33825; - bool y_33832 = x_33822 && x_33831; - bool defunc_1_op_res_f_res_33833 = x_33830 || y_33832; - int32_t defunc_1_op_res_f_res_33834; - - if (x_33825) { - defunc_1_op_res_f_res_33834 = x_33826; - } else { - defunc_1_op_res_f_res_33834 = x_33823; - } - defunc_1_op_res_33828 = defunc_1_op_res_f_res_33833; - defunc_1_op_res_33829 = defunc_1_op_res_f_res_33834; - } - - float defunc_1_op_res_33835 = x_33824 + x_33827; - - x_33822 = defunc_1_op_res_33828; - x_33823 = defunc_1_op_res_33829; - x_33824 = defunc_1_op_res_33835; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46673)[sext_i32_i64(local_tid_46656)] = - x_33822; - ((__local - int32_t *) red_arr_mem_46675)[sext_i32_i64(local_tid_46656)] = - x_33823; - ((__local - float *) red_arr_mem_46677)[sext_i32_i64(local_tid_46656)] = - x_33824; - } - } - skip_waves_46680 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_33818 = ((__local bool *) red_arr_mem_46673)[(int64_t) 0]; - defunc_0_f_res_33819 = ((__local int32_t *) red_arr_mem_46675)[(int64_t) 0]; - defunc_0_f_res_33820 = ((__local float *) red_arr_mem_46677)[(int64_t) 0]; - - bool cond_33848 = y_33788 == 0; - float defunc_0_f_res_33849; - - if (cond_33848) { - defunc_0_f_res_33849 = 0.0F; - } else { - float i32_res_33850 = sitofp_i32_f32(y_33788); - float defunc_0_f_res_f_res_33851 = defunc_0_f_res_33820 / i32_res_33850; - - defunc_0_f_res_33849 = defunc_0_f_res_f_res_33851; - } - - bool cond_33852 = !defunc_0_f_res_33818; - int32_t fst_breakzq_33853; - - if (cond_33852) { - fst_breakzq_33853 = -1; - } else { - bool cond_33854 = slt32(defunc_0_f_res_33819, y_33788); - int32_t adjustValInds_res_33855; - - if (cond_33854) { - int32_t i_33856 = add32(x_33782, defunc_0_f_res_33819); - int64_t i_33857 = sext_i32_i64(i_33856); - bool x_33858 = sle64((int64_t) 0, i_33857); - bool y_33859 = 
slt64(i_33857, N_27771); - bool bounds_check_33860 = x_33858 && y_33859; - bool index_certs_33861; - - if (!bounds_check_33860) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 73) == - -1) { - global_failure_args[0] = i_33857; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_3; - } - } - - int32_t x_33862 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33533 * - N_27771 + - i_33857]; - int32_t adjustValInds_res_t_res_33863 = sub32(x_33862, n_27775); - - adjustValInds_res_33855 = adjustValInds_res_t_res_33863; - } else { - adjustValInds_res_33855 = -1; - } - fst_breakzq_33853 = adjustValInds_res_33855; - } - - bool cond_33864 = sle32(x_33782, 5); - bool cond_f_res_33865 = sle32(y_33788, 5); - bool x_33866 = !cond_33864; - bool y_33867 = cond_f_res_33865 && x_33866; - bool cond_33868 = cond_33864 || y_33867; - int32_t fst_breakzq_33869; - - if (cond_33868) { - fst_breakzq_33869 = -2; - } else { - fst_breakzq_33869 = fst_breakzq_33853; - } - - __local char *mem_45385; - - mem_45385 = (__local char *) mem_45385_backing_5; - for (int64_t i_46681 = 0; i_46681 < sdiv_up64(iota32_arg_28233 - - sext_i32_i64(local_tid_46656), - computed_group_sizze_33535); - i_46681++) { - ((__local float *) mem_45385)[i_46681 * computed_group_sizze_33535 + - sext_i32_i64(local_tid_46656)] = NAN; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_33540 = sext_i32_i64(ltid_pre_46661); - int32_t phys_tid_33541 = local_tid_46656; - - if (slt64(write_i_33540, iota32_arg_28233)) { - int32_t index_primexp_42408 = sext_i64_i32(write_i_33540); - float write_value_33874 = ((__local float *) mem_45383)[write_i_33540]; - bool cond_33875 = slt32(index_primexp_42408, y_33788); - int32_t defunc_0_f_res_33876; - - if (cond_33875) { - int32_t i_33877 = add32(x_33782, index_primexp_42408); - int64_t i_33878 = sext_i32_i64(i_33877); - bool x_33879 = sle64((int64_t) 0, i_33878); - bool y_33880 = slt64(i_33878, N_27771); - bool bounds_check_33881 = 
x_33879 && y_33880; - bool index_certs_33882; - - if (!bounds_check_33881) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 74) == - -1) { - global_failure_args[0] = i_33878; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_3; - } - } - - int32_t x_33883 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_33533 * - N_27771 + - i_33878]; - int32_t defunc_0_f_res_t_res_33884 = sub32(x_33883, n_27775); - - defunc_0_f_res_33876 = defunc_0_f_res_t_res_33884; - } else { - defunc_0_f_res_33876 = -1; - } - - int64_t defunc_0_f_res_33885 = sext_i32_i64(defunc_0_f_res_33876); - - if (sle64((int64_t) 0, defunc_0_f_res_33885) && - slt64(defunc_0_f_res_33885, iota32_arg_28233)) { - ((__local float *) mem_45385)[defunc_0_f_res_33885] = - write_value_33874; - } - } - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_46682 = 0; i_46682 < sdiv_up64(iota32_arg_28203 - - sext_i32_i64(local_tid_46656), - computed_group_sizze_33535); - i_46682++) { - ((__global float *) mem_45389)[gtid_33533 * iota32_arg_28203 + - (i_46682 * computed_group_sizze_33535 + - sext_i32_i64(local_tid_46656))] = - ((__local float *) mem_45385)[i_46682 * computed_group_sizze_33535 + - sext_i32_i64(local_tid_46656)]; - } - barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_46683 = 0; i_46683 < sdiv_up64(iota32_arg_28203 - - sext_i32_i64(local_tid_46656), - computed_group_sizze_33535); - i_46683++) { - ((__global float *) mem_45392)[gtid_33533 * iota32_arg_28203 + - (i_46683 * computed_group_sizze_33535 + - sext_i32_i64(local_tid_46656))] = - ((__local float *) mem_45383)[i_46683 * computed_group_sizze_33535 + - sext_i32_i64(local_tid_46656)]; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_46656 == 0) { - ((__global int32_t *) mem_45394)[gtid_33533] = fst_breakzq_33869; - } - if (local_tid_46656 == 0) { - ((__global float *) mem_45396)[gtid_33533] = defunc_0_f_res_33849; - } - - error_4: - 
return; -} -__kernel void mainDetailedzisegmap_intragroup_42541(__global - int *global_failure, - __local volatile - int64_t *mem_44480_backing_aligned_0, - int64_t m_27772, - int32_t n_27775, - int64_t i32_res_27787, - int64_t Ty_42529, - int64_t Tx_42530, - int64_t gridDim_x_42531, - int64_t gridDim_y_42532, - int64_t group_sizze_tile3d_42536, - int64_t count_shmem_42537, - __global - unsigned char *mem_44393, - __global - unsigned char *mem_44397, - __global - unsigned char *mem_44468, - __global - unsigned char *mem_44528) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_44480_backing_0 = (__local volatile - char *) mem_44480_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45733; - int32_t local_tid_45734; - int64_t group_sizze_45737; - int32_t wave_sizze_45736; - int32_t group_tid_45735; - - global_tid_45733 = get_global_id(0); - local_tid_45734 = get_local_id(0); - group_sizze_45737 = get_local_size(0); - wave_sizze_45736 = LOCKSTEP_WIDTH; - group_tid_45735 = get_group_id(0); - - int32_t gid_flat_42541; - - gid_flat_42541 = group_tid_45735; - - int32_t ltid_pre_45738; - - ltid_pre_45738 = squot32(local_tid_45734, sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45739; - - ltid_pre_45739 = squot32(local_tid_45734 - squot32(local_tid_45734, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45740; - - ltid_pre_45740 = local_tid_45734 - squot32(local_tid_45734, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)) - - squot32(local_tid_45734 - squot32(local_tid_45734, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)) * sext_i64_i32(Tx_42530); - - int32_t ltid_pre_45741; - - ltid_pre_45741 = 
squot32(local_tid_45734, sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45742; - - ltid_pre_45742 = local_tid_45734 - squot32(local_tid_45734, - sext_i64_i32(Tx_42530)) * - sext_i64_i32(Tx_42530); - - int32_t ltid_pre_45743; - - ltid_pre_45743 = local_tid_45734; - - int64_t gid_zz_42540; - - gid_zz_42540 = squot64(sext_i32_i64(group_tid_45735), gridDim_y_42532 * - gridDim_x_42531); - - int64_t gid_y_42539; - - gid_y_42539 = squot64(sext_i32_i64(group_tid_45735) - - squot64(sext_i32_i64(group_tid_45735), - gridDim_y_42532 * gridDim_x_42531) * - (gridDim_y_42532 * gridDim_x_42531), gridDim_x_42531); - - int64_t gid_x_42538; - - gid_x_42538 = sext_i32_i64(group_tid_45735) - - squot64(sext_i32_i64(group_tid_45735), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531) - - squot64(sext_i32_i64(group_tid_45735) - - squot64(sext_i32_i64(group_tid_45735), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531), - gridDim_x_42531) * gridDim_x_42531; - - int64_t ii_42542; - - ii_42542 = (int64_t) 30 * gid_zz_42540; - - int64_t jj1_42543 = Ty_42529 * gid_y_42539; - int64_t jj2_42544 = Tx_42530 * gid_x_42538; - float mem_44478[30]; - int64_t ltid_y_42547 = sext_i32_i64(ltid_pre_45741); - int64_t ltid_x_42545 = sext_i32_i64(ltid_pre_45742); - int32_t ltid_flat_42546 = local_tid_45734; - float mem_44472[30]; - - for (int32_t i_44270 = 0; i_44270 < 30; i_44270++) { - int64_t i_42555 = sext_i32_i64(i_44270); - - mem_44472[i_42555] = 0.0F; - } - for (int64_t i_45745 = 0; i_45745 < (int64_t) 30; i_45745++) { - mem_44478[i_45745] = mem_44472[i_45745]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44480; - - mem_44480 = (__local char *) mem_44480_backing_0; - - float loop_mem_44510[30]; - float mem_param_44481[30]; - - for (int32_t i_1 = 0; i_1 < 30; i_1++) - mem_param_44481[i_1] = mem_44478[i_1]; - for (int32_t i_44272 = 0; i_44272 < n_27775; i_44272++) { - int64_t i_42560 = sext_i32_i64(i_44272); - - for (int64_t i_42563 = 0; 
i_42563 < count_shmem_42537; i_42563++) { - int64_t offs_42576 = group_sizze_tile3d_42536 * i_42563; - int64_t ltid_42566 = sext_i32_i64(ltid_pre_45743); - int32_t ltid_flat_42565 = local_tid_45734; - int64_t loc_ind_42577 = ltid_42566 + offs_42576; - int64_t gtid_42578 = ii_42542 + loc_ind_42577; - bool cond_42579 = slt64(gtid_42578, m_27772); - float y_elem_42580; - - if (cond_42579) { - float Y_elem_42582 = ((__global float *) mem_44468)[i_42560 * - m_27772 + - gtid_42578]; - - y_elem_42580 = Y_elem_42582; - } else { - y_elem_42580 = 0.0F; - } - - bool cond_42584 = slt64(loc_ind_42577, (int64_t) 30); - int64_t y_loc_ind_42585; - - if (cond_42584) { - y_loc_ind_42585 = loc_ind_42577; - } else { - y_loc_ind_42585 = (int64_t) -1; - } - if (sle64((int64_t) 0, y_loc_ind_42585) && slt64(y_loc_ind_42585, - (int64_t) 30)) { - ((__local float *) mem_44480)[y_loc_ind_42585] = y_elem_42580; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - float mem_44509[30]; - int64_t ltid_y_42591 = sext_i32_i64(ltid_pre_45741); - int64_t ltid_x_42589 = sext_i32_i64(ltid_pre_45742); - int32_t ltid_flat_42590 = local_tid_45734; - int64_t gtid_42618 = jj1_42543 + ltid_y_42591; - int64_t gtid_42619 = jj2_42544 + ltid_x_42589; - bool binop_x_42621 = slt64(gtid_42618, i32_res_27787); - bool binop_y_42622 = slt64(gtid_42619, i32_res_27787); - bool cond_42623 = binop_x_42621 && binop_y_42622; - float mem_45450[30]; - - if (cond_42623) { - float x_42626 = ((__global float *) mem_44393)[i_42560 * - i32_res_27787 + - gtid_42618]; - float x_42628 = ((__global float *) mem_44397)[i_42560 * - i32_res_27787 + - gtid_42619]; - - for (int32_t i_44271 = 0; i_44271 < 30; i_44271++) { - int64_t i_42630 = sext_i32_i64(i_44271); - int64_t gtid_42632 = ii_42542 + i_42630; - bool cond_42633 = slt64(gtid_42632, m_27772); - - if (cond_42633) { - float inp_reg_var2zz_42635 = ((__local - float *) mem_44480)[i_42630]; - float res_reg_var2zz_42636 = mem_param_44481[i_42630]; - float x_42640 = x_42626 * x_42628; - bool 
isnan_res_42641; - - isnan_res_42641 = futrts_isnan32(inp_reg_var2zz_42635); - - float y_42642; - - if (isnan_res_42641) { - y_42642 = 0.0F; - } else { - y_42642 = 1.0F; - } - - float defunc_2_f_res_42643 = x_42640 * y_42642; - float defunc_1_op_res_42647 = res_reg_var2zz_42636 + - defunc_2_f_res_42643; - - mem_param_44481[i_42630] = defunc_1_op_res_42647; - } - } - for (int64_t i_45751 = 0; i_45751 < (int64_t) 30; i_45751++) { - mem_45450[i_45751] = mem_param_44481[i_45751]; - } - } else { - for (int64_t i_45752 = 0; i_45752 < (int64_t) 30; i_45752++) { - mem_45450[i_45752] = mem_param_44481[i_45752]; - } - } - for (int64_t i_45753 = 0; i_45753 < (int64_t) 30; i_45753++) { - mem_44509[i_45753] = mem_45450[i_45753]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_45746[30]; - - for (int32_t i_2 = 0; i_2 < 30; i_2++) - mem_param_tmp_45746[i_2] = mem_44509[i_2]; - for (int32_t i_3 = 0; i_3 < 30; i_3++) - mem_param_44481[i_3] = mem_param_tmp_45746[i_3]; - } - for (int32_t i_4 = 0; i_4 < 30; i_4++) - loop_mem_44510[i_4] = mem_param_44481[i_4]; - - float mem_44524[30 * 1 * 1]; - int64_t ltid_zz_42656 = sext_i32_i64(ltid_pre_45738); - int64_t ltid_y_42655 = sext_i32_i64(ltid_pre_45739); - int64_t ltid_x_42653 = sext_i32_i64(ltid_pre_45740); - int32_t ltid_flat_42654 = local_tid_45734; - float mem_44518[30 * 1 * 1]; - - for (int32_t i_44274 = 0; i_44274 < 30; i_44274++) { - int64_t i_42665 = sext_i32_i64(i_44274); - - for (int64_t i_45755 = 0; i_45755 < (int64_t) 1; i_45755++) { - mem_44518[i_42665 + i_45755] = loop_mem_44510[i_42665 + i_45755]; - } - } - for (int64_t i_45756 = 0; i_45756 < (int64_t) 30; i_45756++) { - for (int64_t i_45757 = 0; i_45757 < (int64_t) 1; i_45757++) { - for (int64_t i_45758 = 0; i_45758 < (int64_t) 1; i_45758++) { - mem_44524[i_45756 + i_45757 + i_45758] = mem_44518[i_45756 + - i_45757 + - i_45758]; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t reg_tile_i_45759 = squot64(sext_i32_i64(local_tid_45734), Ty_42529 * - 
Tx_42530); - int64_t reg_tile_i_45760 = squot64(sext_i32_i64(local_tid_45734) - - squot64(sext_i32_i64(local_tid_45734), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530); - int64_t reg_tile_i_45761 = sext_i32_i64(local_tid_45734) - - squot64(sext_i32_i64(local_tid_45734), Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530) - squot64(sext_i32_i64(local_tid_45734) - - squot64(sext_i32_i64(local_tid_45734), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530) * - Tx_42530; - int64_t tile_dim_start_45762 = (int64_t) 30 * (gid_zz_42540 + - reg_tile_i_45759); - int64_t tile_dim_start_45763 = Ty_42529 * gid_y_42539 + reg_tile_i_45760; - int64_t tile_dim_start_45764 = Tx_42530 * gid_x_42538 + reg_tile_i_45761; - - for (int64_t nest_i_45765 = 0; nest_i_45765 < (int64_t) 30; - nest_i_45765++) { - for (int64_t nest_i_45766 = 0; nest_i_45766 < (int64_t) 1; - nest_i_45766++) { - for (int64_t nest_i_45767 = 0; nest_i_45767 < (int64_t) 1; - nest_i_45767++) { - if ((slt64(tile_dim_start_45762 + nest_i_45765, m_27772) && - slt64(tile_dim_start_45763 + nest_i_45766, - i32_res_27787)) && slt64(tile_dim_start_45764 + - nest_i_45767, - i32_res_27787)) { - ((__global float *) mem_44528)[(tile_dim_start_45762 + - nest_i_45765) * - (i32_res_27787 * - i32_res_27787) + - (tile_dim_start_45763 + - nest_i_45766) * - i32_res_27787 + - (tile_dim_start_45764 + - nest_i_45767)] = - mem_44524[nest_i_45765 + nest_i_45766 + nest_i_45767]; - } - } - } - } - - error_4: - return; -} -__kernel void mainDetailedzisegmap_intragroup_42694(__global - int *global_failure, - __local volatile - int64_t *mem_44668_backing_aligned_0, - __local volatile - int64_t *mem_44666_backing_aligned_1, - int64_t N_27771, - int64_t m_27772, - int64_t i32_res_27781, - int64_t i32_res_27787, - int64_t gridDim_x_42688, - int64_t full_tiles_42719, - int64_t kk_42926, __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_44393, - __global - unsigned char *mem_44840) -{ - #define Ty_42675 
(mainDetailedziTy_42672) - #define Ry_42676 (mainDetailedziRy_42674) - #define Tx_42677 (mainDetailedziTx_42671) - #define Rx_42678 (mainDetailedziRx_42673) - #define Tk_42679 (mainDetailedziTk_42670) - #define tk_div_tx_42680 (sdiv_up64(mainDetailedziTk_42670, mainDetailedziTx_42671)) - #define tk_div_ty_42681 (sdiv_up64(mainDetailedziTk_42670, mainDetailedziTy_42672)) - #define TxRx_42682 (mainDetailedziTx_42671 * mainDetailedziRx_42673) - #define TyRy_42683 (mainDetailedziTy_42672 * mainDetailedziRy_42674) - #define a_loc_szz_42685 (mainDetailedziTk_42670 * (mainDetailedziTy_42672 * mainDetailedziRy_42674)) - #define b_loc_szz_42687 (mainDetailedziRx_42673 * (mainDetailedziTx_42671 * mainDetailedziTk_42670)) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_44668_backing_1 = (__local volatile - char *) mem_44668_backing_aligned_0; - __local volatile char *restrict mem_44666_backing_0 = (__local volatile - char *) mem_44666_backing_aligned_1; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45908; - int32_t local_tid_45909; - int64_t group_sizze_45912; - int32_t wave_sizze_45911; - int32_t group_tid_45910; - - global_tid_45908 = get_global_id(0); - local_tid_45909 = get_local_id(0); - group_sizze_45912 = get_local_size(0); - wave_sizze_45911 = LOCKSTEP_WIDTH; - group_tid_45910 = get_group_id(0); - - int32_t gid_flat_42694; - - gid_flat_42694 = group_tid_45910; - - int32_t ltid_pre_45913; - - ltid_pre_45913 = squot32(local_tid_45909, sext_i64_i32(Tx_42677)); - - int32_t ltid_pre_45914; - - ltid_pre_45914 = local_tid_45909 - squot32(local_tid_45909, - sext_i64_i32(Tx_42677)) * - sext_i64_i32(Tx_42677); - - int64_t gid_y_42693; - - gid_y_42693 = squot64(sext_i32_i64(group_tid_45910), gridDim_x_42688); - - int64_t gid_x_42692; - - gid_x_42692 = sext_i32_i64(group_tid_45910) - - squot64(sext_i32_i64(group_tid_45910), gridDim_x_42688) * - gridDim_x_42688; - - int64_t iii_42695; - - 
iii_42695 = TyRy_42683 * gid_y_42693; - - int64_t jjj_42696 = TxRx_42682 * gid_x_42692; - float mem_44664[Ry_42676 * Rx_42678]; - int64_t ltid_y_42699 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_x_42697 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42698 = local_tid_45909; - float mem_44655[Ry_42676 * Rx_42678]; - - for (int64_t i_42710 = 0; i_42710 < Ry_42676; i_42710++) { - for (int64_t i_42713 = 0; i_42713 < Rx_42678; i_42713++) { - mem_44655[i_42710 * Rx_42678 + i_42713] = 0.0F; - } - } - for (int64_t i_45917 = 0; i_45917 < Ry_42676; i_45917++) { - for (int64_t i_45918 = 0; i_45918 < Rx_42678; i_45918++) { - mem_44664[i_45917 * Rx_42678 + i_45918] = mem_44655[i_45917 * - Rx_42678 + - i_45918]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44666; - - mem_44666 = (__local char *) mem_44666_backing_0; - - __local char *mem_44668; - - mem_44668 = (__local char *) mem_44668_backing_1; - - float mem_44739[Ry_42676]; - float mem_44743[Rx_42678]; - float loop_mem_44755[Ry_42676 * Rx_42678]; - float mem_param_44669[Ry_42676 * Rx_42678]; - - for (int32_t i_2 = 0; i_2 < Ry_42676 * Rx_42678; i_2++) - mem_param_44669[i_2] = mem_44664[i_2]; - for (int64_t i_42720 = 0; i_42720 < full_tiles_42719; i_42720++) { - int64_t kk_42724 = Tk_42679 * i_42720; + for (int32_t i_2 = 0; i_2 < Ry_118373 * Rx_118375; i_2++) + mem_param_124412[i_2] = mem_124407[i_2]; + for (int64_t i_118417 = 0; i_118417 < full_tiles_118416; i_118417++) { + int64_t kk_118421 = Tk_118376 * i_118417; - for (int64_t i_42725 = 0; i_42725 < Ry_42676; i_42725++) { - int64_t binop_y_42748 = Ty_42675 * i_42725; + for (int64_t i_118422 = 0; i_118422 < Ry_118373; i_118422++) { + int64_t binop_y_118445 = Ty_118372 * i_118422; - for (int64_t i_42727 = 0; i_42727 < tk_div_tx_42680; i_42727++) { - int64_t binop_y_42746 = Tx_42677 * i_42727; - int64_t ltid_x_42729 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_y_42730 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42731 = local_tid_45909; - 
int64_t k_42747 = ltid_y_42730 + binop_y_42746; - int64_t i_42749 = ltid_x_42729 + binop_y_42748; - int64_t gtid_42750 = iii_42695 + i_42749; - int64_t A_col_idx_42751 = kk_42724 + k_42747; - bool cond_42752 = slt64(gtid_42750, m_27772); - float A_elem_42753; - - if (cond_42752) { - float A_elem_42755 = ((__global - float *) images_mem_44381)[gtid_42750 * - N_27771 + - A_col_idx_42751]; + for (int64_t i_118424 = 0; i_118424 < tk_div_tx_118377; + i_118424++) { + int64_t binop_y_118443 = Tx_118374 * i_118424; + int64_t ltid_x_118426 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_y_118427 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118428 = local_tid_128900; + int64_t k_118444 = ltid_y_118427 + binop_y_118443; + int64_t i_118446 = ltid_x_118426 + binop_y_118445; + int64_t gtid_118447 = iii_118392 + i_118446; + int64_t A_col_idx_118448 = kk_118421 + k_118444; + bool cond_118449 = slt64(gtid_118447, m_75136); + double A_elem_118450; + + if (cond_118449) { + double A_elem_118452 = ((__global + double *) mem_124142)[gtid_118447 * + N_75135 + + A_col_idx_118448]; - A_elem_42753 = A_elem_42755; + A_elem_118450 = A_elem_118452; } else { - A_elem_42753 = 0.0F; + A_elem_118450 = 0.0; } - bool cond_42757 = slt64(k_42747, Tk_42679); - int64_t a_loc_ind_42758; + bool cond_118454 = slt64(k_118444, Tk_118376); + int64_t a_loc_ind_118455; - if (cond_42757) { - int64_t binop_y_42759 = Tk_42679 * i_42749; - int64_t loc_fi_42760 = k_42747 + binop_y_42759; + if (cond_118454) { + int64_t binop_y_118456 = Tk_118376 * i_118446; + int64_t loc_fi_118457 = k_118444 + binop_y_118456; - a_loc_ind_42758 = loc_fi_42760; + a_loc_ind_118455 = loc_fi_118457; } else { - a_loc_ind_42758 = (int64_t) -1; + a_loc_ind_118455 = (int64_t) -1; } - if (sle64((int64_t) 0, a_loc_ind_42758) && - slt64(a_loc_ind_42758, a_loc_szz_42685)) { - ((__local float *) mem_44666)[a_loc_ind_42758] = - A_elem_42753; + if (sle64((int64_t) 0, a_loc_ind_118455) && + slt64(a_loc_ind_118455, a_loc_szz_118382)) { + 
((__local double *) mem_124409)[a_loc_ind_118455] = + A_elem_118450; } barrier(CLK_LOCAL_MEM_FENCE); } } - for (int64_t i_42765 = 0; i_42765 < tk_div_ty_42681; i_42765++) { - int64_t binop_y_42786 = Ty_42675 * i_42765; + for (int64_t i_118462 = 0; i_118462 < tk_div_ty_118378; i_118462++) { + int64_t binop_y_118483 = Ty_118372 * i_118462; - for (int64_t i_42767 = 0; i_42767 < Rx_42678; i_42767++) { - int64_t binop_y_42788 = Tx_42677 * i_42767; - int64_t ltid_x_42769 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_y_42770 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42771 = local_tid_45909; - int64_t k_42787 = ltid_x_42769 + binop_y_42786; - int64_t j_42789 = ltid_y_42770 + binop_y_42788; - int64_t gtid_42790 = jjj_42696 + j_42789; - int64_t B_row_idx_42791 = kk_42724 + k_42787; - bool cond_42792 = slt64(gtid_42790, i32_res_27787); - float B_elem_42793; - - if (cond_42792) { - float B_elem_42795 = ((__global - float *) mem_44393)[B_row_idx_42791 * - i32_res_27787 + - gtid_42790]; + for (int64_t i_118464 = 0; i_118464 < Rx_118375; i_118464++) { + int64_t binop_y_118485 = Tx_118374 * i_118464; + int64_t ltid_x_118466 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_y_118467 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118468 = local_tid_128900; + int64_t k_118484 = ltid_x_118466 + binop_y_118483; + int64_t j_118486 = ltid_y_118467 + binop_y_118485; + int64_t gtid_118487 = jjj_118393 + j_118486; + int64_t B_row_idx_118488 = kk_118421 + k_118484; + bool cond_118489 = slt64(gtid_118487, k2p2zq_75151); + double B_elem_118490; + + if (cond_118489) { + double B_elem_118492 = ((__global + double *) mem_120120)[B_row_idx_118488 * + k2p2zq_75151 + + gtid_118487]; - B_elem_42793 = B_elem_42795; + B_elem_118490 = B_elem_118492; } else { - B_elem_42793 = 0.0F; + B_elem_118490 = 0.0; } - bool cond_42797 = slt64(k_42787, Tk_42679); - int64_t b_loc_ind_42798; + bool cond_118494 = slt64(k_118484, Tk_118376); + int64_t b_loc_ind_118495; - if (cond_42797) { - int64_t 
binop_y_42799 = TxRx_42682 * k_42787; - int64_t loc_fi_42800 = j_42789 + binop_y_42799; + if (cond_118494) { + int64_t binop_y_118496 = TxRx_118379 * k_118484; + int64_t loc_fi_118497 = j_118486 + binop_y_118496; - b_loc_ind_42798 = loc_fi_42800; + b_loc_ind_118495 = loc_fi_118497; } else { - b_loc_ind_42798 = (int64_t) -1; + b_loc_ind_118495 = (int64_t) -1; } - if (sle64((int64_t) 0, b_loc_ind_42798) && - slt64(b_loc_ind_42798, b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_42798] = - B_elem_42793; + if (sle64((int64_t) 0, b_loc_ind_118495) && + slt64(b_loc_ind_118495, b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118495] = + B_elem_118490; } barrier(CLK_LOCAL_MEM_FENCE); } } - float loop_mem_44754[Ry_42676 * Rx_42678]; - float mem_param_44726[Ry_42676 * Rx_42678]; + double loop_mem_124497[Ry_118373 * Rx_118375]; + double mem_param_124469[Ry_118373 * Rx_118375]; - for (int32_t i_3 = 0; i_3 < Ry_42676 * Rx_42678; i_3++) - mem_param_44726[i_3] = mem_param_44669[i_3]; - for (int64_t i_42805 = 0; i_42805 < Tk_42679; i_42805++) { - int64_t binop_y_42844 = TxRx_42682 * i_42805; - int64_t ltid_y_42809 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_x_42807 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42808 = local_tid_45909; - float mem_44729[Ry_42676]; - float mem_44731[Rx_42678]; - int64_t binop_x_42835 = Ry_42676 * ltid_y_42809; + for (int32_t i_3 = 0; i_3 < Ry_118373 * Rx_118375; i_3++) + mem_param_124469[i_3] = mem_param_124412[i_3]; + for (int64_t i_118502 = 0; i_118502 < Tk_118376; i_118502++) { + int64_t binop_y_118541 = TxRx_118379 * i_118502; + int64_t ltid_y_118506 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_x_118504 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118505 = local_tid_128900; + double mem_124472[Ry_118373]; + double mem_124474[Rx_118375]; + int64_t binop_x_118532 = Ry_118373 * ltid_y_118506; - for (int64_t i_42833 = 0; i_42833 < Ry_42676; i_42833++) { - int64_t binop_x_42836 = i_42833 + 
binop_x_42835; - int64_t binop_y_42837 = Tk_42679 * binop_x_42836; - int64_t a_loc_ind_42838 = i_42805 + binop_y_42837; + for (int64_t i_118530 = 0; i_118530 < Ry_118373; i_118530++) { + int64_t binop_x_118533 = i_118530 + binop_x_118532; + int64_t binop_y_118534 = Tk_118376 * binop_x_118533; + int64_t a_loc_ind_118535 = i_118502 + binop_y_118534; - for (int64_t i_45930 = 0; i_45930 < (int64_t) 1; i_45930++) { - mem_44729[i_42833 + i_45930] = ((__local - float *) mem_44666)[a_loc_ind_42838 + - i_45930]; + for (int64_t i_128921 = 0; i_128921 < (int64_t) 1; i_128921++) { + mem_124472[i_118530 + i_128921] = ((__local + double *) mem_124409)[a_loc_ind_118535 + + i_128921]; } } - int64_t binop_y_42846 = Rx_42678 * ltid_x_42807; + int64_t binop_y_118543 = Rx_118375 * ltid_x_118504; - for (int64_t i_42842 = 0; i_42842 < Rx_42678; i_42842++) { - int64_t binop_x_42845 = i_42842 + binop_y_42844; - int64_t b_loc_ind_42847 = binop_x_42845 + binop_y_42846; + for (int64_t i_118539 = 0; i_118539 < Rx_118375; i_118539++) { + int64_t binop_x_118542 = i_118539 + binop_y_118541; + int64_t b_loc_ind_118544 = binop_x_118542 + binop_y_118543; - for (int64_t i_45932 = 0; i_45932 < (int64_t) 1; i_45932++) { - mem_44731[i_42842 + i_45932] = ((__local - float *) mem_44668)[b_loc_ind_42847 + - i_45932]; + for (int64_t i_128923 = 0; i_128923 < (int64_t) 1; i_128923++) { + mem_124474[i_118539 + i_128923] = ((__local + double *) mem_124411)[b_loc_ind_118544 + + i_128923]; } } - for (int64_t i_45933 = 0; i_45933 < Ry_42676; i_45933++) { - mem_44739[i_45933] = mem_44729[i_45933]; + for (int64_t i_128924 = 0; i_128924 < Ry_118373; i_128924++) { + mem_124482[i_128924] = mem_124472[i_128924]; } - for (int64_t i_45934 = 0; i_45934 < Rx_42678; i_45934++) { - mem_44743[i_45934] = mem_44731[i_45934]; + for (int64_t i_128925 = 0; i_128925 < Rx_118375; i_128925++) { + mem_124486[i_128925] = mem_124474[i_128925]; } barrier(CLK_LOCAL_MEM_FENCE); - float mem_44753[Ry_42676 * Rx_42678]; - int64_t ltid_y_42854 
= sext_i32_i64(ltid_pre_45913); - int64_t ltid_x_42852 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42853 = local_tid_45909; - int64_t binop_y_42897 = Ry_42676 * ltid_y_42854; - int64_t binop_y_42901 = Rx_42678 * ltid_x_42852; + double mem_124496[Ry_118373 * Rx_118375]; + int64_t ltid_y_118551 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_x_118549 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118550 = local_tid_128900; + int64_t binop_y_118594 = Ry_118373 * ltid_y_118551; + int64_t binop_y_118598 = Rx_118375 * ltid_x_118549; - for (int64_t i_42891 = 0; i_42891 < Ry_42676; i_42891++) { - int64_t binop_x_42896 = iii_42695 + i_42891; - int64_t cmpop_x_42898 = binop_x_42896 + binop_y_42897; - bool binop_x_42899 = slt64(cmpop_x_42898, m_27772); - - for (int64_t i_42894 = 0; i_42894 < Rx_42678; i_42894++) { - int64_t binop_x_42900 = jjj_42696 + i_42894; - int64_t cmpop_x_42902 = binop_x_42900 + binop_y_42901; - bool binop_y_42903 = slt64(cmpop_x_42902, i32_res_27787); - bool cond_42904 = binop_x_42899 && binop_y_42903; + for (int64_t i_118588 = 0; i_118588 < Ry_118373; i_118588++) { + int64_t binop_x_118593 = iii_118392 + i_118588; + int64_t cmpop_x_118595 = binop_x_118593 + binop_y_118594; + bool binop_x_118596 = slt64(cmpop_x_118595, m_75136); + + for (int64_t i_118591 = 0; i_118591 < Rx_118375; i_118591++) { + int64_t binop_x_118597 = jjj_118393 + i_118591; + int64_t cmpop_x_118599 = binop_x_118597 + binop_y_118598; + bool binop_y_118600 = slt64(cmpop_x_118599, k2p2zq_75151); + bool cond_118601 = binop_x_118596 && binop_y_118600; - if (cond_42904) { - float a_42906 = mem_44739[i_42891]; - float c_42908 = mem_param_44726[i_42891 * Rx_42678 + - i_42894]; - bool isnan_res_42911; + if (cond_118601) { + double a_118603 = mem_124482[i_118588]; + double c_118605 = mem_param_124469[i_118588 * + Rx_118375 + + i_118591]; + bool isnan_res_118608; - isnan_res_42911 = futrts_isnan32(a_42906); + isnan_res_118608 = futrts_isnan64(a_118603); - float 
defunc_1_f_res_42912; + double defunc_1_f_res_118609; - if (isnan_res_42911) { - defunc_1_f_res_42912 = 0.0F; + if (isnan_res_118608) { + defunc_1_f_res_118609 = 0.0; } else { - float b_42907 = mem_44743[i_42894]; - float defunc_1_f_res_f_res_42913 = a_42906 * - b_42907; + double b_118604 = mem_124486[i_118591]; + double defunc_1_f_res_f_res_118610 = a_118603 * + b_118604; - defunc_1_f_res_42912 = defunc_1_f_res_f_res_42913; + defunc_1_f_res_118609 = defunc_1_f_res_f_res_118610; } - float defunc_1_op_res_42917 = c_42908 + - defunc_1_f_res_42912; + double defunc_1_op_res_118614 = c_118605 + + defunc_1_f_res_118609; - mem_param_44726[i_42891 * Rx_42678 + i_42894] = - defunc_1_op_res_42917; + mem_param_124469[i_118588 * Rx_118375 + i_118591] = + defunc_1_op_res_118614; } } } - for (int64_t i_45937 = 0; i_45937 < Ry_42676; i_45937++) { - for (int64_t i_45938 = 0; i_45938 < Rx_42678; i_45938++) { - mem_44753[i_45937 * Rx_42678 + i_45938] = - mem_param_44726[i_45937 * Rx_42678 + i_45938]; + for (int64_t i_128928 = 0; i_128928 < Ry_118373; i_128928++) { + for (int64_t i_128929 = 0; i_128929 < Rx_118375; i_128929++) { + mem_124496[i_128928 * Rx_118375 + i_128929] = + mem_param_124469[i_128928 * Rx_118375 + i_128929]; } } barrier(CLK_LOCAL_MEM_FENCE); - float mem_param_tmp_45927[Ry_42676 * Rx_42678]; + double mem_param_tmp_128918[Ry_118373 * Rx_118375]; - for (int32_t i_4 = 0; i_4 < Ry_42676 * Rx_42678; i_4++) - mem_param_tmp_45927[i_4] = mem_44753[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_42676 * Rx_42678; i_5++) - mem_param_44726[i_5] = mem_param_tmp_45927[i_5]; + for (int32_t i_4 = 0; i_4 < Ry_118373 * Rx_118375; i_4++) + mem_param_tmp_128918[i_4] = mem_124496[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_118373 * Rx_118375; i_5++) + mem_param_124469[i_5] = mem_param_tmp_128918[i_5]; } - for (int32_t i_6 = 0; i_6 < Ry_42676 * Rx_42678; i_6++) - loop_mem_44754[i_6] = mem_param_44726[i_6]; + for (int32_t i_6 = 0; i_6 < Ry_118373 * Rx_118375; i_6++) + loop_mem_124497[i_6] = 
mem_param_124469[i_6]; - float mem_param_tmp_45919[Ry_42676 * Rx_42678]; + double mem_param_tmp_128910[Ry_118373 * Rx_118375]; - for (int32_t i_7 = 0; i_7 < Ry_42676 * Rx_42678; i_7++) - mem_param_tmp_45919[i_7] = loop_mem_44754[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_42676 * Rx_42678; i_8++) - mem_param_44669[i_8] = mem_param_tmp_45919[i_8]; + for (int32_t i_7 = 0; i_7 < Ry_118373 * Rx_118375; i_7++) + mem_param_tmp_128910[i_7] = loop_mem_124497[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_118373 * Rx_118375; i_8++) + mem_param_124412[i_8] = mem_param_tmp_128910[i_8]; } - for (int32_t i_9 = 0; i_9 < Ry_42676 * Rx_42678; i_9++) - loop_mem_44755[i_9] = mem_param_44669[i_9]; - for (int64_t i_42927 = 0; i_42927 < Ry_42676; i_42927++) { - int64_t binop_y_42952 = Ty_42675 * i_42927; - - for (int64_t i_42929 = 0; i_42929 < tk_div_tx_42680; i_42929++) { - int64_t binop_y_42950 = Tx_42677 * i_42929; - int64_t ltid_x_42931 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_y_42932 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42933 = local_tid_45909; - int64_t k_42951 = ltid_y_42932 + binop_y_42950; - int64_t i_42953 = ltid_x_42931 + binop_y_42952; - int64_t gtid_42954 = iii_42695 + i_42953; - int64_t A_col_idx_42955 = kk_42926 + k_42951; - bool binop_x_42956 = slt64(gtid_42954, m_27772); - bool binop_y_42957 = slt64(A_col_idx_42955, i32_res_27781); - bool cond_42958 = binop_x_42956 && binop_y_42957; - float A_elem_42959; + for (int32_t i_9 = 0; i_9 < Ry_118373 * Rx_118375; i_9++) + loop_mem_124498[i_9] = mem_param_124412[i_9]; + for (int64_t i_118624 = 0; i_118624 < Ry_118373; i_118624++) { + int64_t binop_y_118649 = Ty_118372 * i_118624; + + for (int64_t i_118626 = 0; i_118626 < tk_div_tx_118377; i_118626++) { + int64_t binop_y_118647 = Tx_118374 * i_118626; + int64_t ltid_x_118628 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_y_118629 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118630 = local_tid_128900; + int64_t k_118648 = ltid_y_118629 + binop_y_118647; + 
int64_t i_118650 = ltid_x_118628 + binop_y_118649; + int64_t gtid_118651 = iii_118392 + i_118650; + int64_t A_col_idx_118652 = kk_118623 + k_118648; + bool binop_x_118653 = slt64(gtid_118651, m_75136); + bool binop_y_118654 = slt64(A_col_idx_118652, n_75139); + bool cond_118655 = binop_x_118653 && binop_y_118654; + double A_elem_118656; - if (cond_42958) { - float A_elem_42961 = ((__global - float *) images_mem_44381)[gtid_42954 * - N_27771 + - A_col_idx_42955]; + if (cond_118655) { + double A_elem_118658 = ((__global + double *) mem_124142)[gtid_118651 * + N_75135 + + A_col_idx_118652]; - A_elem_42959 = A_elem_42961; + A_elem_118656 = A_elem_118658; } else { - A_elem_42959 = 0.0F; + A_elem_118656 = 0.0; } - bool cond_42963 = slt64(k_42951, Tk_42679); - int64_t a_loc_ind_42964; + bool cond_118660 = slt64(k_118648, Tk_118376); + int64_t a_loc_ind_118661; - if (cond_42963) { - int64_t binop_y_42965 = Tk_42679 * i_42953; - int64_t loc_fi_42966 = k_42951 + binop_y_42965; + if (cond_118660) { + int64_t binop_y_118662 = Tk_118376 * i_118650; + int64_t loc_fi_118663 = k_118648 + binop_y_118662; - a_loc_ind_42964 = loc_fi_42966; + a_loc_ind_118661 = loc_fi_118663; } else { - a_loc_ind_42964 = (int64_t) -1; + a_loc_ind_118661 = (int64_t) -1; } - if (sle64((int64_t) 0, a_loc_ind_42964) && slt64(a_loc_ind_42964, - a_loc_szz_42685)) { - ((__local float *) mem_44666)[a_loc_ind_42964] = A_elem_42959; + if (sle64((int64_t) 0, a_loc_ind_118661) && slt64(a_loc_ind_118661, + a_loc_szz_118382)) { + ((__local double *) mem_124409)[a_loc_ind_118661] = + A_elem_118656; } barrier(CLK_LOCAL_MEM_FENCE); } } - for (int64_t i_42971 = 0; i_42971 < tk_div_ty_42681; i_42971++) { - int64_t binop_y_42994 = Ty_42675 * i_42971; - - for (int64_t i_42973 = 0; i_42973 < Rx_42678; i_42973++) { - int64_t binop_y_42996 = Tx_42677 * i_42973; - int64_t ltid_x_42975 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_y_42976 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_42977 = local_tid_45909; - int64_t 
k_42995 = ltid_x_42975 + binop_y_42994; - int64_t j_42997 = ltid_y_42976 + binop_y_42996; - int64_t gtid_42998 = jjj_42696 + j_42997; - int64_t B_row_idx_42999 = kk_42926 + k_42995; - bool binop_x_43000 = slt64(gtid_42998, i32_res_27787); - bool binop_y_43001 = slt64(B_row_idx_42999, i32_res_27781); - bool cond_43002 = binop_x_43000 && binop_y_43001; - float B_elem_43003; + for (int64_t i_118668 = 0; i_118668 < tk_div_ty_118378; i_118668++) { + int64_t binop_y_118691 = Ty_118372 * i_118668; + + for (int64_t i_118670 = 0; i_118670 < Rx_118375; i_118670++) { + int64_t binop_y_118693 = Tx_118374 * i_118670; + int64_t ltid_x_118672 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_y_118673 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118674 = local_tid_128900; + int64_t k_118692 = ltid_x_118672 + binop_y_118691; + int64_t j_118694 = ltid_y_118673 + binop_y_118693; + int64_t gtid_118695 = jjj_118393 + j_118694; + int64_t B_row_idx_118696 = kk_118623 + k_118692; + bool binop_x_118697 = slt64(gtid_118695, k2p2zq_75151); + bool binop_y_118698 = slt64(B_row_idx_118696, n_75139); + bool cond_118699 = binop_x_118697 && binop_y_118698; + double B_elem_118700; - if (cond_43002) { - float B_elem_43005 = ((__global - float *) mem_44393)[B_row_idx_42999 * - i32_res_27787 + - gtid_42998]; + if (cond_118699) { + double B_elem_118702 = ((__global + double *) mem_120120)[B_row_idx_118696 * + k2p2zq_75151 + + gtid_118695]; - B_elem_43003 = B_elem_43005; + B_elem_118700 = B_elem_118702; } else { - B_elem_43003 = 0.0F; + B_elem_118700 = 0.0; } - bool cond_43007 = slt64(k_42995, Tk_42679); - int64_t b_loc_ind_43008; + bool cond_118704 = slt64(k_118692, Tk_118376); + int64_t b_loc_ind_118705; - if (cond_43007) { - int64_t binop_y_43009 = TxRx_42682 * k_42995; - int64_t loc_fi_43010 = j_42997 + binop_y_43009; + if (cond_118704) { + int64_t binop_y_118706 = TxRx_118379 * k_118692; + int64_t loc_fi_118707 = j_118694 + binop_y_118706; - b_loc_ind_43008 = loc_fi_43010; + b_loc_ind_118705 
= loc_fi_118707; } else { - b_loc_ind_43008 = (int64_t) -1; + b_loc_ind_118705 = (int64_t) -1; } - if (sle64((int64_t) 0, b_loc_ind_43008) && slt64(b_loc_ind_43008, - b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_43008] = B_elem_43003; + if (sle64((int64_t) 0, b_loc_ind_118705) && slt64(b_loc_ind_118705, + b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118705] = + B_elem_118700; } barrier(CLK_LOCAL_MEM_FENCE); } } - float mem_44821[Ry_42676]; - float mem_44825[Rx_42678]; - float mem_44835[Ry_42676 * Rx_42678]; - float loop_mem_44837[Ry_42676 * Rx_42678]; - float mem_param_44808[Ry_42676 * Rx_42678]; - - for (int32_t i_10 = 0; i_10 < Ry_42676 * Rx_42678; i_10++) - mem_param_44808[i_10] = loop_mem_44755[i_10]; - for (int64_t i_43015 = 0; i_43015 < Tk_42679; i_43015++) { - int64_t cmpop_x_43017 = kk_42926 + i_43015; - bool cond_43018 = slt64(cmpop_x_43017, i32_res_27781); - float mem_45468[Ry_42676 * Rx_42678]; - - if (cond_43018) { - int64_t binop_y_43056 = TxRx_42682 * i_43015; - int64_t bytes_44810 = (int64_t) 4 * Ry_42676; - int64_t bytes_44812 = (int64_t) 4 * Rx_42678; - int64_t ltid_y_43021 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_x_43019 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_43020 = local_tid_45909; - float mem_44811[Ry_42676]; - float mem_44813[Rx_42678]; - int64_t binop_x_43047 = Ry_42676 * ltid_y_43021; + double mem_124564[Ry_118373]; + double mem_124568[Rx_118375]; + double mem_124578[Ry_118373 * Rx_118375]; + double loop_mem_124580[Ry_118373 * Rx_118375]; + double mem_param_124551[Ry_118373 * Rx_118375]; + + for (int32_t i_10 = 0; i_10 < Ry_118373 * Rx_118375; i_10++) + mem_param_124551[i_10] = loop_mem_124498[i_10]; + for (int64_t i_118712 = 0; i_118712 < Tk_118376; i_118712++) { + int64_t cmpop_x_118714 = kk_118623 + i_118712; + bool cond_118715 = slt64(cmpop_x_118714, n_75139); + double mem_125382[Ry_118373 * Rx_118375]; + + if (cond_118715) { + int64_t binop_y_118753 = TxRx_118379 * i_118712; + 
int64_t bytes_124553 = (int64_t) 8 * Ry_118373; + int64_t bytes_124555 = (int64_t) 8 * Rx_118375; + int64_t ltid_y_118718 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_x_118716 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118717 = local_tid_128900; + double mem_124554[Ry_118373]; + double mem_124556[Rx_118375]; + int64_t binop_x_118744 = Ry_118373 * ltid_y_118718; - for (int64_t i_43045 = 0; i_43045 < Ry_42676; i_43045++) { - int64_t binop_x_43048 = i_43045 + binop_x_43047; - int64_t binop_y_43049 = Tk_42679 * binop_x_43048; - int64_t a_loc_ind_43050 = i_43015 + binop_y_43049; + for (int64_t i_118742 = 0; i_118742 < Ry_118373; i_118742++) { + int64_t binop_x_118745 = i_118742 + binop_x_118744; + int64_t binop_y_118746 = Tk_118376 * binop_x_118745; + int64_t a_loc_ind_118747 = i_118712 + binop_y_118746; - for (int64_t i_45946 = 0; i_45946 < (int64_t) 1; i_45946++) { - mem_44811[i_43045 + i_45946] = ((__local - float *) mem_44666)[a_loc_ind_43050 + - i_45946]; + for (int64_t i_128937 = 0; i_128937 < (int64_t) 1; i_128937++) { + mem_124554[i_118742 + i_128937] = ((__local + double *) mem_124409)[a_loc_ind_118747 + + i_128937]; } } - int64_t binop_y_43058 = Rx_42678 * ltid_x_43019; + int64_t binop_y_118755 = Rx_118375 * ltid_x_118716; - for (int64_t i_43054 = 0; i_43054 < Rx_42678; i_43054++) { - int64_t binop_x_43057 = i_43054 + binop_y_43056; - int64_t b_loc_ind_43059 = binop_x_43057 + binop_y_43058; + for (int64_t i_118751 = 0; i_118751 < Rx_118375; i_118751++) { + int64_t binop_x_118754 = i_118751 + binop_y_118753; + int64_t b_loc_ind_118756 = binop_x_118754 + binop_y_118755; - for (int64_t i_45948 = 0; i_45948 < (int64_t) 1; i_45948++) { - mem_44813[i_43054 + i_45948] = ((__local - float *) mem_44668)[b_loc_ind_43059 + - i_45948]; + for (int64_t i_128939 = 0; i_128939 < (int64_t) 1; i_128939++) { + mem_124556[i_118751 + i_128939] = ((__local + double *) mem_124411)[b_loc_ind_118756 + + i_128939]; } } - for (int64_t i_45949 = 0; i_45949 < Ry_42676; 
i_45949++) { - mem_44821[i_45949] = mem_44811[i_45949]; + for (int64_t i_128940 = 0; i_128940 < Ry_118373; i_128940++) { + mem_124564[i_128940] = mem_124554[i_128940]; } - for (int64_t i_45950 = 0; i_45950 < Rx_42678; i_45950++) { - mem_44825[i_45950] = mem_44813[i_45950]; + for (int64_t i_128941 = 0; i_128941 < Rx_118375; i_128941++) { + mem_124568[i_128941] = mem_124556[i_128941]; } barrier(CLK_LOCAL_MEM_FENCE); - int64_t ltid_y_43066 = sext_i32_i64(ltid_pre_45913); - int64_t ltid_x_43064 = sext_i32_i64(ltid_pre_45914); - int32_t ltid_flat_43065 = local_tid_45909; - int64_t binop_y_43109 = Ry_42676 * ltid_y_43066; - int64_t binop_y_43113 = Rx_42678 * ltid_x_43064; + int64_t ltid_y_118763 = sext_i32_i64(ltid_pre_128904); + int64_t ltid_x_118761 = sext_i32_i64(ltid_pre_128905); + int32_t ltid_flat_118762 = local_tid_128900; + int64_t binop_y_118806 = Ry_118373 * ltid_y_118763; + int64_t binop_y_118810 = Rx_118375 * ltid_x_118761; - for (int64_t i_43103 = 0; i_43103 < Ry_42676; i_43103++) { - int64_t binop_x_43108 = iii_42695 + i_43103; - int64_t cmpop_x_43110 = binop_x_43108 + binop_y_43109; - bool binop_x_43111 = slt64(cmpop_x_43110, m_27772); - - for (int64_t i_43106 = 0; i_43106 < Rx_42678; i_43106++) { - int64_t binop_x_43112 = jjj_42696 + i_43106; - int64_t cmpop_x_43114 = binop_x_43112 + binop_y_43113; - bool binop_y_43115 = slt64(cmpop_x_43114, i32_res_27787); - bool cond_43116 = binop_x_43111 && binop_y_43115; + for (int64_t i_118800 = 0; i_118800 < Ry_118373; i_118800++) { + int64_t binop_x_118805 = iii_118392 + i_118800; + int64_t cmpop_x_118807 = binop_x_118805 + binop_y_118806; + bool binop_x_118808 = slt64(cmpop_x_118807, m_75136); + + for (int64_t i_118803 = 0; i_118803 < Rx_118375; i_118803++) { + int64_t binop_x_118809 = jjj_118393 + i_118803; + int64_t cmpop_x_118811 = binop_x_118809 + binop_y_118810; + bool binop_y_118812 = slt64(cmpop_x_118811, k2p2zq_75151); + bool cond_118813 = binop_x_118808 && binop_y_118812; - if (cond_43116) { - float 
a_43118 = mem_44821[i_43103]; - float c_43120 = mem_param_44808[i_43103 * Rx_42678 + - i_43106]; - bool isnan_res_43123; + if (cond_118813) { + double a_118815 = mem_124564[i_118800]; + double c_118817 = mem_param_124551[i_118800 * + Rx_118375 + + i_118803]; + bool isnan_res_118820; - isnan_res_43123 = futrts_isnan32(a_43118); + isnan_res_118820 = futrts_isnan64(a_118815); - float defunc_1_f_res_43124; + double defunc_1_f_res_118821; - if (isnan_res_43123) { - defunc_1_f_res_43124 = 0.0F; + if (isnan_res_118820) { + defunc_1_f_res_118821 = 0.0; } else { - float b_43119 = mem_44825[i_43106]; - float defunc_1_f_res_f_res_43125 = a_43118 * - b_43119; + double b_118816 = mem_124568[i_118803]; + double defunc_1_f_res_f_res_118822 = a_118815 * + b_118816; - defunc_1_f_res_43124 = defunc_1_f_res_f_res_43125; + defunc_1_f_res_118821 = defunc_1_f_res_f_res_118822; } - float defunc_1_op_res_43129 = c_43120 + - defunc_1_f_res_43124; + double defunc_1_op_res_118826 = c_118817 + + defunc_1_f_res_118821; - mem_param_44808[i_43103 * Rx_42678 + i_43106] = - defunc_1_op_res_43129; + mem_param_124551[i_118800 * Rx_118375 + i_118803] = + defunc_1_op_res_118826; } } } - for (int64_t i_45953 = 0; i_45953 < Ry_42676; i_45953++) { - for (int64_t i_45954 = 0; i_45954 < Rx_42678; i_45954++) { - mem_44835[i_45953 * Rx_42678 + i_45954] = - mem_param_44808[i_45953 * Rx_42678 + i_45954]; + for (int64_t i_128944 = 0; i_128944 < Ry_118373; i_128944++) { + for (int64_t i_128945 = 0; i_128945 < Rx_118375; i_128945++) { + mem_124578[i_128944 * Rx_118375 + i_128945] = + mem_param_124551[i_128944 * Rx_118375 + i_128945]; } } barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_45955 = 0; i_45955 < Ry_42676; i_45955++) { - for (int64_t i_45956 = 0; i_45956 < Rx_42678; i_45956++) { - mem_45468[i_45955 * Rx_42678 + i_45956] = - mem_44835[i_45955 * Rx_42678 + i_45956]; + for (int64_t i_128946 = 0; i_128946 < Ry_118373; i_128946++) { + for (int64_t i_128947 = 0; i_128947 < Rx_118375; i_128947++) { + 
mem_125382[i_128946 * Rx_118375 + i_128947] = + mem_124578[i_128946 * Rx_118375 + i_128947]; } } } else { - for (int64_t i_45957 = 0; i_45957 < Ry_42676; i_45957++) { - for (int64_t i_45958 = 0; i_45958 < Rx_42678; i_45958++) { - mem_45468[i_45957 * Rx_42678 + i_45958] = - mem_param_44808[i_45957 * Rx_42678 + i_45958]; + for (int64_t i_128948 = 0; i_128948 < Ry_118373; i_128948++) { + for (int64_t i_128949 = 0; i_128949 < Rx_118375; i_128949++) { + mem_125382[i_128948 * Rx_118375 + i_128949] = + mem_param_124551[i_128948 * Rx_118375 + i_128949]; } } } - float mem_param_tmp_45943[Ry_42676 * Rx_42678]; + double mem_param_tmp_128934[Ry_118373 * Rx_118375]; - for (int32_t i_11 = 0; i_11 < Ry_42676 * Rx_42678; i_11++) - mem_param_tmp_45943[i_11] = mem_45468[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_42676 * Rx_42678; i_12++) - mem_param_44808[i_12] = mem_param_tmp_45943[i_12]; + for (int32_t i_11 = 0; i_11 < Ry_118373 * Rx_118375; i_11++) + mem_param_tmp_128934[i_11] = mem_125382[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_118373 * Rx_118375; i_12++) + mem_param_124551[i_12] = mem_param_tmp_128934[i_12]; } - for (int32_t i_13 = 0; i_13 < Ry_42676 * Rx_42678; i_13++) - loop_mem_44837[i_13] = mem_param_44808[i_13]; - - int64_t reg_tile_i_45959 = squot64(sext_i32_i64(local_tid_45909), Tx_42677); - int64_t reg_tile_i_45960 = sext_i32_i64(local_tid_45909) - - squot64(sext_i32_i64(local_tid_45909), Tx_42677) * Tx_42677; - int64_t tile_dim_start_45961 = Ry_42676 * (Ty_42675 * gid_y_42693 + - reg_tile_i_45959); - int64_t tile_dim_start_45962 = Rx_42678 * (Tx_42677 * gid_x_42692 + - reg_tile_i_45960); - - for (int64_t nest_i_45963 = 0; nest_i_45963 < Ry_42676; nest_i_45963++) { - for (int64_t nest_i_45964 = 0; nest_i_45964 < Rx_42678; - nest_i_45964++) { - if (slt64(tile_dim_start_45961 + nest_i_45963, m_27772) && - slt64(tile_dim_start_45962 + nest_i_45964, i32_res_27787)) { - ((__global float *) mem_44840)[(tile_dim_start_45961 + - nest_i_45963) * i32_res_27787 + - 
(tile_dim_start_45962 + - nest_i_45964)] = - loop_mem_44837[nest_i_45963 * Rx_42678 + nest_i_45964]; + for (int32_t i_13 = 0; i_13 < Ry_118373 * Rx_118375; i_13++) + loop_mem_124580[i_13] = mem_param_124551[i_13]; + + int64_t reg_tile_i_128950 = squot64(sext_i32_i64(local_tid_128900), + Tx_118374); + int64_t reg_tile_i_128951 = sext_i32_i64(local_tid_128900) - + squot64(sext_i32_i64(local_tid_128900), Tx_118374) * Tx_118374; + int64_t tile_dim_start_128952 = Ry_118373 * (Ty_118372 * gid_y_118390 + + reg_tile_i_128950); + int64_t tile_dim_start_128953 = Rx_118375 * (Tx_118374 * gid_x_118389 + + reg_tile_i_128951); + + for (int64_t nest_i_128954 = 0; nest_i_128954 < Ry_118373; + nest_i_128954++) { + for (int64_t nest_i_128955 = 0; nest_i_128955 < Rx_118375; + nest_i_128955++) { + if (slt64(tile_dim_start_128952 + nest_i_128954, m_75136) && + slt64(tile_dim_start_128953 + nest_i_128955, k2p2zq_75151)) { + ((__global double *) mem_124583)[(tile_dim_start_128952 + + nest_i_128954) * + k2p2zq_75151 + + (tile_dim_start_128953 + + nest_i_128955)] = + loop_mem_124580[nest_i_128954 * Rx_118375 + nest_i_128955]; } } } error_9: return; - #undef Ty_42675 - #undef Ry_42676 - #undef Tx_42677 - #undef Rx_42678 - #undef Tk_42679 - #undef tk_div_tx_42680 - #undef tk_div_ty_42681 - #undef TxRx_42682 - #undef TyRy_42683 - #undef a_loc_szz_42685 - #undef b_loc_szz_42687 -} -__kernel void mainDetailedzisegmap_intragroup_43143(__global - int *global_failure, - __local volatile - int64_t *mem_44898_backing_aligned_0, - __local volatile - int64_t *mem_44889_backing_aligned_1, - int64_t m_27772, - int64_t i32_res_27787, - int64_t num_groups_y_43141, - int64_t num_whole_tiles_43159, - int64_t residual_input_43286, - unsigned char cond_43287, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44879, - __global - unsigned char *mem_44906) + #undef Ty_118372 + #undef Ry_118373 + #undef Tx_118374 + #undef Rx_118375 + #undef Tk_118376 + #undef tk_div_tx_118377 
+ #undef tk_div_ty_118378 + #undef TxRx_118379 + #undef TyRy_118380 + #undef a_loc_szz_118382 + #undef b_loc_szz_118384 +} +__kernel void mainzisegmap_intragroup_118840(__global int *global_failure, + __local volatile + int64_t *mem_124641_backing_aligned_0, + __local volatile + int64_t *mem_124632_backing_aligned_1, + int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_y_118838, + int64_t num_whole_tiles_118856, + int64_t residual_input_118983, + unsigned char cond_118984, __global + unsigned char *defunc_3_map_res_mem_124593, + __global unsigned char *mem_124622, + __global unsigned char *mem_124649) { - #define tile_sizze_43138 (mainDetailedzitile_sizze_43137) + #define tile_sizze_118835 (mainzitile_sizze_118834) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44898_backing_5 = (__local volatile - char *) mem_44898_backing_aligned_0; - __local volatile char *restrict mem_44889_backing_0 = (__local volatile - char *) mem_44889_backing_aligned_1; + __local volatile char *restrict mem_124641_backing_5 = (__local volatile + char *) mem_124641_backing_aligned_0; + __local volatile char *restrict mem_124632_backing_0 = (__local volatile + char *) mem_124632_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46040; - int32_t local_tid_46041; - int64_t group_sizze_46044; - int32_t wave_sizze_46043; - int32_t group_tid_46042; + int32_t global_tid_129031; + int32_t local_tid_129032; + int64_t group_sizze_129035; + int32_t wave_sizze_129034; + int32_t group_tid_129033; - global_tid_46040 = get_global_id(0); - local_tid_46041 = get_local_id(0); - group_sizze_46044 = get_local_size(0); - wave_sizze_46043 = LOCKSTEP_WIDTH; - group_tid_46042 = get_group_id(0); + global_tid_129031 = get_global_id(0); + local_tid_129032 = get_local_id(0); + group_sizze_129035 = get_local_size(0); + wave_sizze_129034 = LOCKSTEP_WIDTH; + group_tid_129033 = get_group_id(0); - int32_t gid_flat_43143; 
+ int32_t gid_flat_118840; - gid_flat_43143 = group_tid_46042; + gid_flat_118840 = group_tid_129033; - int32_t ltid_pre_46045; + int32_t ltid_pre_129036; - ltid_pre_46045 = squot32(local_tid_46041, sext_i64_i32(tile_sizze_43138)); + ltid_pre_129036 = squot32(local_tid_129032, + sext_i64_i32(tile_sizze_118835)); - int32_t ltid_pre_46046; + int32_t ltid_pre_129037; - ltid_pre_46046 = local_tid_46041 - squot32(local_tid_46041, - sext_i64_i32(tile_sizze_43138)) * - sext_i64_i32(tile_sizze_43138); + ltid_pre_129037 = local_tid_129032 - squot32(local_tid_129032, + sext_i64_i32(tile_sizze_118835)) * + sext_i64_i32(tile_sizze_118835); - int64_t gid_x_43135; + int64_t gid_x_118832; - gid_x_43135 = squot64(sext_i32_i64(group_tid_46042), num_groups_y_43141); + gid_x_118832 = squot64(sext_i32_i64(group_tid_129033), num_groups_y_118838); - int64_t gid_y_43136; + int64_t gid_y_118833; - gid_y_43136 = sext_i32_i64(group_tid_46042) - - squot64(sext_i32_i64(group_tid_46042), num_groups_y_43141) * - num_groups_y_43141; + gid_y_118833 = sext_i32_i64(group_tid_129033) - + squot64(sext_i32_i64(group_tid_129033), num_groups_y_118838) * + num_groups_y_118838; - float mem_44884[1]; - int64_t ltid_y_43162 = sext_i32_i64(ltid_pre_46045); - int64_t ltid_x_43160 = sext_i32_i64(ltid_pre_46046); - int32_t ltid_flat_43161 = local_tid_46041; + double mem_124627[1]; + int64_t ltid_y_118859 = sext_i32_i64(ltid_pre_129036); + int64_t ltid_x_118857 = sext_i32_i64(ltid_pre_129037); + int32_t ltid_flat_118858 = local_tid_129032; - if (slt64(ltid_y_43162, tile_sizze_43138) && slt64(ltid_x_43160, - tile_sizze_43138)) { - mem_44884[(int64_t) 0] = 0.0F; + if (slt64(ltid_y_118859, tile_sizze_118835) && slt64(ltid_x_118857, + tile_sizze_118835)) { + mem_124627[(int64_t) 0] = 0.0; } barrier(CLK_LOCAL_MEM_FENCE); - int64_t binop_x_43245 = gid_x_43135 * tile_sizze_43138; - int64_t binop_x_43260 = gid_y_43136 * tile_sizze_43138; - __local char *mem_44889; + int64_t binop_x_118942 = gid_x_118832 * 
tile_sizze_118835; + int64_t binop_x_118957 = gid_y_118833 * tile_sizze_118835; + __local char *mem_124632; - mem_44889 = (__local char *) mem_44889_backing_0; + mem_124632 = (__local char *) mem_124632_backing_0; - float accs_mem_44894[1]; - float mem_param_44885[1]; + double accs_mem_124637[1]; + double mem_param_124628[1]; for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_44885[i_1] = mem_44884[i_1]; - for (int64_t tile_id_43171 = 0; tile_id_43171 < num_whole_tiles_43159; - tile_id_43171++) { - int64_t binop_x_43243 = tile_sizze_43138 * tile_id_43171; - int64_t ltid_y_43174 = sext_i32_i64(ltid_pre_46045); - int64_t ltid_x_43172 = sext_i32_i64(ltid_pre_46046); - int32_t ltid_flat_43173 = local_tid_46041; - int64_t j_43244 = ltid_x_43172 + binop_x_43243; - int64_t gtid_43246 = ltid_y_43174 + binop_x_43245; - bool binop_x_43251 = slt64(j_43244, i32_res_27787); - bool binop_y_43252 = slt64(gtid_43246, m_27772); - bool cond_43253 = binop_x_43251 && binop_y_43252; - float pre_43254; - - if (cond_43253) { - float x_43255 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_43246 * - i32_res_27787 + - j_43244]; - - pre_43254 = x_43255; - } else { - pre_43254 = 0.0F; - } - ((__local float *) mem_44889)[ltid_y_43174 * tile_sizze_43138 + - ltid_x_43172] = pre_43254; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_44893[1]; - int64_t ltid_y_43206 = sext_i32_i64(ltid_pre_46045); - int64_t ltid_x_43204 = sext_i32_i64(ltid_pre_46046); - int32_t ltid_flat_43205 = local_tid_46041; - int64_t gtid_43259 = ltid_y_43206 + binop_x_43245; - int64_t gtid_43261 = ltid_x_43204 + binop_x_43260; - float acc_43264 = mem_param_44885[(int64_t) 0]; - bool binop_x_43268 = slt64(gtid_43259, m_27772); - bool binop_y_43269 = slt64(gtid_43261, i32_res_27787); - bool cond_43270 = binop_x_43268 && binop_y_43269; - float acc_43271; - - if (cond_43270) { - float x_43272; - float redout_44315 = acc_43264; - - for (int64_t i_44316 = 0; i_44316 < tile_sizze_43138; i_44316++) { - float x_43276 = 
((__local float *) mem_44889)[ltid_y_43206 * - tile_sizze_43138 + - i_44316]; - int64_t slice_44367 = binop_x_43243 + i_44316; - float x_43277 = ((__global float *) mem_44879)[slice_44367 * - (i32_res_27787 * - m_27772) + - gtid_43259 * - i32_res_27787 + - gtid_43261]; - float defunc_1_f_res_43278 = x_43276 * x_43277; - float defunc_1_op_res_43275 = defunc_1_f_res_43278 + - redout_44315; - float redout_tmp_46049 = defunc_1_op_res_43275; - - redout_44315 = redout_tmp_46049; - } - x_43272 = redout_44315; - acc_43271 = x_43272; - } else { - acc_43271 = acc_43264; - } - mem_44893[(int64_t) 0] = acc_43271; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46047[1]; + mem_param_124628[i_1] = mem_124627[i_1]; + for (int64_t tile_id_118868 = 0; tile_id_118868 < num_whole_tiles_118856; + tile_id_118868++) { + int64_t binop_x_118940 = tile_sizze_118835 * tile_id_118868; + int64_t ltid_y_118871 = sext_i32_i64(ltid_pre_129036); + int64_t ltid_x_118869 = sext_i32_i64(ltid_pre_129037); + int32_t ltid_flat_118870 = local_tid_129032; + int64_t j_118941 = ltid_x_118869 + binop_x_118940; + int64_t gtid_118943 = ltid_y_118871 + binop_x_118942; + bool binop_x_118948 = slt64(j_118941, k2p2zq_75151); + bool binop_y_118949 = slt64(gtid_118943, m_75136); + bool cond_118950 = binop_x_118948 && binop_y_118949; + double pre_118951; + + if (cond_118950) { + double x_118952 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_118943 * + k2p2zq_75151 + + j_118941]; + + pre_118951 = x_118952; + } else { + pre_118951 = 0.0; + } + ((__local double *) mem_124632)[ltid_y_118871 * tile_sizze_118835 + + ltid_x_118869] = pre_118951; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124636[1]; + int64_t ltid_y_118903 = sext_i32_i64(ltid_pre_129036); + int64_t ltid_x_118901 = sext_i32_i64(ltid_pre_129037); + int32_t ltid_flat_118902 = local_tid_129032; + int64_t gtid_118956 = ltid_y_118903 + binop_x_118942; + int64_t gtid_118958 = ltid_x_118901 + binop_x_118957; + double acc_118961 = 
mem_param_124628[(int64_t) 0]; + bool binop_x_118965 = slt64(gtid_118956, m_75136); + bool binop_y_118966 = slt64(gtid_118958, k2p2zq_75151); + bool cond_118967 = binop_x_118965 && binop_y_118966; + double acc_118968; + + if (cond_118967) { + double x_118969; + double redout_119940 = acc_118961; + + for (int64_t i_119941 = 0; i_119941 < tile_sizze_118835; + i_119941++) { + double x_118973 = ((__local + double *) mem_124632)[ltid_y_118903 * + tile_sizze_118835 + + i_119941]; + int64_t slice_120051 = binop_x_118940 + i_119941; + double x_118974 = ((__global + double *) mem_124622)[slice_120051 * + (k2p2zq_75151 * + m_75136) + + gtid_118956 * + k2p2zq_75151 + + gtid_118958]; + double defunc_1_f_res_118975 = x_118973 * x_118974; + double defunc_1_op_res_118972 = defunc_1_f_res_118975 + + redout_119940; + double redout_tmp_129040 = defunc_1_op_res_118972; + + redout_119940 = redout_tmp_129040; + } + x_118969 = redout_119940; + acc_118968 = x_118969; + } else { + acc_118968 = acc_118961; + } + mem_124636[(int64_t) 0] = acc_118968; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_129038[1]; for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46047[i_2] = mem_44893[i_2]; + mem_param_tmp_129038[i_2] = mem_124636[i_2]; for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_44885[i_3] = mem_param_tmp_46047[i_3]; + mem_param_124628[i_3] = mem_param_tmp_129038[i_3]; } for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_44894[i_4] = mem_param_44885[i_4]; + accs_mem_124637[i_4] = mem_param_124628[i_4]; - __local char *mem_44898; + __local char *mem_124641; - mem_44898 = (__local char *) mem_44898_backing_5; + mem_124641 = (__local char *) mem_124641_backing_5; - float mem_44902[1]; - float mem_45482[1]; + double mem_124645[1]; + double mem_125396[1]; - if (cond_43287) { - mem_45482[(int64_t) 0] = accs_mem_44894[(int64_t) 0]; + if (cond_118984) { + mem_125396[(int64_t) 0] = accs_mem_124637[(int64_t) 0]; } else { - int64_t binop_x_43360 = tile_sizze_43138 * num_whole_tiles_43159; 
- int64_t ltid_y_43290 = sext_i32_i64(ltid_pre_46045); - int64_t ltid_x_43288 = sext_i32_i64(ltid_pre_46046); - int32_t ltid_flat_43289 = local_tid_46041; - int64_t j_43361 = ltid_x_43288 + binop_x_43360; - int64_t gtid_43363 = binop_x_43245 + ltid_y_43290; - bool binop_x_43368 = slt64(j_43361, i32_res_27787); - bool binop_y_43369 = slt64(gtid_43363, m_27772); - bool cond_43370 = binop_x_43368 && binop_y_43369; - float pre_43371; - - if (cond_43370) { - float x_43372 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_43363 * - i32_res_27787 + - j_43361]; - - pre_43371 = x_43372; - } else { - pre_43371 = 0.0F; - } - ((__local float *) mem_44898)[ltid_y_43290 * tile_sizze_43138 + - ltid_x_43288] = pre_43371; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43323 = sext_i32_i64(ltid_pre_46045); - int64_t ltid_x_43321 = sext_i32_i64(ltid_pre_46046); - int32_t ltid_flat_43322 = local_tid_46041; - int64_t gtid_43377 = binop_x_43245 + ltid_y_43323; - int64_t gtid_43379 = binop_x_43260 + ltid_x_43321; - float acc_43382 = accs_mem_44894[(int64_t) 0]; - bool binop_x_43386 = slt64(gtid_43377, m_27772); - bool binop_y_43387 = slt64(gtid_43379, i32_res_27787); - bool cond_43388 = binop_x_43386 && binop_y_43387; - float acc_43389; - - if (cond_43388) { - float x_43390; - float redout_44317 = acc_43382; - - for (int64_t i_44318 = 0; i_44318 < residual_input_43286; - i_44318++) { - float x_43394 = ((__local float *) mem_44898)[ltid_y_43323 * - tile_sizze_43138 + - i_44318]; - int64_t slice_44368 = binop_x_43360 + i_44318; - float x_43395 = ((__global float *) mem_44879)[slice_44368 * - (i32_res_27787 * - m_27772) + - gtid_43377 * - i32_res_27787 + - gtid_43379]; - float defunc_1_f_res_43396 = x_43394 * x_43395; - float defunc_1_op_res_43393 = defunc_1_f_res_43396 + - redout_44317; - float redout_tmp_46050 = defunc_1_op_res_43393; - - redout_44317 = redout_tmp_46050; - } - x_43390 = redout_44317; - acc_43389 = x_43390; - } else { - acc_43389 = acc_43382; - } - 
mem_44902[(int64_t) 0] = acc_43389; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45482[(int64_t) 0] = mem_44902[(int64_t) 0]; - } - - int64_t thread_out_index_46051 = gid_x_43135 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46045); - int64_t thread_out_index_46052 = gid_y_43136 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46046); - - if (slt64(thread_out_index_46051, m_27772) && slt64(thread_out_index_46052, - i32_res_27787)) { - ((__global float *) mem_44906)[thread_out_index_46051 * i32_res_27787 + - thread_out_index_46052] = - mem_45482[(int64_t) 0]; + int64_t binop_x_119057 = tile_sizze_118835 * num_whole_tiles_118856; + int64_t ltid_y_118987 = sext_i32_i64(ltid_pre_129036); + int64_t ltid_x_118985 = sext_i32_i64(ltid_pre_129037); + int32_t ltid_flat_118986 = local_tid_129032; + int64_t j_119058 = ltid_x_118985 + binop_x_119057; + int64_t gtid_119060 = binop_x_118942 + ltid_y_118987; + bool binop_x_119065 = slt64(j_119058, k2p2zq_75151); + bool binop_y_119066 = slt64(gtid_119060, m_75136); + bool cond_119067 = binop_x_119065 && binop_y_119066; + double pre_119068; + + if (cond_119067) { + double x_119069 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_119060 * + k2p2zq_75151 + + j_119058]; + + pre_119068 = x_119069; + } else { + pre_119068 = 0.0; + } + ((__local double *) mem_124641)[ltid_y_118987 * tile_sizze_118835 + + ltid_x_118985] = pre_119068; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_119020 = sext_i32_i64(ltid_pre_129036); + int64_t ltid_x_119018 = sext_i32_i64(ltid_pre_129037); + int32_t ltid_flat_119019 = local_tid_129032; + int64_t gtid_119074 = binop_x_118942 + ltid_y_119020; + int64_t gtid_119076 = binop_x_118957 + ltid_x_119018; + double acc_119079 = accs_mem_124637[(int64_t) 0]; + bool binop_x_119083 = slt64(gtid_119074, m_75136); + bool binop_y_119084 = slt64(gtid_119076, k2p2zq_75151); + bool cond_119085 = binop_x_119083 && binop_y_119084; + double acc_119086; + + if (cond_119085) { + double x_119087; + double redout_119942 = 
acc_119079; + + for (int64_t i_119943 = 0; i_119943 < residual_input_118983; + i_119943++) { + double x_119091 = ((__local + double *) mem_124641)[ltid_y_119020 * + tile_sizze_118835 + + i_119943]; + int64_t slice_120052 = binop_x_119057 + i_119943; + double x_119092 = ((__global + double *) mem_124622)[slice_120052 * + (k2p2zq_75151 * + m_75136) + + gtid_119074 * + k2p2zq_75151 + + gtid_119076]; + double defunc_1_f_res_119093 = x_119091 * x_119092; + double defunc_1_op_res_119090 = defunc_1_f_res_119093 + + redout_119942; + double redout_tmp_129041 = defunc_1_op_res_119090; + + redout_119942 = redout_tmp_129041; + } + x_119087 = redout_119942; + acc_119086 = x_119087; + } else { + acc_119086 = acc_119079; + } + mem_124645[(int64_t) 0] = acc_119086; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125396[(int64_t) 0] = mem_124645[(int64_t) 0]; + } + + int64_t thread_out_index_129042 = gid_x_118832 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129036); + int64_t thread_out_index_129043 = gid_y_118833 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129037); + + if (slt64(thread_out_index_129042, m_75136) && + slt64(thread_out_index_129043, k2p2zq_75151)) { + ((__global double *) mem_124649)[thread_out_index_129042 * + k2p2zq_75151 + + thread_out_index_129043] = + mem_125396[(int64_t) 0]; } error_5: return; - #undef tile_sizze_43138 + #undef tile_sizze_118835 } -__kernel void mainDetailedzisegmap_intragroup_43435(__global - int *global_failure, - __local volatile - int64_t *mem_44958_backing_aligned_0, - __local volatile - int64_t *mem_44956_backing_aligned_1, - int64_t N_27771, - int64_t m_27772, - int64_t i32_res_27787, - int64_t gridDim_x_43429, - int64_t full_tiles_43460, - int64_t kk_43663, __global - unsigned char *defunc_4_map_res_mem_44916, - __global - unsigned char *mem_44940, - __global - unsigned char *mem_45130) +__kernel void mainzisegmap_intragroup_119132(__global int *global_failure, + __local volatile + int64_t *mem_124701_backing_aligned_0, + __local volatile + 
int64_t *mem_124699_backing_aligned_1, + int64_t N_75135, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t gridDim_x_119126, + int64_t full_tiles_119157, + int64_t kk_119360, __global + unsigned char *defunc_4_map_res_mem_124659, + __global unsigned char *mem_124683, + __global unsigned char *mem_124873) { - #define Ty_43416 (mainDetailedziTy_43413) - #define Ry_43417 (mainDetailedziRy_43415) - #define Tx_43418 (mainDetailedziTx_43412) - #define Rx_43419 (mainDetailedziRx_43414) - #define Tk_43420 (mainDetailedziTk_43411) - #define tk_div_tx_43421 (sdiv_up64(mainDetailedziTk_43411, mainDetailedziTx_43412)) - #define tk_div_ty_43422 (sdiv_up64(mainDetailedziTk_43411, mainDetailedziTy_43413)) - #define TxRx_43423 (mainDetailedziTx_43412 * mainDetailedziRx_43414) - #define TyRy_43424 (mainDetailedziTy_43413 * mainDetailedziRy_43415) - #define a_loc_szz_43426 (mainDetailedziTk_43411 * (mainDetailedziTy_43413 * mainDetailedziRy_43415)) - #define b_loc_szz_43428 (mainDetailedziRx_43414 * (mainDetailedziTx_43412 * mainDetailedziTk_43411)) + #define Ty_119113 (mainziTy_119110) + #define Ry_119114 (mainziRy_119112) + #define Tx_119115 (mainziTx_119109) + #define Rx_119116 (mainziRx_119111) + #define Tk_119117 (mainziTk_119108) + #define tk_div_tx_119118 (sdiv_up64(mainziTk_119108, mainziTx_119109)) + #define tk_div_ty_119119 (sdiv_up64(mainziTk_119108, mainziTy_119110)) + #define TxRx_119120 (mainziTx_119109 * mainziRx_119111) + #define TyRy_119121 (mainziTy_119110 * mainziRy_119112) + #define a_loc_szz_119123 (mainziTk_119108 * (mainziTy_119110 * mainziRy_119112)) + #define b_loc_szz_119125 (mainziRx_119111 * (mainziTx_119109 * mainziTk_119108)) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44958_backing_1 = (__local volatile - char *) mem_44958_backing_aligned_0; - __local volatile char *restrict mem_44956_backing_0 = (__local volatile - char *) mem_44956_backing_aligned_1; + __local volatile char 
*restrict mem_124701_backing_1 = (__local volatile + char *) mem_124701_backing_aligned_0; + __local volatile char *restrict mem_124699_backing_0 = (__local volatile + char *) mem_124699_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46128; - int32_t local_tid_46129; - int64_t group_sizze_46132; - int32_t wave_sizze_46131; - int32_t group_tid_46130; + int32_t global_tid_129119; + int32_t local_tid_129120; + int64_t group_sizze_129123; + int32_t wave_sizze_129122; + int32_t group_tid_129121; - global_tid_46128 = get_global_id(0); - local_tid_46129 = get_local_id(0); - group_sizze_46132 = get_local_size(0); - wave_sizze_46131 = LOCKSTEP_WIDTH; - group_tid_46130 = get_group_id(0); + global_tid_129119 = get_global_id(0); + local_tid_129120 = get_local_id(0); + group_sizze_129123 = get_local_size(0); + wave_sizze_129122 = LOCKSTEP_WIDTH; + group_tid_129121 = get_group_id(0); - int32_t gid_flat_43435; + int32_t gid_flat_119132; - gid_flat_43435 = group_tid_46130; + gid_flat_119132 = group_tid_129121; - int32_t ltid_pre_46133; + int32_t ltid_pre_129124; - ltid_pre_46133 = squot32(local_tid_46129, sext_i64_i32(Tx_43418)); + ltid_pre_129124 = squot32(local_tid_129120, sext_i64_i32(Tx_119115)); - int32_t ltid_pre_46134; + int32_t ltid_pre_129125; - ltid_pre_46134 = local_tid_46129 - squot32(local_tid_46129, - sext_i64_i32(Tx_43418)) * - sext_i64_i32(Tx_43418); + ltid_pre_129125 = local_tid_129120 - squot32(local_tid_129120, + sext_i64_i32(Tx_119115)) * + sext_i64_i32(Tx_119115); - int64_t gid_y_43434; + int64_t gid_y_119131; - gid_y_43434 = squot64(sext_i32_i64(group_tid_46130), gridDim_x_43429); + gid_y_119131 = squot64(sext_i32_i64(group_tid_129121), gridDim_x_119126); - int64_t gid_x_43433; + int64_t gid_x_119130; - gid_x_43433 = sext_i32_i64(group_tid_46130) - - squot64(sext_i32_i64(group_tid_46130), gridDim_x_43429) * - gridDim_x_43429; + gid_x_119130 = sext_i32_i64(group_tid_129121) - + squot64(sext_i32_i64(group_tid_129121), 
gridDim_x_119126) * + gridDim_x_119126; - int64_t iii_43436; + int64_t iii_119133; - iii_43436 = TyRy_43424 * gid_y_43434; + iii_119133 = TyRy_119121 * gid_y_119131; - int64_t jjj_43437 = TxRx_43423 * gid_x_43433; - float mem_44954[Ry_43417 * Rx_43419]; - int64_t ltid_y_43440 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_x_43438 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43439 = local_tid_46129; - float mem_44945[Ry_43417 * Rx_43419]; + int64_t jjj_119134 = TxRx_119120 * gid_x_119130; + double mem_124697[Ry_119114 * Rx_119116]; + int64_t ltid_y_119137 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_x_119135 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119136 = local_tid_129120; + double mem_124688[Ry_119114 * Rx_119116]; - for (int64_t i_43451 = 0; i_43451 < Ry_43417; i_43451++) { - for (int64_t i_43454 = 0; i_43454 < Rx_43419; i_43454++) { - mem_44945[i_43451 * Rx_43419 + i_43454] = 0.0F; + for (int64_t i_119148 = 0; i_119148 < Ry_119114; i_119148++) { + for (int64_t i_119151 = 0; i_119151 < Rx_119116; i_119151++) { + mem_124688[i_119148 * Rx_119116 + i_119151] = 0.0; } } - for (int64_t i_46137 = 0; i_46137 < Ry_43417; i_46137++) { - for (int64_t i_46138 = 0; i_46138 < Rx_43419; i_46138++) { - mem_44954[i_46137 * Rx_43419 + i_46138] = mem_44945[i_46137 * - Rx_43419 + - i_46138]; + for (int64_t i_129128 = 0; i_129128 < Ry_119114; i_129128++) { + for (int64_t i_129129 = 0; i_129129 < Rx_119116; i_129129++) { + mem_124697[i_129128 * Rx_119116 + i_129129] = mem_124688[i_129128 * + Rx_119116 + + i_129129]; } } barrier(CLK_LOCAL_MEM_FENCE); - __local char *mem_44956; + __local char *mem_124699; - mem_44956 = (__local char *) mem_44956_backing_0; + mem_124699 = (__local char *) mem_124699_backing_0; - __local char *mem_44958; + __local char *mem_124701; - mem_44958 = (__local char *) mem_44958_backing_1; + mem_124701 = (__local char *) mem_124701_backing_1; - float mem_45029[Ry_43417]; - float mem_45033[Rx_43419]; - float loop_mem_45045[Ry_43417 * 
Rx_43419]; - float mem_param_44959[Ry_43417 * Rx_43419]; + double mem_124772[Ry_119114]; + double mem_124776[Rx_119116]; + double loop_mem_124788[Ry_119114 * Rx_119116]; + double mem_param_124702[Ry_119114 * Rx_119116]; - for (int32_t i_2 = 0; i_2 < Ry_43417 * Rx_43419; i_2++) - mem_param_44959[i_2] = mem_44954[i_2]; - for (int64_t i_43461 = 0; i_43461 < full_tiles_43460; i_43461++) { - int64_t kk_43465 = Tk_43420 * i_43461; + for (int32_t i_2 = 0; i_2 < Ry_119114 * Rx_119116; i_2++) + mem_param_124702[i_2] = mem_124697[i_2]; + for (int64_t i_119158 = 0; i_119158 < full_tiles_119157; i_119158++) { + int64_t kk_119162 = Tk_119117 * i_119158; - for (int64_t i_43466 = 0; i_43466 < Ry_43417; i_43466++) { - int64_t binop_y_43489 = Ty_43416 * i_43466; + for (int64_t i_119163 = 0; i_119163 < Ry_119114; i_119163++) { + int64_t binop_y_119186 = Ty_119113 * i_119163; - for (int64_t i_43468 = 0; i_43468 < tk_div_tx_43421; i_43468++) { - int64_t binop_y_43487 = Tx_43418 * i_43468; - int64_t ltid_x_43470 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_y_43471 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43472 = local_tid_46129; - int64_t k_43488 = ltid_y_43471 + binop_y_43487; - int64_t i_43490 = ltid_x_43470 + binop_y_43489; - int64_t gtid_43491 = iii_43436 + i_43490; - int64_t A_col_idx_43492 = kk_43465 + k_43488; - bool cond_43493 = slt64(gtid_43491, m_27772); - float A_elem_43494; - - if (cond_43493) { - float A_elem_43496 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43491 * - i32_res_27787 + - A_col_idx_43492]; + for (int64_t i_119165 = 0; i_119165 < tk_div_tx_119118; + i_119165++) { + int64_t binop_y_119184 = Tx_119115 * i_119165; + int64_t ltid_x_119167 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_y_119168 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119169 = local_tid_129120; + int64_t k_119185 = ltid_y_119168 + binop_y_119184; + int64_t i_119187 = ltid_x_119167 + binop_y_119186; + int64_t gtid_119188 = iii_119133 + i_119187; + int64_t 
A_col_idx_119189 = kk_119162 + k_119185; + bool cond_119190 = slt64(gtid_119188, m_75136); + double A_elem_119191; + + if (cond_119190) { + double A_elem_119193 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119188 * + k2p2zq_75151 + + A_col_idx_119189]; - A_elem_43494 = A_elem_43496; + A_elem_119191 = A_elem_119193; } else { - A_elem_43494 = 0.0F; + A_elem_119191 = 0.0; } - bool cond_43498 = slt64(k_43488, Tk_43420); - int64_t a_loc_ind_43499; + bool cond_119195 = slt64(k_119185, Tk_119117); + int64_t a_loc_ind_119196; - if (cond_43498) { - int64_t binop_y_43500 = Tk_43420 * i_43490; - int64_t loc_fi_43501 = k_43488 + binop_y_43500; + if (cond_119195) { + int64_t binop_y_119197 = Tk_119117 * i_119187; + int64_t loc_fi_119198 = k_119185 + binop_y_119197; - a_loc_ind_43499 = loc_fi_43501; + a_loc_ind_119196 = loc_fi_119198; } else { - a_loc_ind_43499 = (int64_t) -1; + a_loc_ind_119196 = (int64_t) -1; } - if (sle64((int64_t) 0, a_loc_ind_43499) && - slt64(a_loc_ind_43499, a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43499] = - A_elem_43494; + if (sle64((int64_t) 0, a_loc_ind_119196) && + slt64(a_loc_ind_119196, a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119196] = + A_elem_119191; } barrier(CLK_LOCAL_MEM_FENCE); } } - for (int64_t i_43506 = 0; i_43506 < tk_div_ty_43422; i_43506++) { - int64_t binop_y_43527 = Ty_43416 * i_43506; + for (int64_t i_119203 = 0; i_119203 < tk_div_ty_119119; i_119203++) { + int64_t binop_y_119224 = Ty_119113 * i_119203; - for (int64_t i_43508 = 0; i_43508 < Rx_43419; i_43508++) { - int64_t binop_y_43529 = Tx_43418 * i_43508; - int64_t ltid_x_43510 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_y_43511 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43512 = local_tid_46129; - int64_t k_43528 = ltid_x_43510 + binop_y_43527; - int64_t j_43530 = ltid_y_43511 + binop_y_43529; - int64_t gtid_43531 = jjj_43437 + j_43530; - int64_t B_row_idx_43532 = kk_43465 + k_43528; - bool cond_43533 = 
slt64(gtid_43531, N_27771); - float B_elem_43534; - - if (cond_43533) { - float B_elem_43536 = ((__global - float *) mem_44940)[B_row_idx_43532 * - N_27771 + - gtid_43531]; + for (int64_t i_119205 = 0; i_119205 < Rx_119116; i_119205++) { + int64_t binop_y_119226 = Tx_119115 * i_119205; + int64_t ltid_x_119207 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_y_119208 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119209 = local_tid_129120; + int64_t k_119225 = ltid_x_119207 + binop_y_119224; + int64_t j_119227 = ltid_y_119208 + binop_y_119226; + int64_t gtid_119228 = jjj_119134 + j_119227; + int64_t B_row_idx_119229 = kk_119162 + k_119225; + bool cond_119230 = slt64(gtid_119228, N_75135); + double B_elem_119231; + + if (cond_119230) { + double B_elem_119233 = ((__global + double *) mem_124683)[B_row_idx_119229 * + N_75135 + + gtid_119228]; - B_elem_43534 = B_elem_43536; + B_elem_119231 = B_elem_119233; } else { - B_elem_43534 = 0.0F; + B_elem_119231 = 0.0; } - bool cond_43538 = slt64(k_43528, Tk_43420); - int64_t b_loc_ind_43539; + bool cond_119235 = slt64(k_119225, Tk_119117); + int64_t b_loc_ind_119236; - if (cond_43538) { - int64_t binop_y_43540 = TxRx_43423 * k_43528; - int64_t loc_fi_43541 = j_43530 + binop_y_43540; + if (cond_119235) { + int64_t binop_y_119237 = TxRx_119120 * k_119225; + int64_t loc_fi_119238 = j_119227 + binop_y_119237; - b_loc_ind_43539 = loc_fi_43541; + b_loc_ind_119236 = loc_fi_119238; } else { - b_loc_ind_43539 = (int64_t) -1; + b_loc_ind_119236 = (int64_t) -1; } - if (sle64((int64_t) 0, b_loc_ind_43539) && - slt64(b_loc_ind_43539, b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43539] = - B_elem_43534; + if (sle64((int64_t) 0, b_loc_ind_119236) && + slt64(b_loc_ind_119236, b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119236] = + B_elem_119231; } barrier(CLK_LOCAL_MEM_FENCE); } } - float loop_mem_45044[Ry_43417 * Rx_43419]; - float mem_param_45016[Ry_43417 * Rx_43419]; + double 
loop_mem_124787[Ry_119114 * Rx_119116]; + double mem_param_124759[Ry_119114 * Rx_119116]; - for (int32_t i_3 = 0; i_3 < Ry_43417 * Rx_43419; i_3++) - mem_param_45016[i_3] = mem_param_44959[i_3]; - for (int64_t i_43546 = 0; i_43546 < Tk_43420; i_43546++) { - int64_t binop_y_43585 = TxRx_43423 * i_43546; - int64_t ltid_y_43550 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_x_43548 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43549 = local_tid_46129; - float mem_45019[Ry_43417]; - float mem_45021[Rx_43419]; - int64_t binop_x_43576 = Ry_43417 * ltid_y_43550; + for (int32_t i_3 = 0; i_3 < Ry_119114 * Rx_119116; i_3++) + mem_param_124759[i_3] = mem_param_124702[i_3]; + for (int64_t i_119243 = 0; i_119243 < Tk_119117; i_119243++) { + int64_t binop_y_119282 = TxRx_119120 * i_119243; + int64_t ltid_y_119247 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_x_119245 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119246 = local_tid_129120; + double mem_124762[Ry_119114]; + double mem_124764[Rx_119116]; + int64_t binop_x_119273 = Ry_119114 * ltid_y_119247; - for (int64_t i_43574 = 0; i_43574 < Ry_43417; i_43574++) { - int64_t binop_x_43577 = i_43574 + binop_x_43576; - int64_t binop_y_43578 = Tk_43420 * binop_x_43577; - int64_t a_loc_ind_43579 = i_43546 + binop_y_43578; + for (int64_t i_119271 = 0; i_119271 < Ry_119114; i_119271++) { + int64_t binop_x_119274 = i_119271 + binop_x_119273; + int64_t binop_y_119275 = Tk_119117 * binop_x_119274; + int64_t a_loc_ind_119276 = i_119243 + binop_y_119275; - for (int64_t i_46150 = 0; i_46150 < (int64_t) 1; i_46150++) { - mem_45019[i_43574 + i_46150] = ((__local - float *) mem_44956)[a_loc_ind_43579 + - i_46150]; + for (int64_t i_129141 = 0; i_129141 < (int64_t) 1; i_129141++) { + mem_124762[i_119271 + i_129141] = ((__local + double *) mem_124699)[a_loc_ind_119276 + + i_129141]; } } - int64_t binop_y_43587 = Rx_43419 * ltid_x_43548; + int64_t binop_y_119284 = Rx_119116 * ltid_x_119245; - for (int64_t i_43583 = 0; i_43583 < 
Rx_43419; i_43583++) { - int64_t binop_x_43586 = i_43583 + binop_y_43585; - int64_t b_loc_ind_43588 = binop_x_43586 + binop_y_43587; + for (int64_t i_119280 = 0; i_119280 < Rx_119116; i_119280++) { + int64_t binop_x_119283 = i_119280 + binop_y_119282; + int64_t b_loc_ind_119285 = binop_x_119283 + binop_y_119284; - for (int64_t i_46152 = 0; i_46152 < (int64_t) 1; i_46152++) { - mem_45021[i_43583 + i_46152] = ((__local - float *) mem_44958)[b_loc_ind_43588 + - i_46152]; + for (int64_t i_129143 = 0; i_129143 < (int64_t) 1; i_129143++) { + mem_124764[i_119280 + i_129143] = ((__local + double *) mem_124701)[b_loc_ind_119285 + + i_129143]; } } - for (int64_t i_46153 = 0; i_46153 < Ry_43417; i_46153++) { - mem_45029[i_46153] = mem_45019[i_46153]; + for (int64_t i_129144 = 0; i_129144 < Ry_119114; i_129144++) { + mem_124772[i_129144] = mem_124762[i_129144]; } - for (int64_t i_46154 = 0; i_46154 < Rx_43419; i_46154++) { - mem_45033[i_46154] = mem_45021[i_46154]; + for (int64_t i_129145 = 0; i_129145 < Rx_119116; i_129145++) { + mem_124776[i_129145] = mem_124764[i_129145]; } barrier(CLK_LOCAL_MEM_FENCE); - float mem_45043[Ry_43417 * Rx_43419]; - int64_t ltid_y_43595 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_x_43593 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43594 = local_tid_46129; - int64_t binop_y_43636 = Ry_43417 * ltid_y_43595; - int64_t binop_y_43640 = Rx_43419 * ltid_x_43593; + double mem_124786[Ry_119114 * Rx_119116]; + int64_t ltid_y_119292 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_x_119290 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119291 = local_tid_129120; + int64_t binop_y_119333 = Ry_119114 * ltid_y_119292; + int64_t binop_y_119337 = Rx_119116 * ltid_x_119290; - for (int64_t i_43630 = 0; i_43630 < Ry_43417; i_43630++) { - int64_t binop_x_43635 = iii_43436 + i_43630; - int64_t cmpop_x_43637 = binop_x_43635 + binop_y_43636; - bool binop_x_43638 = slt64(cmpop_x_43637, m_27772); - - for (int64_t i_43633 = 0; i_43633 < Rx_43419; 
i_43633++) { - int64_t binop_x_43639 = jjj_43437 + i_43633; - int64_t cmpop_x_43641 = binop_x_43639 + binop_y_43640; - bool binop_y_43642 = slt64(cmpop_x_43641, N_27771); - bool cond_43643 = binop_x_43638 && binop_y_43642; + for (int64_t i_119327 = 0; i_119327 < Ry_119114; i_119327++) { + int64_t binop_x_119332 = iii_119133 + i_119327; + int64_t cmpop_x_119334 = binop_x_119332 + binop_y_119333; + bool binop_x_119335 = slt64(cmpop_x_119334, m_75136); + + for (int64_t i_119330 = 0; i_119330 < Rx_119116; i_119330++) { + int64_t binop_x_119336 = jjj_119134 + i_119330; + int64_t cmpop_x_119338 = binop_x_119336 + binop_y_119337; + bool binop_y_119339 = slt64(cmpop_x_119338, N_75135); + bool cond_119340 = binop_x_119335 && binop_y_119339; - if (cond_43643) { - float a_43645 = mem_45029[i_43630]; - float b_43646 = mem_45033[i_43633]; - float c_43647 = mem_param_45016[i_43630 * Rx_43419 + - i_43633]; - float defunc_1_f_res_43650 = a_43645 * b_43646; - float defunc_1_op_res_43654 = c_43647 + - defunc_1_f_res_43650; + if (cond_119340) { + double a_119342 = mem_124772[i_119327]; + double b_119343 = mem_124776[i_119330]; + double c_119344 = mem_param_124759[i_119327 * + Rx_119116 + + i_119330]; + double defunc_1_f_res_119347 = a_119342 * b_119343; + double defunc_1_op_res_119351 = c_119344 + + defunc_1_f_res_119347; - mem_param_45016[i_43630 * Rx_43419 + i_43633] = - defunc_1_op_res_43654; + mem_param_124759[i_119327 * Rx_119116 + i_119330] = + defunc_1_op_res_119351; } } } - for (int64_t i_46157 = 0; i_46157 < Ry_43417; i_46157++) { - for (int64_t i_46158 = 0; i_46158 < Rx_43419; i_46158++) { - mem_45043[i_46157 * Rx_43419 + i_46158] = - mem_param_45016[i_46157 * Rx_43419 + i_46158]; + for (int64_t i_129148 = 0; i_129148 < Ry_119114; i_129148++) { + for (int64_t i_129149 = 0; i_129149 < Rx_119116; i_129149++) { + mem_124786[i_129148 * Rx_119116 + i_129149] = + mem_param_124759[i_129148 * Rx_119116 + i_129149]; } } barrier(CLK_LOCAL_MEM_FENCE); - float 
mem_param_tmp_46147[Ry_43417 * Rx_43419]; + double mem_param_tmp_129138[Ry_119114 * Rx_119116]; - for (int32_t i_4 = 0; i_4 < Ry_43417 * Rx_43419; i_4++) - mem_param_tmp_46147[i_4] = mem_45043[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_43417 * Rx_43419; i_5++) - mem_param_45016[i_5] = mem_param_tmp_46147[i_5]; + for (int32_t i_4 = 0; i_4 < Ry_119114 * Rx_119116; i_4++) + mem_param_tmp_129138[i_4] = mem_124786[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_119114 * Rx_119116; i_5++) + mem_param_124759[i_5] = mem_param_tmp_129138[i_5]; } - for (int32_t i_6 = 0; i_6 < Ry_43417 * Rx_43419; i_6++) - loop_mem_45044[i_6] = mem_param_45016[i_6]; + for (int32_t i_6 = 0; i_6 < Ry_119114 * Rx_119116; i_6++) + loop_mem_124787[i_6] = mem_param_124759[i_6]; - float mem_param_tmp_46139[Ry_43417 * Rx_43419]; + double mem_param_tmp_129130[Ry_119114 * Rx_119116]; - for (int32_t i_7 = 0; i_7 < Ry_43417 * Rx_43419; i_7++) - mem_param_tmp_46139[i_7] = loop_mem_45044[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_43417 * Rx_43419; i_8++) - mem_param_44959[i_8] = mem_param_tmp_46139[i_8]; + for (int32_t i_7 = 0; i_7 < Ry_119114 * Rx_119116; i_7++) + mem_param_tmp_129130[i_7] = loop_mem_124787[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_119114 * Rx_119116; i_8++) + mem_param_124702[i_8] = mem_param_tmp_129130[i_8]; } - for (int32_t i_9 = 0; i_9 < Ry_43417 * Rx_43419; i_9++) - loop_mem_45045[i_9] = mem_param_44959[i_9]; - for (int64_t i_43664 = 0; i_43664 < Ry_43417; i_43664++) { - int64_t binop_y_43689 = Ty_43416 * i_43664; - - for (int64_t i_43666 = 0; i_43666 < tk_div_tx_43421; i_43666++) { - int64_t binop_y_43687 = Tx_43418 * i_43666; - int64_t ltid_x_43668 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_y_43669 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43670 = local_tid_46129; - int64_t k_43688 = ltid_y_43669 + binop_y_43687; - int64_t i_43690 = ltid_x_43668 + binop_y_43689; - int64_t gtid_43691 = iii_43436 + i_43690; - int64_t A_col_idx_43692 = kk_43663 + k_43688; - bool binop_x_43693 = 
slt64(gtid_43691, m_27772); - bool binop_y_43694 = slt64(A_col_idx_43692, i32_res_27787); - bool cond_43695 = binop_x_43693 && binop_y_43694; - float A_elem_43696; + for (int32_t i_9 = 0; i_9 < Ry_119114 * Rx_119116; i_9++) + loop_mem_124788[i_9] = mem_param_124702[i_9]; + for (int64_t i_119361 = 0; i_119361 < Ry_119114; i_119361++) { + int64_t binop_y_119386 = Ty_119113 * i_119361; + + for (int64_t i_119363 = 0; i_119363 < tk_div_tx_119118; i_119363++) { + int64_t binop_y_119384 = Tx_119115 * i_119363; + int64_t ltid_x_119365 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_y_119366 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119367 = local_tid_129120; + int64_t k_119385 = ltid_y_119366 + binop_y_119384; + int64_t i_119387 = ltid_x_119365 + binop_y_119386; + int64_t gtid_119388 = iii_119133 + i_119387; + int64_t A_col_idx_119389 = kk_119360 + k_119385; + bool binop_x_119390 = slt64(gtid_119388, m_75136); + bool binop_y_119391 = slt64(A_col_idx_119389, k2p2zq_75151); + bool cond_119392 = binop_x_119390 && binop_y_119391; + double A_elem_119393; - if (cond_43695) { - float A_elem_43698 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43691 * - i32_res_27787 + - A_col_idx_43692]; + if (cond_119392) { + double A_elem_119395 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119388 * + k2p2zq_75151 + + A_col_idx_119389]; - A_elem_43696 = A_elem_43698; + A_elem_119393 = A_elem_119395; } else { - A_elem_43696 = 0.0F; + A_elem_119393 = 0.0; } - bool cond_43700 = slt64(k_43688, Tk_43420); - int64_t a_loc_ind_43701; + bool cond_119397 = slt64(k_119385, Tk_119117); + int64_t a_loc_ind_119398; - if (cond_43700) { - int64_t binop_y_43702 = Tk_43420 * i_43690; - int64_t loc_fi_43703 = k_43688 + binop_y_43702; + if (cond_119397) { + int64_t binop_y_119399 = Tk_119117 * i_119387; + int64_t loc_fi_119400 = k_119385 + binop_y_119399; - a_loc_ind_43701 = loc_fi_43703; + a_loc_ind_119398 = loc_fi_119400; } else { - a_loc_ind_43701 = (int64_t) -1; + 
a_loc_ind_119398 = (int64_t) -1; } - if (sle64((int64_t) 0, a_loc_ind_43701) && slt64(a_loc_ind_43701, - a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43701] = A_elem_43696; + if (sle64((int64_t) 0, a_loc_ind_119398) && slt64(a_loc_ind_119398, + a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119398] = + A_elem_119393; } barrier(CLK_LOCAL_MEM_FENCE); } } - for (int64_t i_43708 = 0; i_43708 < tk_div_ty_43422; i_43708++) { - int64_t binop_y_43731 = Ty_43416 * i_43708; - - for (int64_t i_43710 = 0; i_43710 < Rx_43419; i_43710++) { - int64_t binop_y_43733 = Tx_43418 * i_43710; - int64_t ltid_x_43712 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_y_43713 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43714 = local_tid_46129; - int64_t k_43732 = ltid_x_43712 + binop_y_43731; - int64_t j_43734 = ltid_y_43713 + binop_y_43733; - int64_t gtid_43735 = jjj_43437 + j_43734; - int64_t B_row_idx_43736 = kk_43663 + k_43732; - bool binop_x_43737 = slt64(gtid_43735, N_27771); - bool binop_y_43738 = slt64(B_row_idx_43736, i32_res_27787); - bool cond_43739 = binop_x_43737 && binop_y_43738; - float B_elem_43740; + for (int64_t i_119405 = 0; i_119405 < tk_div_ty_119119; i_119405++) { + int64_t binop_y_119428 = Ty_119113 * i_119405; + + for (int64_t i_119407 = 0; i_119407 < Rx_119116; i_119407++) { + int64_t binop_y_119430 = Tx_119115 * i_119407; + int64_t ltid_x_119409 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_y_119410 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119411 = local_tid_129120; + int64_t k_119429 = ltid_x_119409 + binop_y_119428; + int64_t j_119431 = ltid_y_119410 + binop_y_119430; + int64_t gtid_119432 = jjj_119134 + j_119431; + int64_t B_row_idx_119433 = kk_119360 + k_119429; + bool binop_x_119434 = slt64(gtid_119432, N_75135); + bool binop_y_119435 = slt64(B_row_idx_119433, k2p2zq_75151); + bool cond_119436 = binop_x_119434 && binop_y_119435; + double B_elem_119437; - if (cond_43739) { - float B_elem_43742 = ((__global - 
float *) mem_44940)[B_row_idx_43736 * - N_27771 + - gtid_43735]; + if (cond_119436) { + double B_elem_119439 = ((__global + double *) mem_124683)[B_row_idx_119433 * + N_75135 + + gtid_119432]; - B_elem_43740 = B_elem_43742; + B_elem_119437 = B_elem_119439; } else { - B_elem_43740 = 0.0F; + B_elem_119437 = 0.0; } - bool cond_43744 = slt64(k_43732, Tk_43420); - int64_t b_loc_ind_43745; + bool cond_119441 = slt64(k_119429, Tk_119117); + int64_t b_loc_ind_119442; - if (cond_43744) { - int64_t binop_y_43746 = TxRx_43423 * k_43732; - int64_t loc_fi_43747 = j_43734 + binop_y_43746; + if (cond_119441) { + int64_t binop_y_119443 = TxRx_119120 * k_119429; + int64_t loc_fi_119444 = j_119431 + binop_y_119443; - b_loc_ind_43745 = loc_fi_43747; + b_loc_ind_119442 = loc_fi_119444; } else { - b_loc_ind_43745 = (int64_t) -1; + b_loc_ind_119442 = (int64_t) -1; } - if (sle64((int64_t) 0, b_loc_ind_43745) && slt64(b_loc_ind_43745, - b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43745] = B_elem_43740; + if (sle64((int64_t) 0, b_loc_ind_119442) && slt64(b_loc_ind_119442, + b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119442] = + B_elem_119437; } barrier(CLK_LOCAL_MEM_FENCE); } } - float mem_45111[Ry_43417]; - float mem_45115[Rx_43419]; - float mem_45125[Ry_43417 * Rx_43419]; - float loop_mem_45127[Ry_43417 * Rx_43419]; - float mem_param_45098[Ry_43417 * Rx_43419]; - - for (int32_t i_10 = 0; i_10 < Ry_43417 * Rx_43419; i_10++) - mem_param_45098[i_10] = loop_mem_45045[i_10]; - for (int64_t i_43752 = 0; i_43752 < Tk_43420; i_43752++) { - int64_t cmpop_x_43754 = kk_43663 + i_43752; - bool cond_43755 = slt64(cmpop_x_43754, i32_res_27787); - float mem_45498[Ry_43417 * Rx_43419]; - - if (cond_43755) { - int64_t binop_y_43793 = TxRx_43423 * i_43752; - int64_t bytes_45100 = (int64_t) 4 * Ry_43417; - int64_t bytes_45102 = (int64_t) 4 * Rx_43419; - int64_t ltid_y_43758 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_x_43756 = sext_i32_i64(ltid_pre_46134); - 
int32_t ltid_flat_43757 = local_tid_46129; - float mem_45101[Ry_43417]; - float mem_45103[Rx_43419]; - int64_t binop_x_43784 = Ry_43417 * ltid_y_43758; + double mem_124854[Ry_119114]; + double mem_124858[Rx_119116]; + double mem_124868[Ry_119114 * Rx_119116]; + double loop_mem_124870[Ry_119114 * Rx_119116]; + double mem_param_124841[Ry_119114 * Rx_119116]; + + for (int32_t i_10 = 0; i_10 < Ry_119114 * Rx_119116; i_10++) + mem_param_124841[i_10] = loop_mem_124788[i_10]; + for (int64_t i_119449 = 0; i_119449 < Tk_119117; i_119449++) { + int64_t cmpop_x_119451 = kk_119360 + i_119449; + bool cond_119452 = slt64(cmpop_x_119451, k2p2zq_75151); + double mem_125412[Ry_119114 * Rx_119116]; + + if (cond_119452) { + int64_t binop_y_119490 = TxRx_119120 * i_119449; + int64_t bytes_124843 = (int64_t) 8 * Ry_119114; + int64_t bytes_124845 = (int64_t) 8 * Rx_119116; + int64_t ltid_y_119455 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_x_119453 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119454 = local_tid_129120; + double mem_124844[Ry_119114]; + double mem_124846[Rx_119116]; + int64_t binop_x_119481 = Ry_119114 * ltid_y_119455; - for (int64_t i_43782 = 0; i_43782 < Ry_43417; i_43782++) { - int64_t binop_x_43785 = i_43782 + binop_x_43784; - int64_t binop_y_43786 = Tk_43420 * binop_x_43785; - int64_t a_loc_ind_43787 = i_43752 + binop_y_43786; + for (int64_t i_119479 = 0; i_119479 < Ry_119114; i_119479++) { + int64_t binop_x_119482 = i_119479 + binop_x_119481; + int64_t binop_y_119483 = Tk_119117 * binop_x_119482; + int64_t a_loc_ind_119484 = i_119449 + binop_y_119483; - for (int64_t i_46166 = 0; i_46166 < (int64_t) 1; i_46166++) { - mem_45101[i_43782 + i_46166] = ((__local - float *) mem_44956)[a_loc_ind_43787 + - i_46166]; + for (int64_t i_129157 = 0; i_129157 < (int64_t) 1; i_129157++) { + mem_124844[i_119479 + i_129157] = ((__local + double *) mem_124699)[a_loc_ind_119484 + + i_129157]; } } - int64_t binop_y_43795 = Rx_43419 * ltid_x_43756; + int64_t binop_y_119492 = 
Rx_119116 * ltid_x_119453; - for (int64_t i_43791 = 0; i_43791 < Rx_43419; i_43791++) { - int64_t binop_x_43794 = i_43791 + binop_y_43793; - int64_t b_loc_ind_43796 = binop_x_43794 + binop_y_43795; + for (int64_t i_119488 = 0; i_119488 < Rx_119116; i_119488++) { + int64_t binop_x_119491 = i_119488 + binop_y_119490; + int64_t b_loc_ind_119493 = binop_x_119491 + binop_y_119492; - for (int64_t i_46168 = 0; i_46168 < (int64_t) 1; i_46168++) { - mem_45103[i_43791 + i_46168] = ((__local - float *) mem_44958)[b_loc_ind_43796 + - i_46168]; + for (int64_t i_129159 = 0; i_129159 < (int64_t) 1; i_129159++) { + mem_124846[i_119488 + i_129159] = ((__local + double *) mem_124701)[b_loc_ind_119493 + + i_129159]; } } - for (int64_t i_46169 = 0; i_46169 < Ry_43417; i_46169++) { - mem_45111[i_46169] = mem_45101[i_46169]; + for (int64_t i_129160 = 0; i_129160 < Ry_119114; i_129160++) { + mem_124854[i_129160] = mem_124844[i_129160]; } - for (int64_t i_46170 = 0; i_46170 < Rx_43419; i_46170++) { - mem_45115[i_46170] = mem_45103[i_46170]; + for (int64_t i_129161 = 0; i_129161 < Rx_119116; i_129161++) { + mem_124858[i_129161] = mem_124846[i_129161]; } barrier(CLK_LOCAL_MEM_FENCE); - int64_t ltid_y_43803 = sext_i32_i64(ltid_pre_46133); - int64_t ltid_x_43801 = sext_i32_i64(ltid_pre_46134); - int32_t ltid_flat_43802 = local_tid_46129; - int64_t binop_y_43844 = Ry_43417 * ltid_y_43803; - int64_t binop_y_43848 = Rx_43419 * ltid_x_43801; + int64_t ltid_y_119500 = sext_i32_i64(ltid_pre_129124); + int64_t ltid_x_119498 = sext_i32_i64(ltid_pre_129125); + int32_t ltid_flat_119499 = local_tid_129120; + int64_t binop_y_119541 = Ry_119114 * ltid_y_119500; + int64_t binop_y_119545 = Rx_119116 * ltid_x_119498; - for (int64_t i_43838 = 0; i_43838 < Ry_43417; i_43838++) { - int64_t binop_x_43843 = iii_43436 + i_43838; - int64_t cmpop_x_43845 = binop_x_43843 + binop_y_43844; - bool binop_x_43846 = slt64(cmpop_x_43845, m_27772); - - for (int64_t i_43841 = 0; i_43841 < Rx_43419; i_43841++) { - int64_t 
binop_x_43847 = jjj_43437 + i_43841; - int64_t cmpop_x_43849 = binop_x_43847 + binop_y_43848; - bool binop_y_43850 = slt64(cmpop_x_43849, N_27771); - bool cond_43851 = binop_x_43846 && binop_y_43850; + for (int64_t i_119535 = 0; i_119535 < Ry_119114; i_119535++) { + int64_t binop_x_119540 = iii_119133 + i_119535; + int64_t cmpop_x_119542 = binop_x_119540 + binop_y_119541; + bool binop_x_119543 = slt64(cmpop_x_119542, m_75136); + + for (int64_t i_119538 = 0; i_119538 < Rx_119116; i_119538++) { + int64_t binop_x_119544 = jjj_119134 + i_119538; + int64_t cmpop_x_119546 = binop_x_119544 + binop_y_119545; + bool binop_y_119547 = slt64(cmpop_x_119546, N_75135); + bool cond_119548 = binop_x_119543 && binop_y_119547; - if (cond_43851) { - float a_43853 = mem_45111[i_43838]; - float b_43854 = mem_45115[i_43841]; - float c_43855 = mem_param_45098[i_43838 * Rx_43419 + - i_43841]; - float defunc_1_f_res_43858 = a_43853 * b_43854; - float defunc_1_op_res_43862 = c_43855 + - defunc_1_f_res_43858; + if (cond_119548) { + double a_119550 = mem_124854[i_119535]; + double b_119551 = mem_124858[i_119538]; + double c_119552 = mem_param_124841[i_119535 * + Rx_119116 + + i_119538]; + double defunc_1_f_res_119555 = a_119550 * b_119551; + double defunc_1_op_res_119559 = c_119552 + + defunc_1_f_res_119555; - mem_param_45098[i_43838 * Rx_43419 + i_43841] = - defunc_1_op_res_43862; + mem_param_124841[i_119535 * Rx_119116 + i_119538] = + defunc_1_op_res_119559; } } } - for (int64_t i_46173 = 0; i_46173 < Ry_43417; i_46173++) { - for (int64_t i_46174 = 0; i_46174 < Rx_43419; i_46174++) { - mem_45125[i_46173 * Rx_43419 + i_46174] = - mem_param_45098[i_46173 * Rx_43419 + i_46174]; + for (int64_t i_129164 = 0; i_129164 < Ry_119114; i_129164++) { + for (int64_t i_129165 = 0; i_129165 < Rx_119116; i_129165++) { + mem_124868[i_129164 * Rx_119116 + i_129165] = + mem_param_124841[i_129164 * Rx_119116 + i_129165]; } } barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_46175 = 0; i_46175 < Ry_43417; 
i_46175++) { - for (int64_t i_46176 = 0; i_46176 < Rx_43419; i_46176++) { - mem_45498[i_46175 * Rx_43419 + i_46176] = - mem_45125[i_46175 * Rx_43419 + i_46176]; + for (int64_t i_129166 = 0; i_129166 < Ry_119114; i_129166++) { + for (int64_t i_129167 = 0; i_129167 < Rx_119116; i_129167++) { + mem_125412[i_129166 * Rx_119116 + i_129167] = + mem_124868[i_129166 * Rx_119116 + i_129167]; } } } else { - for (int64_t i_46177 = 0; i_46177 < Ry_43417; i_46177++) { - for (int64_t i_46178 = 0; i_46178 < Rx_43419; i_46178++) { - mem_45498[i_46177 * Rx_43419 + i_46178] = - mem_param_45098[i_46177 * Rx_43419 + i_46178]; + for (int64_t i_129168 = 0; i_129168 < Ry_119114; i_129168++) { + for (int64_t i_129169 = 0; i_129169 < Rx_119116; i_129169++) { + mem_125412[i_129168 * Rx_119116 + i_129169] = + mem_param_124841[i_129168 * Rx_119116 + i_129169]; } } } - float mem_param_tmp_46163[Ry_43417 * Rx_43419]; + double mem_param_tmp_129154[Ry_119114 * Rx_119116]; - for (int32_t i_11 = 0; i_11 < Ry_43417 * Rx_43419; i_11++) - mem_param_tmp_46163[i_11] = mem_45498[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_43417 * Rx_43419; i_12++) - mem_param_45098[i_12] = mem_param_tmp_46163[i_12]; + for (int32_t i_11 = 0; i_11 < Ry_119114 * Rx_119116; i_11++) + mem_param_tmp_129154[i_11] = mem_125412[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_119114 * Rx_119116; i_12++) + mem_param_124841[i_12] = mem_param_tmp_129154[i_12]; } - for (int32_t i_13 = 0; i_13 < Ry_43417 * Rx_43419; i_13++) - loop_mem_45127[i_13] = mem_param_45098[i_13]; - - int64_t reg_tile_i_46179 = squot64(sext_i32_i64(local_tid_46129), Tx_43418); - int64_t reg_tile_i_46180 = sext_i32_i64(local_tid_46129) - - squot64(sext_i32_i64(local_tid_46129), Tx_43418) * Tx_43418; - int64_t tile_dim_start_46181 = Ry_43417 * (Ty_43416 * gid_y_43434 + - reg_tile_i_46179); - int64_t tile_dim_start_46182 = Rx_43419 * (Tx_43418 * gid_x_43433 + - reg_tile_i_46180); - - for (int64_t nest_i_46183 = 0; nest_i_46183 < Ry_43417; nest_i_46183++) { - for (int64_t 
nest_i_46184 = 0; nest_i_46184 < Rx_43419; - nest_i_46184++) { - if (slt64(tile_dim_start_46181 + nest_i_46183, m_27772) && - slt64(tile_dim_start_46182 + nest_i_46184, N_27771)) { - ((__global float *) mem_45130)[(tile_dim_start_46181 + - nest_i_46183) * N_27771 + - (tile_dim_start_46182 + - nest_i_46184)] = - loop_mem_45127[nest_i_46183 * Rx_43419 + nest_i_46184]; + for (int32_t i_13 = 0; i_13 < Ry_119114 * Rx_119116; i_13++) + loop_mem_124870[i_13] = mem_param_124841[i_13]; + + int64_t reg_tile_i_129170 = squot64(sext_i32_i64(local_tid_129120), + Tx_119115); + int64_t reg_tile_i_129171 = sext_i32_i64(local_tid_129120) - + squot64(sext_i32_i64(local_tid_129120), Tx_119115) * Tx_119115; + int64_t tile_dim_start_129172 = Ry_119114 * (Ty_119113 * gid_y_119131 + + reg_tile_i_129170); + int64_t tile_dim_start_129173 = Rx_119116 * (Tx_119115 * gid_x_119130 + + reg_tile_i_129171); + + for (int64_t nest_i_129174 = 0; nest_i_129174 < Ry_119114; + nest_i_129174++) { + for (int64_t nest_i_129175 = 0; nest_i_129175 < Rx_119116; + nest_i_129175++) { + if (slt64(tile_dim_start_129172 + nest_i_129174, m_75136) && + slt64(tile_dim_start_129173 + nest_i_129175, N_75135)) { + ((__global double *) mem_124873)[(tile_dim_start_129172 + + nest_i_129174) * N_75135 + + (tile_dim_start_129173 + + nest_i_129175)] = + loop_mem_124870[nest_i_129174 * Rx_119116 + nest_i_129175]; } } } error_9: return; - #undef Ty_43416 - #undef Ry_43417 - #undef Tx_43418 - #undef Rx_43419 - #undef Tk_43420 - #undef tk_div_tx_43421 - #undef tk_div_ty_43422 - #undef TxRx_43423 - #undef TyRy_43424 - #undef a_loc_szz_43426 - #undef b_loc_szz_43428 -} -__kernel void mainDetailedzisegmap_intragroup_43869(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45203_backing_aligned_0, - __local volatile - int64_t *mem_45196_backing_aligned_1, - int64_t N_27771, - int64_t m_27772, - int32_t n_27775, - float hfrac_27777, - int64_t 
i32_res_27781, - int32_t k2p2_27783, - int64_t num_whole_tiles_43891, - int64_t residual_input_43992, - unsigned char cond_43993, - __global - unsigned char *mem_45182, - __global - unsigned char *mem_45185, - __global - unsigned char *mem_45216, - __global - unsigned char *mem_45218, - __global - unsigned char *mem_45220) + #undef Ty_119113 + #undef Ry_119114 + #undef Tx_119115 + #undef Rx_119116 + #undef Tk_119117 + #undef tk_div_tx_119118 + #undef tk_div_ty_119119 + #undef TxRx_119120 + #undef TyRy_119121 + #undef a_loc_szz_119123 + #undef b_loc_szz_119125 +} +__kernel void mainzisegred_large_104864(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127109_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127107_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_106695, + int64_t groups_per_segment_127093, + int64_t elements_per_thread_127094, + int64_t virt_num_groups_127095, + int64_t threads_per_segment_127097, + __global unsigned char *mem_121831, + __global unsigned char *mem_121835, + __global unsigned char *mem_121840, + __global + unsigned char *group_res_arr_mem_127098, + __global + unsigned char *mainzicounter_mem_127100) { - #define segmap_group_sizze_32525 (mainDetailedzisegmap_group_sizze_32490) + #define segred_group_sizze_106694 (mainzisegred_group_sizze_104858) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_45203_backing_5 = (__local volatile - char *) mem_45203_backing_aligned_0; - __local volatile char *restrict mem_45196_backing_0 = (__local volatile - char *) mem_45196_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46352; - int32_t local_tid_46353; - int64_t group_sizze_46356; - int32_t wave_sizze_46355; - int32_t 
group_tid_46354; - - global_tid_46352 = get_global_id(0); - local_tid_46353 = get_local_id(0); - group_sizze_46356 = get_local_size(0); - wave_sizze_46355 = LOCKSTEP_WIDTH; - group_tid_46354 = get_group_id(0); - - int32_t gid_flat_43869; - - gid_flat_43869 = group_tid_46354; - - int32_t ltid_pre_46357; - - ltid_pre_46357 = local_tid_46353; - - int64_t gid_43868; - - gid_43868 = sext_i32_i64(group_tid_46354); - - int64_t binop_x_43876; - - binop_x_43876 = segmap_group_sizze_32525 * gid_43868; + __local volatile char *restrict sync_arr_mem_127109_backing_1 = + (__local volatile + char *) sync_arr_mem_127109_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127107_backing_0 = + (__local volatile + char *) red_arr_mem_127107_backing_aligned_1; - int32_t mem_45189[1]; - int64_t ltid_43870 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_43871 = local_tid_46353; - int64_t gtid_43877 = ltid_43870 + binop_x_43876; - bool cond_43878 = slt64(gtid_43877, m_27772); - int32_t pre_43879; + if (*global_failure >= 0) + return; - if (cond_43878) { - int32_t defunc_0_f_res_43881; - int32_t redout_44325 = 0; + int32_t global_tid_127102; + int32_t local_tid_127103; + int64_t group_sizze_127106; + int32_t wave_sizze_127105; + int32_t group_tid_127104; + + global_tid_127102 = get_global_id(0); + local_tid_127103 = get_local_id(0); + group_sizze_127106 = get_local_size(0); + wave_sizze_127105 = LOCKSTEP_WIDTH; + group_tid_127104 = get_group_id(0); + + int32_t phys_tid_104864; + + phys_tid_104864 = global_tid_127102; + + __local char *red_arr_mem_127107; + + red_arr_mem_127107 = (__local char *) red_arr_mem_127107_backing_0; + + __local char *sync_arr_mem_127109; + + sync_arr_mem_127109 = (__local char *) sync_arr_mem_127109_backing_1; + + int32_t phys_group_id_127111; + + phys_group_id_127111 = get_group_id(0); + for (int32_t i_127112 = 0; i_127112 < + sdiv_up32(sext_i64_i32(virt_num_groups_127095) - phys_group_id_127111, + sext_i64_i32(num_groups_106695)); i_127112++) 
{ + int32_t virt_group_id_127113 = phys_group_id_127111 + i_127112 * + sext_i64_i32(num_groups_106695); + int32_t flat_segment_id_127114 = squot32(virt_group_id_127113, + sext_i64_i32(groups_per_segment_127093)); + int64_t global_tid_127115 = srem64(sext_i32_i64(virt_group_id_127113) * + segred_group_sizze_106694 + + sext_i32_i64(local_tid_127103), + segred_group_sizze_106694 * + groups_per_segment_127093); + int64_t gtid_104851 = squot64(sext_i32_i64(flat_segment_id_127114), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_104852 = squot64(sext_i32_i64(flat_segment_id_127114) - + squot64(sext_i32_i64(flat_segment_id_127114), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_104853 = sext_i32_i64(flat_segment_id_127114) - + squot64(sext_i32_i64(flat_segment_id_127114), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(flat_segment_id_127114) - + squot64(sext_i32_i64(flat_segment_id_127114), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t gtid_104863; + double x_acc_127116; + int64_t chunk_sizze_127117; + + chunk_sizze_127117 = smin64(elements_per_thread_127094, + sdiv_up64(k2p2zq_75151 - global_tid_127115, + threads_per_segment_127097)); + + double x_106698; + double x_106699; - for (int32_t i_44370 = 0; i_44370 < n_27775; i_44370++) { - int64_t i_44326 = sext_i32_i64(i_44370); - float x_43885 = ((__global float *) mem_45182)[i_44326 * m_27772 + - gtid_43877]; - bool isnan_res_43886; - - isnan_res_43886 = futrts_isnan32(x_43885); - - bool cond_43887 = !isnan_res_43886; - int32_t defunc_0_f_res_43888 = btoi_bool_i32(cond_43887); - int32_t defunc_1_op_res_43884 = add32(defunc_0_f_res_43888, - redout_44325); - int32_t redout_tmp_46358 = defunc_1_op_res_43884; - - redout_44325 = redout_tmp_46358; + // neutral-initialise the accumulators + { + x_acc_127116 = 0.0; } - defunc_0_f_res_43881 = redout_44325; - pre_43879 = 
defunc_0_f_res_43881; - } else { - pre_43879 = 0; - } - mem_45189[(int64_t) 0] = pre_43879; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45192[1]; - int64_t ltid_43892 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_43893 = local_tid_46353; - - mem_45192[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45196; - - mem_45196 = (__local char *) mem_45196_backing_0; - - float accs_mem_45200[1]; - float mem_param_45193[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45193[i_1] = mem_45192[i_1]; - for (int64_t tile_id_43899 = 0; tile_id_43899 < num_whole_tiles_43891; - tile_id_43899++) { - int64_t binop_x_43948 = segmap_group_sizze_32525 * tile_id_43899; - int64_t ltid_43900 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_43901 = local_tid_46353; - int64_t j_43949 = ltid_43900 + binop_x_43948; - bool cond_43953 = slt64(j_43949, i32_res_27781); - int32_t pre_43954; - - if (cond_43953) { - int32_t index_primexp_44283 = sext_i64_i32(j_43949); - - pre_43954 = index_primexp_44283; - } else { - pre_43954 = 0; - } - ((__local int32_t *) mem_45196)[ltid_43900] = pre_43954; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45199[1]; - int64_t ltid_43919 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_43920 = local_tid_46353; - int64_t gtid_43959 = binop_x_43876 + ltid_43919; - float acc_43961 = mem_param_45193[(int64_t) 0]; - bool cond_43962 = slt64(gtid_43959, m_27772); - float acc_43963; - - if (cond_43962) { - int32_t defunc_0_f_res_43960 = mem_45189[(int64_t) 0]; - float x_43964; - float redout_44327 = acc_43961; - - for (int64_t i_44328 = 0; i_44328 < segmap_group_sizze_32525; - i_44328++) { - int32_t x_43968 = ((__local int32_t *) mem_45196)[i_44328]; - bool cond_43969 = slt32(x_43968, defunc_0_f_res_43960); - float defunc_0_f_res_43970; - - if (cond_43969) { - int64_t i_43971 = sext_i32_i64(x_43968); - bool x_43972 = sle64((int64_t) 0, i_43971); - bool y_43973 = slt64(i_43971, N_27771); - bool bounds_check_43974 = x_43972 && 
y_43973; - bool index_certs_43975; - - if (!bounds_check_43974) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 46) == -1) { - global_failure_args[0] = i_43971; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_3; - } - } - - float defunc_0_f_res_t_res_43976 = ((__global - float *) mem_45185)[i_43971 * - m_27772 + - gtid_43959]; + for (int64_t i_127121 = 0; i_127121 < chunk_sizze_127117; i_127121++) { + gtid_104863 = global_tid_127115 + threads_per_segment_127097 * + i_127121; + // apply map function + { + double x_106704 = ((__global double *) mem_121831)[gtid_104852 * + (k2p2zq_75151 * + m_75136) + + gtid_104851 * + k2p2zq_75151 + + gtid_104863]; + double x_106705 = ((__global double *) mem_121835)[gtid_104851 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104853 * + k2p2zq_75151 + + gtid_104863]; + double defunc_1_f_res_106706 = x_106704 * x_106705; + + // save map-out results + { } + // load accumulator + { + x_106698 = x_acc_127116; + } + // load new values + { + x_106699 = defunc_1_f_res_106706; + } + // apply reduction operator + { + double defunc_1_op_res_106700 = x_106698 + x_106699; - defunc_0_f_res_43970 = defunc_0_f_res_t_res_43976; - } else { - defunc_0_f_res_43970 = 0.0F; + // store in accumulator + { + x_acc_127116 = defunc_1_op_res_106700; + } } - - float defunc_0_f_res_43977 = defunc_0_f_res_43970 * - defunc_0_f_res_43970; - float defunc_1_op_res_43967 = defunc_0_f_res_43977 + - redout_44327; - float redout_tmp_46361 = defunc_1_op_res_43967; - - redout_44327 = redout_tmp_46361; } - x_43964 = redout_44327; - acc_43963 = x_43964; - } else { - acc_43963 = acc_43961; } - mem_45199[(int64_t) 0] = acc_43963; - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + // to reduce current chunk, first store our result in memory + { + x_106698 = x_acc_127116; + ((__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_106698; + } barrier(CLK_LOCAL_MEM_FENCE); - float 
mem_param_tmp_46359[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46359[i_2] = mem_45199[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_45193[i_3] = mem_param_tmp_46359[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_45200[i_4] = mem_param_45193[i_4]; - - __local char *mem_45203; - - mem_45203 = (__local char *) mem_45203_backing_5; - - float mem_45206[1]; - float mem_45511[1]; - - if (cond_43993) { - mem_45511[(int64_t) 0] = accs_mem_45200[(int64_t) 0]; - } else { - int64_t binop_x_44003 = segmap_group_sizze_32525 * - num_whole_tiles_43891; - int64_t ltid_43994 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_43995 = local_tid_46353; - int64_t j_44004 = ltid_43994 + binop_x_44003; - bool cond_44008 = slt64(j_44004, i32_res_27781); - int32_t pre_44009; + int32_t offset_127122; + int32_t skip_waves_127123; - if (cond_44008) { - int32_t index_primexp_44284 = sext_i64_i32(j_44004); - - pre_44009 = index_primexp_44284; - } else { - pre_44009 = 0; - } - ((__local int32_t *) mem_45203)[ltid_43994] = pre_44009; - barrier(CLK_LOCAL_MEM_FENCE); + skip_waves_127123 = 1; - int64_t ltid_44014 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_44015 = local_tid_46353; - int64_t gtid_44022 = binop_x_43876 + ltid_44014; - float acc_44024 = accs_mem_45200[(int64_t) 0]; - bool cond_44025 = slt64(gtid_44022, m_27772); - float acc_44026; + double x_127118; + double x_127119; - if (cond_44025) { - int32_t defunc_0_f_res_44023 = mem_45189[(int64_t) 0]; - float x_44027; - float redout_44329 = acc_44024; - - for (int64_t i_44330 = 0; i_44330 < residual_input_43992; - i_44330++) { - int32_t x_44031 = ((__local int32_t *) mem_45203)[i_44330]; - bool cond_44032 = slt32(x_44031, defunc_0_f_res_44023); - float defunc_0_f_res_44033; - - if (cond_44032) { - int64_t i_44034 = sext_i32_i64(x_44031); - bool x_44035 = sle64((int64_t) 0, i_44034); - bool y_44036 = slt64(i_44034, N_27771); - bool bounds_check_44037 = x_44035 && y_44036; - bool 
index_certs_44038; - - if (!bounds_check_44037) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 47) == -1) { - global_failure_args[0] = i_44034; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_5; - } - } - - float defunc_0_f_res_t_res_44039 = ((__global - float *) mem_45185)[i_44034 * - m_27772 + - gtid_44022]; - - defunc_0_f_res_44033 = defunc_0_f_res_t_res_44039; - } else { - defunc_0_f_res_44033 = 0.0F; - } - - float defunc_0_f_res_44040 = defunc_0_f_res_44033 * - defunc_0_f_res_44033; - float defunc_1_op_res_44030 = defunc_0_f_res_44040 + - redout_44329; - float redout_tmp_46362 = defunc_1_op_res_44030; - - redout_44329 = redout_tmp_46362; - } - x_44027 = redout_44329; - acc_44026 = x_44027; - } else { - acc_44026 = acc_44024; - } - mem_45206[(int64_t) 0] = acc_44026; - - error_5: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45511[(int64_t) 0] = mem_45206[(int64_t) 0]; - } - - int32_t mem_45210[1]; - int32_t mem_45212[1]; - float mem_45214[1]; - int64_t ltid_44043 = sext_i32_i64(ltid_pre_46357); - int32_t ltid_flat_44044 = local_tid_46353; - int64_t gtid_44054 = binop_x_43876 + ltid_44043; - bool cond_44056 = slt64(gtid_44054, m_27772); - int32_t postlude_44057; - int32_t postlude_44058; - float postlude_44059; - - if (cond_44056) { - float defunc_0_f_res_44055 = mem_45511[(int64_t) 0]; - int32_t defunc_0_f_res_44060 = mem_45189[(int64_t) 0]; - int32_t r32_arg_44061 = sub32(defunc_0_f_res_44060, k2p2_27783); - float i32_res_44062 = sitofp_i32_f32(r32_arg_44061); - float sqrt_arg_44063 = defunc_0_f_res_44055 / i32_res_44062; - float sqrt_res_44064; - - sqrt_res_44064 = futrts_sqrt32(sqrt_arg_44063); - - float i32_res_44065 = sitofp_i32_f32(defunc_0_f_res_44060); - float t32_arg_44066 = hfrac_27777 * i32_res_44065; - int32_t f32_res_44067 = fptosi_f32_i32(t32_arg_44066); - - postlude_44057 = f32_res_44067; - postlude_44058 = defunc_0_f_res_44060; - postlude_44059 
= sqrt_res_44064; - } else { - postlude_44057 = 0; - postlude_44058 = 0; - postlude_44059 = 0.0F; - } - mem_45210[(int64_t) 0] = postlude_44057; - mem_45212[(int64_t) 0] = postlude_44058; - mem_45214[(int64_t) 0] = postlude_44059; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64(sext_i32_i64(local_tid_46353) + segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354), m_27772)) { - ((__global int32_t *) mem_45216)[sext_i32_i64(local_tid_46353) + - segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354)] = - mem_45210[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46353) + segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354), m_27772)) { - ((__global int32_t *) mem_45218)[sext_i32_i64(local_tid_46353) + - segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354)] = - mem_45212[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46353) + segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354), m_27772)) { - ((__global float *) mem_45220)[sext_i32_i64(local_tid_46353) + - segmap_group_sizze_32525 * - sext_i32_i64(group_tid_46354)] = - mem_45214[(int64_t) 0]; - } - - error_7: - return; - #undef segmap_group_sizze_32525 -} -__kernel void mainDetailedzisegmap_intragroup_44075(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45269_backing_aligned_0, - __local volatile - int64_t *mem_45262_backing_aligned_1, - int64_t N_27771, - int64_t m_27772, - int64_t i32_res_28174, - int64_t num_whole_tiles_44095, - int64_t residual_input_44206, - unsigned char cond_44207, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45275) -{ - #define segmap_group_sizze_32782 (mainDetailedzisegmap_group_sizze_32759) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict 
mem_45269_backing_5 = (__local volatile - char *) mem_45269_backing_aligned_0; - __local volatile char *restrict mem_45262_backing_0 = (__local volatile - char *) mem_45262_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46536; - int32_t local_tid_46537; - int64_t group_sizze_46540; - int32_t wave_sizze_46539; - int32_t group_tid_46538; - - global_tid_46536 = get_global_id(0); - local_tid_46537 = get_local_id(0); - group_sizze_46540 = get_local_size(0); - wave_sizze_46539 = LOCKSTEP_WIDTH; - group_tid_46538 = get_group_id(0); - - int32_t gid_flat_44075; - - gid_flat_44075 = group_tid_46538; - - int32_t ltid_pre_46541; - - ltid_pre_46541 = local_tid_46537; - - int64_t gid_44074; - - gid_44074 = sext_i32_i64(group_tid_46538); - - int64_t binop_x_44084; - - binop_x_44084 = segmap_group_sizze_32782 * gid_44074; - - int32_t mem_45253[1]; - int32_t mem_45255[1]; - int64_t ltid_44076 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44077 = local_tid_46537; - int64_t gtid_44085 = ltid_44076 + binop_x_44084; - bool cond_44086 = slt64(gtid_44085, m_27772); - int32_t pre_44087; - int32_t pre_44088; - - if (cond_44086) { - int32_t x_44089 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_44085]; - int32_t x_44090 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_44085]; - - pre_44087 = x_44089; - pre_44088 = x_44090; - } else { - pre_44087 = 0; - pre_44088 = 0; - } - mem_45253[(int64_t) 0] = pre_44087; - mem_45255[(int64_t) 0] = pre_44088; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45258[1]; - int64_t ltid_44096 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44097 = local_tid_46537; - - mem_45258[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45262; - - mem_45262 = (__local char *) mem_45262_backing_0; - - float accs_mem_45266[1]; - 
float mem_param_45259[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45259[i_1] = mem_45258[i_1]; - for (int64_t tile_id_44103 = 0; tile_id_44103 < num_whole_tiles_44095; - tile_id_44103++) { - int64_t binop_x_44156 = segmap_group_sizze_32782 * tile_id_44103; - int64_t ltid_44104 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44105 = local_tid_46537; - int64_t j_44157 = ltid_44104 + binop_x_44156; - bool cond_44162 = slt64(j_44157, i32_res_28174); - int32_t pre_44163; - - if (cond_44162) { - int32_t index_primexp_44285 = sext_i64_i32(j_44157); - int32_t tile_elem_44164 = index_primexp_44285; - - pre_44163 = tile_elem_44164; - } else { - pre_44163 = 0; - } - ((__local int32_t *) mem_45262)[ltid_44104] = pre_44163; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45265[1]; - int64_t ltid_44124 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44125 = local_tid_46537; - int64_t gtid_44168 = binop_x_44084 + ltid_44124; - float acc_44171 = mem_param_45259[(int64_t) 0]; - bool cond_44172 = slt64(gtid_44168, m_27772); - float acc_44173; - - if (cond_44172) { - int32_t x_44169 = mem_45253[(int64_t) 0]; - int32_t x_44170 = mem_45255[(int64_t) 0]; - float x_44174; - float redout_44331 = acc_44171; - - for (int64_t i_44332 = 0; i_44332 < segmap_group_sizze_32782; - i_44332++) { - int32_t x_44178 = ((__local int32_t *) mem_45262)[i_44332]; - bool cond_44179 = slt32(x_44178, x_44170); - float defunc_0_f_res_44180; - - if (cond_44179) { - int32_t x_44181 = add32(x_44169, x_44178); - int32_t x_44182 = sub32(x_44181, x_44170); - int32_t i_44183 = add32(1, x_44182); - int64_t i_44184 = sext_i32_i64(i_44183); - bool x_44185 = sle64((int64_t) 0, i_44184); - bool y_44186 = slt64(i_44184, N_27771); - bool bounds_check_44187 = x_44185 && y_44186; - bool index_certs_44188; - - if (!bounds_check_44187) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 51) == -1) { - global_failure_args[0] = i_44184; - global_failure_args[1] = N_27771; - ; - } - local_failure = 
true; - goto error_3; - } - } - - float defunc_0_f_res_t_res_44189 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44168 * - N_27771 + - i_44184]; - - defunc_0_f_res_44180 = defunc_0_f_res_t_res_44189; - } else { - defunc_0_f_res_44180 = 0.0F; - } - - float defunc_1_op_res_44177 = defunc_0_f_res_44180 + - redout_44331; - float redout_tmp_46544 = defunc_1_op_res_44177; - - redout_44331 = redout_tmp_46544; - } - x_44174 = redout_44331; - acc_44173 = x_44174; - } else { - acc_44173 = acc_44171; - } - mem_45265[(int64_t) 0] = acc_44173; - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46542[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46542[i_2] = mem_45265[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_45259[i_3] = mem_param_tmp_46542[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_45266[i_4] = mem_param_45259[i_4]; - - __local char *mem_45269; - - mem_45269 = (__local char *) mem_45269_backing_5; - - float mem_45272[1]; - float mem_45520[1]; - - if (cond_44207) { - mem_45520[(int64_t) 0] = accs_mem_45266[(int64_t) 0]; - } else { - int64_t binop_x_44217 = segmap_group_sizze_32782 * - num_whole_tiles_44095; - int64_t ltid_44208 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44209 = local_tid_46537; - int64_t j_44218 = ltid_44208 + binop_x_44217; - bool cond_44223 = slt64(j_44218, i32_res_28174); - int32_t pre_44224; - - if (cond_44223) { - int32_t index_primexp_44286 = sext_i64_i32(j_44218); - int32_t tile_elem_44225 = index_primexp_44286; - - pre_44224 = tile_elem_44225; - } else { - pre_44224 = 0; - } - ((__local int32_t *) mem_45269)[ltid_44208] = pre_44224; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_44229 = sext_i32_i64(ltid_pre_46541); - int32_t ltid_flat_44230 = local_tid_46537; - int64_t gtid_44237 = binop_x_44084 + ltid_44229; - float acc_44240 = accs_mem_45266[(int64_t) 0]; - bool cond_44241 = slt64(gtid_44237, 
m_27772); - float acc_44242; - - if (cond_44241) { - int32_t x_44238 = mem_45253[(int64_t) 0]; - int32_t x_44239 = mem_45255[(int64_t) 0]; - float x_44243; - float redout_44333 = acc_44240; - - for (int64_t i_44334 = 0; i_44334 < residual_input_44206; - i_44334++) { - int32_t x_44247 = ((__local int32_t *) mem_45269)[i_44334]; - bool cond_44248 = slt32(x_44247, x_44239); - float defunc_0_f_res_44249; - - if (cond_44248) { - int32_t x_44250 = add32(x_44238, x_44247); - int32_t x_44251 = sub32(x_44250, x_44239); - int32_t i_44252 = add32(1, x_44251); - int64_t i_44253 = sext_i32_i64(i_44252); - bool x_44254 = sle64((int64_t) 0, i_44253); - bool y_44255 = slt64(i_44253, N_27771); - bool bounds_check_44256 = x_44254 && y_44255; - bool index_certs_44257; - - if (!bounds_check_44256) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 52) == -1) { - global_failure_args[0] = i_44253; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_5; - } - } - - float defunc_0_f_res_t_res_44258 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44237 * - N_27771 + - i_44253]; - - defunc_0_f_res_44249 = defunc_0_f_res_t_res_44258; - } else { - defunc_0_f_res_44249 = 0.0F; - } - - float defunc_1_op_res_44246 = defunc_0_f_res_44249 + - redout_44333; - float redout_tmp_46545 = defunc_1_op_res_44246; - - redout_44333 = redout_tmp_46545; - } - x_44243 = redout_44333; - acc_44242 = x_44243; - } else { - acc_44242 = acc_44240; - } - mem_45272[(int64_t) 0] = acc_44242; - - error_5: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45520[(int64_t) 0] = mem_45272[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46537) + segmap_group_sizze_32782 * - sext_i32_i64(group_tid_46538), m_27772)) { - ((__global float *) mem_45275)[sext_i32_i64(local_tid_46537) + - segmap_group_sizze_32782 * - sext_i32_i64(group_tid_46538)] = - mem_45520[(int64_t) 0]; - } - - error_6: - return; - #undef 
segmap_group_sizze_32782 -} -__kernel void mainDetailedzisegred_large_30429(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_45805_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_45803_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_27781, - int64_t i32_res_27787, - int64_t num_groups_30568, - int64_t groups_per_segment_45789, - int64_t elements_per_thread_45790, - int64_t virt_num_groups_45791, - int64_t threads_per_segment_45793, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44531, - __global - unsigned char *mem_44536, - __global - unsigned char *group_res_arr_mem_45794, - __global - unsigned char *mainDetailedzicounter_mem_45796) -{ - #define segred_group_sizze_30567 (mainDetailedzisegred_group_sizze_30423) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_45805_backing_1 = - (__local volatile - char *) sync_arr_mem_45805_backing_aligned_0; - __local volatile char *restrict red_arr_mem_45803_backing_0 = - (__local volatile - char *) red_arr_mem_45803_backing_aligned_1; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45798; - int32_t local_tid_45799; - int64_t group_sizze_45802; - int32_t wave_sizze_45801; - int32_t group_tid_45800; - - global_tid_45798 = get_global_id(0); - local_tid_45799 = get_local_id(0); - group_sizze_45802 = get_local_size(0); - wave_sizze_45801 = LOCKSTEP_WIDTH; - group_tid_45800 = get_group_id(0); - - int32_t phys_tid_30429; - - phys_tid_30429 = global_tid_45798; - - __local char *red_arr_mem_45803; - - red_arr_mem_45803 = (__local char *) red_arr_mem_45803_backing_0; - - __local char *sync_arr_mem_45805; - - sync_arr_mem_45805 = (__local char *) sync_arr_mem_45805_backing_1; - - int32_t phys_group_id_45807; - - phys_group_id_45807 = get_group_id(0); - for (int32_t i_45808 = 0; i_45808 < - 
sdiv_up32(sext_i64_i32(virt_num_groups_45791) - phys_group_id_45807, - sext_i64_i32(num_groups_30568)); i_45808++) { - int32_t virt_group_id_45809 = phys_group_id_45807 + i_45808 * - sext_i64_i32(num_groups_30568); - int32_t flat_segment_id_45810 = squot32(virt_group_id_45809, - sext_i64_i32(groups_per_segment_45789)); - int64_t global_tid_45811 = srem64(sext_i32_i64(virt_group_id_45809) * - segred_group_sizze_30567 + - sext_i32_i64(local_tid_45799), - segred_group_sizze_30567 * - groups_per_segment_45789); - int64_t gtid_30416 = squot64(sext_i32_i64(flat_segment_id_45810), - i32_res_27787 * i32_res_27787); - int64_t gtid_30417 = squot64(sext_i32_i64(flat_segment_id_45810) - - squot64(sext_i32_i64(flat_segment_id_45810), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787), - i32_res_27787); - int64_t gtid_30418 = sext_i32_i64(flat_segment_id_45810) - - squot64(sext_i32_i64(flat_segment_id_45810), i32_res_27787 * - i32_res_27787) * (i32_res_27787 * i32_res_27787) - - squot64(sext_i32_i64(flat_segment_id_45810) - - squot64(sext_i32_i64(flat_segment_id_45810), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787), i32_res_27787) * - i32_res_27787; - int64_t gtid_30428; - float x_acc_45812; - int64_t chunk_sizze_45813; - - chunk_sizze_45813 = smin64(elements_per_thread_45790, - sdiv_up64(i32_res_27781 - - sext_i32_i64(sext_i64_i32(global_tid_45811)), - threads_per_segment_45793)); - - float x_30571; - float x_30572; - - // neutral-initialise the accumulators - { - x_acc_45812 = 0.0F; - } - for (int64_t i_45817 = 0; i_45817 < chunk_sizze_45813; i_45817++) { - gtid_30428 = sext_i32_i64(sext_i64_i32(global_tid_45811)) + - threads_per_segment_45793 * i_45817; - // apply map function - { - float x_30577 = ((__global - float *) images_mem_44381)[gtid_30416 * - N_27771 + - gtid_30428]; - float x_30578 = ((__global - float *) binop_p_mem_44390)[gtid_30417 * - N_27771 + - gtid_30428]; - float x_30579 = ((__global float *) mem_44531)[gtid_30418 * 
- N_27771 + - gtid_30428]; - float x_30580 = x_30578 * x_30579; - bool isnan_res_30581; - - isnan_res_30581 = futrts_isnan32(x_30577); - - float y_30582; - - if (isnan_res_30581) { - y_30582 = 0.0F; - } else { - y_30582 = 1.0F; - } - - float defunc_2_f_res_30583 = x_30580 * y_30582; - - // save map-out results - { } - // load accumulator - { - x_30571 = x_acc_45812; - } - // load new values - { - x_30572 = defunc_2_f_res_30583; - } - // apply reduction operator - { - float defunc_1_op_res_30573 = x_30571 + x_30572; - - // store in accumulator - { - x_acc_45812 = defunc_1_op_res_30573; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_30571 = x_acc_45812; - ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_30571; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_45818; - int32_t skip_waves_45819; - - skip_waves_45819 = 1; - - float x_45814; - float x_45815; - - offset_45818 = 0; + offset_127122 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_45799, - sext_i64_i32(segred_group_sizze_30567))) { - x_45814 = ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45818)]; - } - } - offset_45818 = 1; - while (slt32(offset_45818, wave_sizze_45801)) { - if (slt32(local_tid_45799 + offset_45818, - sext_i64_i32(segred_group_sizze_30567)) && - ((local_tid_45799 - squot32(local_tid_45799, wave_sizze_45801) * - wave_sizze_45801) & (2 * offset_45818 - 1)) == 0) { + if (slt32(local_tid_127103, + sext_i64_i32(segred_group_sizze_106694))) { + x_127118 = ((__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127122)]; + } + } + offset_127122 = 1; + while (slt32(offset_127122, wave_sizze_127105)) { + if (slt32(local_tid_127103 + offset_127122, + sext_i64_i32(segred_group_sizze_106694)) && + ((local_tid_127103 - squot32(local_tid_127103, + wave_sizze_127105) * + wave_sizze_127105) & (2 * offset_127122 - 1)) == 0) { // read array 
element { - x_45815 = ((volatile __local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45818)]; + x_127119 = ((volatile __local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127122)]; } // apply reduction operation { - float defunc_1_op_res_45816 = x_45814 + x_45815; + double defunc_1_op_res_127120 = x_127118 + x_127119; - x_45814 = defunc_1_op_res_45816; + x_127118 = defunc_1_op_res_127120; } // write result of operation { ((volatile __local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_45814; + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_127118; } } - offset_45818 *= 2; + offset_127122 *= 2; } - while (slt32(skip_waves_45819, - squot32(sext_i64_i32(segred_group_sizze_30567) + - wave_sizze_45801 - 1, wave_sizze_45801))) { + while (slt32(skip_waves_127123, + squot32(sext_i64_i32(segred_group_sizze_106694) + + wave_sizze_127105 - 1, wave_sizze_127105))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_45818 = skip_waves_45819 * wave_sizze_45801; - if (slt32(local_tid_45799 + offset_45818, - sext_i64_i32(segred_group_sizze_30567)) && - ((local_tid_45799 - squot32(local_tid_45799, wave_sizze_45801) * - wave_sizze_45801) == 0 && (squot32(local_tid_45799, - wave_sizze_45801) & (2 * - skip_waves_45819 - - 1)) == + offset_127122 = skip_waves_127123 * wave_sizze_127105; + if (slt32(local_tid_127103 + offset_127122, + sext_i64_i32(segred_group_sizze_106694)) && + ((local_tid_127103 - squot32(local_tid_127103, + wave_sizze_127105) * + wave_sizze_127105) == 0 && (squot32(local_tid_127103, + wave_sizze_127105) & (2 * + skip_waves_127123 - + 1)) == 0)) { // read array element { - x_45815 = ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45818)]; + x_127119 = ((__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127122)]; } // apply reduction operation { - float defunc_1_op_res_45816 = x_45814 + x_45815; + double 
defunc_1_op_res_127120 = x_127118 + x_127119; - x_45814 = defunc_1_op_res_45816; + x_127118 = defunc_1_op_res_127120; } // write result of operation { ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_45814; + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_127118; } } - skip_waves_45819 *= 2; + skip_waves_127123 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_45799) == (int64_t) 0) { - x_acc_45812 = x_45814; + if (sext_i32_i64(local_tid_127103) == (int64_t) 0) { + x_acc_127116 = x_127118; } } - if (groups_per_segment_45789 == (int64_t) 1) { + if (groups_per_segment_127093 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_45799 == 0) { - ((__global float *) mem_44536)[gtid_30416 * (i32_res_27787 * - i32_res_27787) + - gtid_30417 * i32_res_27787 + - gtid_30418] = x_acc_45812; + if (local_tid_127103 == 0) { + ((__global double *) mem_121840)[gtid_104851 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104852 * + k2p2zq_75151 + + gtid_104853] = + x_acc_127116; } } } else { - int32_t old_counter_45820; + int32_t old_counter_127124; // first thread in group saves group result to global memory { - if (local_tid_45799 == 0) { + if (local_tid_127103 == 0) { ((__global - float *) group_res_arr_mem_45794)[sext_i32_i64(virt_group_id_45809) * - segred_group_sizze_30567] = - x_acc_45812; + double *) group_res_arr_mem_127098)[sext_i32_i64(virt_group_id_127113) * + segred_group_sizze_106694] = + x_acc_127116; mem_fence_global(); - old_counter_45820 = + old_counter_127124 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_45796)[sext_i32_i64(srem32(flat_segment_id_45810, - 10240))], + int *) mainzicounter_mem_127100)[sext_i32_i64(srem32(flat_segment_id_127114, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_45805)[(int64_t) 0] = - old_counter_45820 == groups_per_segment_45789 - + ((__local bool 
*) sync_arr_mem_127109)[(int64_t) 0] = + old_counter_127124 == groups_per_segment_127093 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_45821; + bool is_last_group_127125; - is_last_group_45821 = ((__local - bool *) sync_arr_mem_45805)[(int64_t) 0]; - if (is_last_group_45821) { - if (local_tid_45799 == 0) { - old_counter_45820 = + is_last_group_127125 = ((__local + bool *) sync_arr_mem_127109)[(int64_t) 0]; + if (is_last_group_127125) { + if (local_tid_127103 == 0) { + old_counter_127124 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_45796)[sext_i32_i64(srem32(flat_segment_id_45810, - 10240))], + int *) mainzicounter_mem_127100)[sext_i32_i64(srem32(flat_segment_id_127114, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_45789)); + groups_per_segment_127093)); } // read in the per-group-results { - int64_t read_per_thread_45822 = - sdiv_up64(groups_per_segment_45789, - segred_group_sizze_30567); + int64_t read_per_thread_127126 = + sdiv_up64(groups_per_segment_127093, + segred_group_sizze_106694); - x_30571 = 0.0F; - for (int64_t i_45823 = 0; i_45823 < read_per_thread_45822; - i_45823++) { - int64_t group_res_id_45824 = - sext_i32_i64(local_tid_45799) * - read_per_thread_45822 + i_45823; - int64_t index_of_group_res_45825 = - sext_i32_i64(flat_segment_id_45810) * - groups_per_segment_45789 + group_res_id_45824; + x_106698 = 0.0; + for (int64_t i_127127 = 0; i_127127 < + read_per_thread_127126; i_127127++) { + int64_t group_res_id_127128 = + sext_i32_i64(local_tid_127103) * + read_per_thread_127126 + i_127127; + int64_t index_of_group_res_127129 = + sext_i32_i64(flat_segment_id_127114) * + groups_per_segment_127093 + group_res_id_127128; - if (slt64(group_res_id_45824, - groups_per_segment_45789)) { - x_30572 = ((__global - float *) group_res_arr_mem_45794)[index_of_group_res_45825 * - segred_group_sizze_30567]; + if (slt64(group_res_id_127128, + groups_per_segment_127093)) { + 
x_106699 = ((__global + double *) group_res_arr_mem_127098)[index_of_group_res_127129 * + segred_group_sizze_106694]; - float defunc_1_op_res_30573; + double defunc_1_op_res_106700; - defunc_1_op_res_30573 = x_30571 + x_30572; - x_30571 = defunc_1_op_res_30573; + defunc_1_op_res_106700 = x_106698 + x_106699; + x_106698 = defunc_1_op_res_106700; } } } ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_30571; + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_106698; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_45826; - int32_t skip_waves_45827; + int32_t offset_127130; + int32_t skip_waves_127131; - skip_waves_45827 = 1; + skip_waves_127131 = 1; - float x_45814; - float x_45815; + double x_127118; + double x_127119; - offset_45826 = 0; + offset_127130 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_45799, - sext_i64_i32(segred_group_sizze_30567))) { - x_45814 = ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45826)]; - } - } - offset_45826 = 1; - while (slt32(offset_45826, wave_sizze_45801)) { - if (slt32(local_tid_45799 + offset_45826, - sext_i64_i32(segred_group_sizze_30567)) && - ((local_tid_45799 - squot32(local_tid_45799, - wave_sizze_45801) * - wave_sizze_45801) & (2 * offset_45826 - 1)) == + if (slt32(local_tid_127103, + sext_i64_i32(segred_group_sizze_106694))) { + x_127118 = ((__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127130)]; + } + } + offset_127130 = 1; + while (slt32(offset_127130, wave_sizze_127105)) { + if (slt32(local_tid_127103 + offset_127130, + sext_i64_i32(segred_group_sizze_106694)) && + ((local_tid_127103 - squot32(local_tid_127103, + wave_sizze_127105) * + wave_sizze_127105) & (2 * offset_127130 - 1)) == 0) { // read array element { - x_45815 = ((volatile __local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45826)]; + x_127119 = ((volatile 
__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127130)]; } // apply reduction operation { - float defunc_1_op_res_45816 = x_45814 + x_45815; + double defunc_1_op_res_127120 = x_127118 + + x_127119; - x_45814 = defunc_1_op_res_45816; + x_127118 = defunc_1_op_res_127120; } // write result of operation { ((volatile __local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_45814; + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_127118; } } - offset_45826 *= 2; + offset_127130 *= 2; } - while (slt32(skip_waves_45827, - squot32(sext_i64_i32(segred_group_sizze_30567) + - wave_sizze_45801 - 1, - wave_sizze_45801))) { + while (slt32(skip_waves_127131, + squot32(sext_i64_i32(segred_group_sizze_106694) + + wave_sizze_127105 - 1, + wave_sizze_127105))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_45826 = skip_waves_45827 * wave_sizze_45801; - if (slt32(local_tid_45799 + offset_45826, - sext_i64_i32(segred_group_sizze_30567)) && - ((local_tid_45799 - squot32(local_tid_45799, - wave_sizze_45801) * - wave_sizze_45801) == 0 && - (squot32(local_tid_45799, wave_sizze_45801) & (2 * - skip_waves_45827 - - 1)) == - 0)) { + offset_127130 = skip_waves_127131 * wave_sizze_127105; + if (slt32(local_tid_127103 + offset_127130, + sext_i64_i32(segred_group_sizze_106694)) && + ((local_tid_127103 - squot32(local_tid_127103, + wave_sizze_127105) * + wave_sizze_127105) == 0 && + (squot32(local_tid_127103, wave_sizze_127105) & + (2 * skip_waves_127131 - 1)) == 0)) { // read array element { - x_45815 = ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799 + - offset_45826)]; + x_127119 = ((__local + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103 + + offset_127130)]; } // apply reduction operation { - float defunc_1_op_res_45816 = x_45814 + x_45815; + double defunc_1_op_res_127120 = x_127118 + + x_127119; - x_45814 = defunc_1_op_res_45816; + x_127118 = defunc_1_op_res_127120; } // write result of 
operation { ((__local - float *) red_arr_mem_45803)[sext_i32_i64(local_tid_45799)] = - x_45814; + double *) red_arr_mem_127107)[sext_i32_i64(local_tid_127103)] = + x_127118; } } - skip_waves_45827 *= 2; + skip_waves_127131 *= 2; } // and back to memory with the final result { - if (local_tid_45799 == 0) { - ((__global float *) mem_44536)[gtid_30416 * - (i32_res_27787 * - i32_res_27787) + - gtid_30417 * - i32_res_27787 + - gtid_30418] = - x_45814; + if (local_tid_127103 == 0) { + ((__global double *) mem_121840)[gtid_104851 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104852 * + k2p2zq_75151 + + gtid_104853] = + x_127118; } } } @@ -27034,418 +32153,424 @@ def sync(self): error_1: return; - #undef segred_group_sizze_30567 + #undef segred_group_sizze_106694 } -__kernel void mainDetailedzisegred_large_31780(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46002_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46000_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_27781, - int64_t i32_res_27787, - int64_t num_groups_31833, - int64_t groups_per_segment_45986, - int64_t elements_per_thread_45987, - int64_t virt_num_groups_45988, - int64_t threads_per_segment_45990, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44844, - __global - unsigned char *group_res_arr_mem_45991, - __global - unsigned char *mainDetailedzicounter_mem_45993) +__kernel void mainzisegred_large_105142(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126965_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126963_backing_aligned_1, + int64_t k2p2zq_75151, int64_t x_106526, + int64_t i_106527, int64_t j_m_i_106531, + int64_t num_groups_106613, + int64_t binop_x_120251, + int64_t groups_per_segment_126949, + int64_t elements_per_thread_126950, + int64_t virt_num_groups_126951, + int64_t threads_per_segment_126953, + __global unsigned char *mem_121351, + 
__global + unsigned char *mem_param_121469, + __global unsigned char *mem_121555, + __global + unsigned char *group_res_arr_mem_126954, + __global + unsigned char *mainzicounter_mem_126956) { - #define segred_group_sizze_31832 (mainDetailedzisegred_group_sizze_31774) + #define segred_group_sizze_106612 (mainzisegred_group_sizze_105136) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46002_backing_1 = + __local volatile char *restrict sync_arr_mem_126965_backing_1 = (__local volatile - char *) sync_arr_mem_46002_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46000_backing_0 = + char *) sync_arr_mem_126965_backing_aligned_0; + __local volatile char *restrict red_arr_mem_126963_backing_0 = (__local volatile - char *) red_arr_mem_46000_backing_aligned_1; + char *) red_arr_mem_126963_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_45995; - int32_t local_tid_45996; - int64_t group_sizze_45999; - int32_t wave_sizze_45998; - int32_t group_tid_45997; - - global_tid_45995 = get_global_id(0); - local_tid_45996 = get_local_id(0); - group_sizze_45999 = get_local_size(0); - wave_sizze_45998 = LOCKSTEP_WIDTH; - group_tid_45997 = get_group_id(0); - - int32_t phys_tid_31780; - - phys_tid_31780 = global_tid_45995; - - __local char *red_arr_mem_46000; - - red_arr_mem_46000 = (__local char *) red_arr_mem_46000_backing_0; - - __local char *sync_arr_mem_46002; - - sync_arr_mem_46002 = (__local char *) sync_arr_mem_46002_backing_1; - - int32_t phys_group_id_46004; - - phys_group_id_46004 = get_group_id(0); - for (int32_t i_46005 = 0; i_46005 < - sdiv_up32(sext_i64_i32(virt_num_groups_45988) - phys_group_id_46004, - sext_i64_i32(num_groups_31833)); i_46005++) { - int32_t virt_group_id_46006 = phys_group_id_46004 + i_46005 * - sext_i64_i32(num_groups_31833); - int32_t flat_segment_id_46007 = squot32(virt_group_id_46006, - sext_i64_i32(groups_per_segment_45986)); - 
int64_t global_tid_46008 = srem64(sext_i32_i64(virt_group_id_46006) * - segred_group_sizze_31832 + - sext_i32_i64(local_tid_45996), - segred_group_sizze_31832 * - groups_per_segment_45986); - int64_t gtid_31769 = squot64(sext_i32_i64(flat_segment_id_46007), - i32_res_27787); - int64_t gtid_31770 = sext_i32_i64(flat_segment_id_46007) - - squot64(sext_i32_i64(flat_segment_id_46007), i32_res_27787) * - i32_res_27787; - int64_t gtid_31779; - float x_acc_46009; - int64_t chunk_sizze_46010; - - chunk_sizze_46010 = smin64(elements_per_thread_45987, - sdiv_up64(i32_res_27781 - - sext_i32_i64(sext_i64_i32(global_tid_46008)), - threads_per_segment_45990)); - - float x_31836; - float x_31837; + int32_t global_tid_126958; + int32_t local_tid_126959; + int64_t group_sizze_126962; + int32_t wave_sizze_126961; + int32_t group_tid_126960; + + global_tid_126958 = get_global_id(0); + local_tid_126959 = get_local_id(0); + group_sizze_126962 = get_local_size(0); + wave_sizze_126961 = LOCKSTEP_WIDTH; + group_tid_126960 = get_group_id(0); + + int32_t phys_tid_105142; + + phys_tid_105142 = global_tid_126958; + + __local char *red_arr_mem_126963; + + red_arr_mem_126963 = (__local char *) red_arr_mem_126963_backing_0; + + __local char *sync_arr_mem_126965; + + sync_arr_mem_126965 = (__local char *) sync_arr_mem_126965_backing_1; + + int32_t phys_group_id_126967; + + phys_group_id_126967 = get_group_id(0); + for (int32_t i_126968 = 0; i_126968 < + sdiv_up32(sext_i64_i32(virt_num_groups_126951) - phys_group_id_126967, + sext_i64_i32(num_groups_106613)); i_126968++) { + int32_t virt_group_id_126969 = phys_group_id_126967 + i_126968 * + sext_i64_i32(num_groups_106613); + int32_t flat_segment_id_126970 = squot32(virt_group_id_126969, + sext_i64_i32(groups_per_segment_126949)); + int64_t global_tid_126971 = srem64(sext_i32_i64(virt_group_id_126969) * + segred_group_sizze_106612 + + sext_i32_i64(local_tid_126959), + segred_group_sizze_106612 * + groups_per_segment_126949); + int64_t gtid_105131 = 
squot64(sext_i32_i64(flat_segment_id_126970), + k2p2zq_75151); + int64_t gtid_105132 = sext_i32_i64(flat_segment_id_126970) - + squot64(sext_i32_i64(flat_segment_id_126970), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_105141; + double x_acc_126972; + int64_t chunk_sizze_126973; + + chunk_sizze_126973 = smin64(elements_per_thread_126950, + sdiv_up64(j_m_i_106531 - global_tid_126971, + threads_per_segment_126953)); + + double x_106616; + double x_106617; // neutral-initialise the accumulators { - x_acc_46009 = 0.0F; + x_acc_126972 = 0.0; } - for (int64_t i_46014 = 0; i_46014 < chunk_sizze_46010; i_46014++) { - gtid_31779 = sext_i32_i64(sext_i64_i32(global_tid_46008)) + - threads_per_segment_45990 * i_46014; + for (int64_t i_126977 = 0; i_126977 < chunk_sizze_126973; i_126977++) { + gtid_105141 = global_tid_126971 + threads_per_segment_126953 * + i_126977; // apply map function { - float x_31842 = ((__global - float *) images_mem_44381)[gtid_31769 * - N_27771 + - gtid_31779]; - bool isnan_res_31843; + int64_t slice_115048 = gtid_105141 + x_106526; + double x_106623 = ((__global double *) mem_121351)[gtid_105131 * + (k2p2zq_75151 * + k2p2zq_75151) + + slice_115048 * + k2p2zq_75151 + + i_106527]; + bool isnan_res_106624; - isnan_res_31843 = futrts_isnan32(x_31842); + isnan_res_106624 = futrts_isnan64(x_106623); - float defunc_1_f_res_31844; + double defunc_1_f_res_106625; - if (isnan_res_31843) { - defunc_1_f_res_31844 = 0.0F; + if (isnan_res_106624) { + defunc_1_f_res_106625 = 0.0; } else { - float x_31841 = ((__global - float *) binop_p_mem_44390)[gtid_31770 * - N_27771 + - gtid_31779]; - float defunc_1_f_res_f_res_31845 = x_31841 * x_31842; + double x_106622 = ((__global + double *) mem_param_121469)[gtid_105131 * + binop_x_120251 + + gtid_105132 * + k2p2zq_75151 + + slice_115048]; + double defunc_1_f_res_f_res_106626 = x_106622 * x_106623; - defunc_1_f_res_31844 = defunc_1_f_res_f_res_31845; + defunc_1_f_res_106625 = defunc_1_f_res_f_res_106626; } // save map-out 
results { } // load accumulator { - x_31836 = x_acc_46009; + x_106616 = x_acc_126972; } // load new values { - x_31837 = defunc_1_f_res_31844; + x_106617 = defunc_1_f_res_106625; } // apply reduction operator { - float defunc_1_op_res_31838 = x_31836 + x_31837; + double defunc_1_op_res_106618 = x_106616 + x_106617; // store in accumulator { - x_acc_46009 = defunc_1_op_res_31838; + x_acc_126972 = defunc_1_op_res_106618; } } } } // to reduce current chunk, first store our result in memory { - x_31836 = x_acc_46009; + x_106616 = x_acc_126972; ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] = - x_31836; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_106616; } barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46015; - int32_t skip_waves_46016; + int32_t offset_126978; + int32_t skip_waves_126979; - skip_waves_46016 = 1; + skip_waves_126979 = 1; - float x_46011; - float x_46012; + double x_126974; + double x_126975; - offset_46015 = 0; + offset_126978 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_45996, - sext_i64_i32(segred_group_sizze_31832))) { - x_46011 = ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46015)]; - } - } - offset_46015 = 1; - while (slt32(offset_46015, wave_sizze_45998)) { - if (slt32(local_tid_45996 + offset_46015, - sext_i64_i32(segred_group_sizze_31832)) && - ((local_tid_45996 - squot32(local_tid_45996, wave_sizze_45998) * - wave_sizze_45998) & (2 * offset_46015 - 1)) == 0) { + if (slt32(local_tid_126959, + sext_i64_i32(segred_group_sizze_106612))) { + x_126974 = ((__local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + offset_126978)]; + } + } + offset_126978 = 1; + while (slt32(offset_126978, wave_sizze_126961)) { + if (slt32(local_tid_126959 + offset_126978, + sext_i64_i32(segred_group_sizze_106612)) && + ((local_tid_126959 - squot32(local_tid_126959, + wave_sizze_126961) * + wave_sizze_126961) & (2 * offset_126978 - 1)) 
== 0) { // read array element { - x_46012 = ((volatile __local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46015)]; + x_126975 = ((volatile __local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + offset_126978)]; } // apply reduction operation { - float defunc_1_op_res_46013 = x_46011 + x_46012; + double defunc_1_op_res_126976 = x_126974 + x_126975; - x_46011 = defunc_1_op_res_46013; + x_126974 = defunc_1_op_res_126976; } // write result of operation { ((volatile __local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] = - x_46011; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_126974; } } - offset_46015 *= 2; + offset_126978 *= 2; } - while (slt32(skip_waves_46016, - squot32(sext_i64_i32(segred_group_sizze_31832) + - wave_sizze_45998 - 1, wave_sizze_45998))) { + while (slt32(skip_waves_126979, + squot32(sext_i64_i32(segred_group_sizze_106612) + + wave_sizze_126961 - 1, wave_sizze_126961))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46015 = skip_waves_46016 * wave_sizze_45998; - if (slt32(local_tid_45996 + offset_46015, - sext_i64_i32(segred_group_sizze_31832)) && - ((local_tid_45996 - squot32(local_tid_45996, wave_sizze_45998) * - wave_sizze_45998) == 0 && (squot32(local_tid_45996, - wave_sizze_45998) & (2 * - skip_waves_46016 - - 1)) == + offset_126978 = skip_waves_126979 * wave_sizze_126961; + if (slt32(local_tid_126959 + offset_126978, + sext_i64_i32(segred_group_sizze_106612)) && + ((local_tid_126959 - squot32(local_tid_126959, + wave_sizze_126961) * + wave_sizze_126961) == 0 && (squot32(local_tid_126959, + wave_sizze_126961) & (2 * + skip_waves_126979 - + 1)) == 0)) { // read array element { - x_46012 = ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46015)]; + x_126975 = ((__local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + offset_126978)]; } // apply reduction operation { - float defunc_1_op_res_46013 = x_46011 + x_46012; + 
double defunc_1_op_res_126976 = x_126974 + x_126975; - x_46011 = defunc_1_op_res_46013; + x_126974 = defunc_1_op_res_126976; } // write result of operation { ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] = - x_46011; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_126974; } } - skip_waves_46016 *= 2; + skip_waves_126979 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_45996) == (int64_t) 0) { - x_acc_46009 = x_46011; + if (sext_i32_i64(local_tid_126959) == (int64_t) 0) { + x_acc_126972 = x_126974; } } - if (groups_per_segment_45986 == (int64_t) 1) { + if (groups_per_segment_126949 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_45996 == 0) { - ((__global float *) mem_44844)[gtid_31769 * i32_res_27787 + - gtid_31770] = x_acc_46009; + if (local_tid_126959 == 0) { + ((__global double *) mem_121555)[gtid_105131 * + k2p2zq_75151 + + gtid_105132] = + x_acc_126972; } } } else { - int32_t old_counter_46017; + int32_t old_counter_126980; // first thread in group saves group result to global memory { - if (local_tid_45996 == 0) { + if (local_tid_126959 == 0) { ((__global - float *) group_res_arr_mem_45991)[sext_i32_i64(virt_group_id_46006) * - segred_group_sizze_31832] = - x_acc_46009; + double *) group_res_arr_mem_126954)[sext_i32_i64(virt_group_id_126969) * + segred_group_sizze_106612] = + x_acc_126972; mem_fence_global(); - old_counter_46017 = + old_counter_126980 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_45993)[sext_i32_i64(srem32(flat_segment_id_46007, - 10240))], + int *) mainzicounter_mem_126956)[sext_i32_i64(srem32(flat_segment_id_126970, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46002)[(int64_t) 0] = - old_counter_46017 == groups_per_segment_45986 - + ((__local bool *) sync_arr_mem_126965)[(int64_t) 0] = + old_counter_126980 == groups_per_segment_126949 - 
(int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46018; + bool is_last_group_126981; - is_last_group_46018 = ((__local - bool *) sync_arr_mem_46002)[(int64_t) 0]; - if (is_last_group_46018) { - if (local_tid_45996 == 0) { - old_counter_46017 = + is_last_group_126981 = ((__local + bool *) sync_arr_mem_126965)[(int64_t) 0]; + if (is_last_group_126981) { + if (local_tid_126959 == 0) { + old_counter_126980 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_45993)[sext_i32_i64(srem32(flat_segment_id_46007, - 10240))], + int *) mainzicounter_mem_126956)[sext_i32_i64(srem32(flat_segment_id_126970, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_45986)); + groups_per_segment_126949)); } // read in the per-group-results { - int64_t read_per_thread_46019 = - sdiv_up64(groups_per_segment_45986, - segred_group_sizze_31832); + int64_t read_per_thread_126982 = + sdiv_up64(groups_per_segment_126949, + segred_group_sizze_106612); - x_31836 = 0.0F; - for (int64_t i_46020 = 0; i_46020 < read_per_thread_46019; - i_46020++) { - int64_t group_res_id_46021 = - sext_i32_i64(local_tid_45996) * - read_per_thread_46019 + i_46020; - int64_t index_of_group_res_46022 = - sext_i32_i64(flat_segment_id_46007) * - groups_per_segment_45986 + group_res_id_46021; + x_106616 = 0.0; + for (int64_t i_126983 = 0; i_126983 < + read_per_thread_126982; i_126983++) { + int64_t group_res_id_126984 = + sext_i32_i64(local_tid_126959) * + read_per_thread_126982 + i_126983; + int64_t index_of_group_res_126985 = + sext_i32_i64(flat_segment_id_126970) * + groups_per_segment_126949 + group_res_id_126984; - if (slt64(group_res_id_46021, - groups_per_segment_45986)) { - x_31837 = ((__global - float *) group_res_arr_mem_45991)[index_of_group_res_46022 * - segred_group_sizze_31832]; + if (slt64(group_res_id_126984, + groups_per_segment_126949)) { + x_106617 = ((__global + double *) group_res_arr_mem_126954)[index_of_group_res_126985 * + 
segred_group_sizze_106612]; - float defunc_1_op_res_31838; + double defunc_1_op_res_106618; - defunc_1_op_res_31838 = x_31836 + x_31837; - x_31836 = defunc_1_op_res_31838; + defunc_1_op_res_106618 = x_106616 + x_106617; + x_106616 = defunc_1_op_res_106618; } } } ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] = - x_31836; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_106616; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46023; - int32_t skip_waves_46024; + int32_t offset_126986; + int32_t skip_waves_126987; - skip_waves_46024 = 1; + skip_waves_126987 = 1; - float x_46011; - float x_46012; + double x_126974; + double x_126975; - offset_46023 = 0; + offset_126986 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_45996, - sext_i64_i32(segred_group_sizze_31832))) { - x_46011 = ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46023)]; - } - } - offset_46023 = 1; - while (slt32(offset_46023, wave_sizze_45998)) { - if (slt32(local_tid_45996 + offset_46023, - sext_i64_i32(segred_group_sizze_31832)) && - ((local_tid_45996 - squot32(local_tid_45996, - wave_sizze_45998) * - wave_sizze_45998) & (2 * offset_46023 - 1)) == + if (slt32(local_tid_126959, + sext_i64_i32(segred_group_sizze_106612))) { + x_126974 = ((__local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + offset_126986)]; + } + } + offset_126986 = 1; + while (slt32(offset_126986, wave_sizze_126961)) { + if (slt32(local_tid_126959 + offset_126986, + sext_i64_i32(segred_group_sizze_106612)) && + ((local_tid_126959 - squot32(local_tid_126959, + wave_sizze_126961) * + wave_sizze_126961) & (2 * offset_126986 - 1)) == 0) { // read array element { - x_46012 = ((volatile __local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46023)]; + x_126975 = ((volatile __local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + 
offset_126986)]; } // apply reduction operation { - float defunc_1_op_res_46013 = x_46011 + x_46012; + double defunc_1_op_res_126976 = x_126974 + + x_126975; - x_46011 = defunc_1_op_res_46013; + x_126974 = defunc_1_op_res_126976; } // write result of operation { ((volatile __local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] = - x_46011; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_126974; } } - offset_46023 *= 2; + offset_126986 *= 2; } - while (slt32(skip_waves_46024, - squot32(sext_i64_i32(segred_group_sizze_31832) + - wave_sizze_45998 - 1, - wave_sizze_45998))) { + while (slt32(skip_waves_126987, + squot32(sext_i64_i32(segred_group_sizze_106612) + + wave_sizze_126961 - 1, + wave_sizze_126961))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46023 = skip_waves_46024 * wave_sizze_45998; - if (slt32(local_tid_45996 + offset_46023, - sext_i64_i32(segred_group_sizze_31832)) && - ((local_tid_45996 - squot32(local_tid_45996, - wave_sizze_45998) * - wave_sizze_45998) == 0 && - (squot32(local_tid_45996, wave_sizze_45998) & (2 * - skip_waves_46024 - - 1)) == - 0)) { + offset_126986 = skip_waves_126987 * wave_sizze_126961; + if (slt32(local_tid_126959 + offset_126986, + sext_i64_i32(segred_group_sizze_106612)) && + ((local_tid_126959 - squot32(local_tid_126959, + wave_sizze_126961) * + wave_sizze_126961) == 0 && + (squot32(local_tid_126959, wave_sizze_126961) & + (2 * skip_waves_126987 - 1)) == 0)) { // read array element { - x_46012 = ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996 + - offset_46023)]; + x_126975 = ((__local + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959 + + offset_126986)]; } // apply reduction operation { - float defunc_1_op_res_46013 = x_46011 + x_46012; + double defunc_1_op_res_126976 = x_126974 + + x_126975; - x_46011 = defunc_1_op_res_46013; + x_126974 = defunc_1_op_res_126976; } // write result of operation { ((__local - float *) red_arr_mem_46000)[sext_i32_i64(local_tid_45996)] 
= - x_46011; + double *) red_arr_mem_126963)[sext_i32_i64(local_tid_126959)] = + x_126974; } } - skip_waves_46024 *= 2; + skip_waves_126987 *= 2; } // and back to memory with the final result { - if (local_tid_45996 == 0) { - ((__global float *) mem_44844)[gtid_31769 * - i32_res_27787 + - gtid_31770] = - x_46011; + if (local_tid_126959 == 0) { + ((__global double *) mem_121555)[gtid_105131 * + k2p2zq_75151 + + gtid_105132] = + x_126974; } } } @@ -27456,408 +32581,396 @@ def sync(self): error_1: return; - #undef segred_group_sizze_31832 + #undef segred_group_sizze_106612 } -__kernel void mainDetailedzisegred_large_31917(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46090_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46088_backing_aligned_1, - int64_t i32_res_27787, - int64_t num_groups_31966, - int64_t groups_per_segment_46074, - int64_t elements_per_thread_46075, - int64_t virt_num_groups_46076, - int64_t threads_per_segment_46078, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44910, - __global - unsigned char *group_res_arr_mem_46079, - __global - unsigned char *mainDetailedzicounter_mem_46081) +__kernel void mainzisegred_large_105899(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126740_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126738_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t j_106067, + int64_t num_groups_106100, + int64_t groups_per_segment_126724, + int64_t elements_per_thread_126725, + int64_t virt_num_groups_126726, + int64_t threads_per_segment_126728, + __global unsigned char *mem_120246, + __global unsigned char *mem_120938, + __global + unsigned char *group_res_arr_mem_126729, + __global + unsigned char *mainzicounter_mem_126731) { - #define segred_group_sizze_31965 (mainDetailedzisegred_group_sizze_31911) + 
#define segred_group_sizze_106099 (mainzisegred_group_sizze_105893) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46090_backing_1 = + __local volatile char *restrict sync_arr_mem_126740_backing_1 = (__local volatile - char *) sync_arr_mem_46090_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46088_backing_0 = + char *) sync_arr_mem_126740_backing_aligned_0; + __local volatile char *restrict red_arr_mem_126738_backing_0 = (__local volatile - char *) red_arr_mem_46088_backing_aligned_1; + char *) red_arr_mem_126738_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46083; - int32_t local_tid_46084; - int64_t group_sizze_46087; - int32_t wave_sizze_46086; - int32_t group_tid_46085; + int32_t global_tid_126733; + int32_t local_tid_126734; + int64_t group_sizze_126737; + int32_t wave_sizze_126736; + int32_t group_tid_126735; - global_tid_46083 = get_global_id(0); - local_tid_46084 = get_local_id(0); - group_sizze_46087 = get_local_size(0); - wave_sizze_46086 = LOCKSTEP_WIDTH; - group_tid_46085 = get_group_id(0); + global_tid_126733 = get_global_id(0); + local_tid_126734 = get_local_id(0); + group_sizze_126737 = get_local_size(0); + wave_sizze_126736 = LOCKSTEP_WIDTH; + group_tid_126735 = get_group_id(0); - int32_t phys_tid_31917; + int32_t phys_tid_105899; - phys_tid_31917 = global_tid_46083; + phys_tid_105899 = global_tid_126733; - __local char *red_arr_mem_46088; + __local char *red_arr_mem_126738; - red_arr_mem_46088 = (__local char *) red_arr_mem_46088_backing_0; + red_arr_mem_126738 = (__local char *) red_arr_mem_126738_backing_0; - __local char *sync_arr_mem_46090; + __local char *sync_arr_mem_126740; - sync_arr_mem_46090 = (__local char *) sync_arr_mem_46090_backing_1; + sync_arr_mem_126740 = (__local char *) sync_arr_mem_126740_backing_1; - int32_t phys_group_id_46092; + int32_t phys_group_id_126742; - phys_group_id_46092 = get_group_id(0); - 
for (int32_t i_46093 = 0; i_46093 < - sdiv_up32(sext_i64_i32(virt_num_groups_46076) - phys_group_id_46092, - sext_i64_i32(num_groups_31966)); i_46093++) { - int32_t virt_group_id_46094 = phys_group_id_46092 + i_46093 * - sext_i64_i32(num_groups_31966); - int32_t flat_segment_id_46095 = squot32(virt_group_id_46094, - sext_i64_i32(groups_per_segment_46074)); - int64_t global_tid_46096 = srem64(sext_i32_i64(virt_group_id_46094) * - segred_group_sizze_31965 + - sext_i32_i64(local_tid_46084), - segred_group_sizze_31965 * - groups_per_segment_46074); - int64_t gtid_31906 = squot64(sext_i32_i64(flat_segment_id_46095), - i32_res_27787); - int64_t gtid_31907 = sext_i32_i64(flat_segment_id_46095) - - squot64(sext_i32_i64(flat_segment_id_46095), i32_res_27787) * - i32_res_27787; - int64_t gtid_31916; - float x_acc_46097; - int64_t chunk_sizze_46098; + phys_group_id_126742 = get_group_id(0); + for (int32_t i_126743 = 0; i_126743 < + sdiv_up32(sext_i64_i32(virt_num_groups_126726) - phys_group_id_126742, + sext_i64_i32(num_groups_106100)); i_126743++) { + int32_t virt_group_id_126744 = phys_group_id_126742 + i_126743 * + sext_i64_i32(num_groups_106100); + int32_t flat_segment_id_126745 = squot32(virt_group_id_126744, + sext_i64_i32(groups_per_segment_126724)); + int64_t global_tid_126746 = srem64(sext_i32_i64(virt_group_id_126744) * + segred_group_sizze_106099 + + sext_i32_i64(local_tid_126734), + segred_group_sizze_106099 * + groups_per_segment_126724); + int64_t gtid_105890 = sext_i32_i64(flat_segment_id_126745); + int64_t gtid_105898; + double x_acc_126747; + int64_t chunk_sizze_126748; - chunk_sizze_46098 = smin64(elements_per_thread_46075, - sdiv_up64(i32_res_27787 - - sext_i32_i64(sext_i64_i32(global_tid_46096)), - threads_per_segment_46078)); + chunk_sizze_126748 = smin64(elements_per_thread_126725, + sdiv_up64(k2p2zq_75151 - global_tid_126746, + threads_per_segment_126728)); - float x_31969; - float x_31970; + double x_106103; + double x_106104; // neutral-initialise the 
accumulators { - x_acc_46097 = 0.0F; + x_acc_126747 = 0.0; } - for (int64_t i_46102 = 0; i_46102 < chunk_sizze_46098; i_46102++) { - gtid_31916 = sext_i32_i64(sext_i64_i32(global_tid_46096)) + - threads_per_segment_46078 * i_46102; + for (int64_t i_126752 = 0; i_126752 < chunk_sizze_126748; i_126752++) { + gtid_105898 = global_tid_126746 + threads_per_segment_126728 * + i_126752; // apply map function { - float x_31975 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_31906 * - i32_res_27787 + - gtid_31916]; - float x_31976 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_31906 * - (i32_res_27787 * - i32_res_27787) + - gtid_31907 * - i32_res_27787 + - gtid_31916]; - float defunc_1_f_res_31977 = x_31975 * x_31976; + double x_106107 = ((__global double *) mem_120246)[j_106067 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_105890 * + defunc_2_reduce_res_75260 + + gtid_105898]; + double defunc_1_f_res_106108 = x_106107 * x_106107; // save map-out results { } // load accumulator { - x_31969 = x_acc_46097; + x_106103 = x_acc_126747; } // load new values { - x_31970 = defunc_1_f_res_31977; + x_106104 = defunc_1_f_res_106108; } // apply reduction operator { - float defunc_1_op_res_31971 = x_31969 + x_31970; + double defunc_1_op_res_106105 = x_106103 + x_106104; // store in accumulator { - x_acc_46097 = defunc_1_op_res_31971; + x_acc_126747 = defunc_1_op_res_106105; } } } } // to reduce current chunk, first store our result in memory { - x_31969 = x_acc_46097; + x_106103 = x_acc_126747; ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_31969; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_106103; } barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46103; - int32_t skip_waves_46104; + int32_t offset_126753; + int32_t skip_waves_126754; - skip_waves_46104 = 1; + skip_waves_126754 = 1; - float x_46099; - float x_46100; + double x_126749; + double x_126750; - offset_46103 = 0; + offset_126753 = 0; // 
participating threads read initial accumulator { - if (slt32(local_tid_46084, - sext_i64_i32(segred_group_sizze_31965))) { - x_46099 = ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46103)]; - } - } - offset_46103 = 1; - while (slt32(offset_46103, wave_sizze_46086)) { - if (slt32(local_tid_46084 + offset_46103, - sext_i64_i32(segred_group_sizze_31965)) && - ((local_tid_46084 - squot32(local_tid_46084, wave_sizze_46086) * - wave_sizze_46086) & (2 * offset_46103 - 1)) == 0) { + if (slt32(local_tid_126734, + sext_i64_i32(segred_group_sizze_106099))) { + x_126749 = ((__local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126753)]; + } + } + offset_126753 = 1; + while (slt32(offset_126753, wave_sizze_126736)) { + if (slt32(local_tid_126734 + offset_126753, + sext_i64_i32(segred_group_sizze_106099)) && + ((local_tid_126734 - squot32(local_tid_126734, + wave_sizze_126736) * + wave_sizze_126736) & (2 * offset_126753 - 1)) == 0) { // read array element { - x_46100 = ((volatile __local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46103)]; + x_126750 = ((volatile __local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126753)]; } // apply reduction operation { - float defunc_1_op_res_46101 = x_46099 + x_46100; + double defunc_1_op_res_126751 = x_126749 + x_126750; - x_46099 = defunc_1_op_res_46101; + x_126749 = defunc_1_op_res_126751; } // write result of operation { ((volatile __local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_46099; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_126749; } } - offset_46103 *= 2; + offset_126753 *= 2; } - while (slt32(skip_waves_46104, - squot32(sext_i64_i32(segred_group_sizze_31965) + - wave_sizze_46086 - 1, wave_sizze_46086))) { + while (slt32(skip_waves_126754, + squot32(sext_i64_i32(segred_group_sizze_106099) + + wave_sizze_126736 - 1, wave_sizze_126736))) { 
barrier(CLK_LOCAL_MEM_FENCE); - offset_46103 = skip_waves_46104 * wave_sizze_46086; - if (slt32(local_tid_46084 + offset_46103, - sext_i64_i32(segred_group_sizze_31965)) && - ((local_tid_46084 - squot32(local_tid_46084, wave_sizze_46086) * - wave_sizze_46086) == 0 && (squot32(local_tid_46084, - wave_sizze_46086) & (2 * - skip_waves_46104 - - 1)) == + offset_126753 = skip_waves_126754 * wave_sizze_126736; + if (slt32(local_tid_126734 + offset_126753, + sext_i64_i32(segred_group_sizze_106099)) && + ((local_tid_126734 - squot32(local_tid_126734, + wave_sizze_126736) * + wave_sizze_126736) == 0 && (squot32(local_tid_126734, + wave_sizze_126736) & (2 * + skip_waves_126754 - + 1)) == 0)) { // read array element { - x_46100 = ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46103)]; + x_126750 = ((__local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126753)]; } // apply reduction operation { - float defunc_1_op_res_46101 = x_46099 + x_46100; + double defunc_1_op_res_126751 = x_126749 + x_126750; - x_46099 = defunc_1_op_res_46101; + x_126749 = defunc_1_op_res_126751; } // write result of operation { ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_46099; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_126749; } } - skip_waves_46104 *= 2; + skip_waves_126754 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_46084) == (int64_t) 0) { - x_acc_46097 = x_46099; + if (sext_i32_i64(local_tid_126734) == (int64_t) 0) { + x_acc_126747 = x_126749; } } - if (groups_per_segment_46074 == (int64_t) 1) { + if (groups_per_segment_126724 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46084 == 0) { - ((__global float *) mem_44910)[gtid_31906 * i32_res_27787 + - gtid_31907] = x_acc_46097; + if (local_tid_126734 == 0) { + ((__global double *) mem_120938)[gtid_105890] = + 
x_acc_126747; } } } else { - int32_t old_counter_46105; + int32_t old_counter_126755; // first thread in group saves group result to global memory { - if (local_tid_46084 == 0) { + if (local_tid_126734 == 0) { ((__global - float *) group_res_arr_mem_46079)[sext_i32_i64(virt_group_id_46094) * - segred_group_sizze_31965] = - x_acc_46097; + double *) group_res_arr_mem_126729)[sext_i32_i64(virt_group_id_126744) * + segred_group_sizze_106099] = + x_acc_126747; mem_fence_global(); - old_counter_46105 = + old_counter_126755 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46081)[sext_i32_i64(srem32(flat_segment_id_46095, - 10240))], + int *) mainzicounter_mem_126731)[sext_i32_i64(srem32(flat_segment_id_126745, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46090)[(int64_t) 0] = - old_counter_46105 == groups_per_segment_46074 - + ((__local bool *) sync_arr_mem_126740)[(int64_t) 0] = + old_counter_126755 == groups_per_segment_126724 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46106; + bool is_last_group_126756; - is_last_group_46106 = ((__local - bool *) sync_arr_mem_46090)[(int64_t) 0]; - if (is_last_group_46106) { - if (local_tid_46084 == 0) { - old_counter_46105 = + is_last_group_126756 = ((__local + bool *) sync_arr_mem_126740)[(int64_t) 0]; + if (is_last_group_126756) { + if (local_tid_126734 == 0) { + old_counter_126755 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46081)[sext_i32_i64(srem32(flat_segment_id_46095, - 10240))], + int *) mainzicounter_mem_126731)[sext_i32_i64(srem32(flat_segment_id_126745, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46074)); + groups_per_segment_126724)); } // read in the per-group-results { - int64_t read_per_thread_46107 = - sdiv_up64(groups_per_segment_46074, - segred_group_sizze_31965); + int64_t read_per_thread_126757 = + sdiv_up64(groups_per_segment_126724, + segred_group_sizze_106099); - x_31969 = 
0.0F; - for (int64_t i_46108 = 0; i_46108 < read_per_thread_46107; - i_46108++) { - int64_t group_res_id_46109 = - sext_i32_i64(local_tid_46084) * - read_per_thread_46107 + i_46108; - int64_t index_of_group_res_46110 = - sext_i32_i64(flat_segment_id_46095) * - groups_per_segment_46074 + group_res_id_46109; + x_106103 = 0.0; + for (int64_t i_126758 = 0; i_126758 < + read_per_thread_126757; i_126758++) { + int64_t group_res_id_126759 = + sext_i32_i64(local_tid_126734) * + read_per_thread_126757 + i_126758; + int64_t index_of_group_res_126760 = + sext_i32_i64(flat_segment_id_126745) * + groups_per_segment_126724 + group_res_id_126759; - if (slt64(group_res_id_46109, - groups_per_segment_46074)) { - x_31970 = ((__global - float *) group_res_arr_mem_46079)[index_of_group_res_46110 * - segred_group_sizze_31965]; + if (slt64(group_res_id_126759, + groups_per_segment_126724)) { + x_106104 = ((__global + double *) group_res_arr_mem_126729)[index_of_group_res_126760 * + segred_group_sizze_106099]; - float defunc_1_op_res_31971; + double defunc_1_op_res_106105; - defunc_1_op_res_31971 = x_31969 + x_31970; - x_31969 = defunc_1_op_res_31971; + defunc_1_op_res_106105 = x_106103 + x_106104; + x_106103 = defunc_1_op_res_106105; } } } ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_31969; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_106103; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46111; - int32_t skip_waves_46112; + int32_t offset_126761; + int32_t skip_waves_126762; - skip_waves_46112 = 1; + skip_waves_126762 = 1; - float x_46099; - float x_46100; + double x_126749; + double x_126750; - offset_46111 = 0; + offset_126761 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46084, - sext_i64_i32(segred_group_sizze_31965))) { - x_46099 = ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46111)]; - } - } - offset_46111 = 1; - while 
(slt32(offset_46111, wave_sizze_46086)) { - if (slt32(local_tid_46084 + offset_46111, - sext_i64_i32(segred_group_sizze_31965)) && - ((local_tid_46084 - squot32(local_tid_46084, - wave_sizze_46086) * - wave_sizze_46086) & (2 * offset_46111 - 1)) == + if (slt32(local_tid_126734, + sext_i64_i32(segred_group_sizze_106099))) { + x_126749 = ((__local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126761)]; + } + } + offset_126761 = 1; + while (slt32(offset_126761, wave_sizze_126736)) { + if (slt32(local_tid_126734 + offset_126761, + sext_i64_i32(segred_group_sizze_106099)) && + ((local_tid_126734 - squot32(local_tid_126734, + wave_sizze_126736) * + wave_sizze_126736) & (2 * offset_126761 - 1)) == 0) { // read array element { - x_46100 = ((volatile __local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46111)]; + x_126750 = ((volatile __local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126761)]; } // apply reduction operation { - float defunc_1_op_res_46101 = x_46099 + x_46100; + double defunc_1_op_res_126751 = x_126749 + + x_126750; - x_46099 = defunc_1_op_res_46101; + x_126749 = defunc_1_op_res_126751; } // write result of operation { ((volatile __local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_46099; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_126749; } } - offset_46111 *= 2; + offset_126761 *= 2; } - while (slt32(skip_waves_46112, - squot32(sext_i64_i32(segred_group_sizze_31965) + - wave_sizze_46086 - 1, - wave_sizze_46086))) { + while (slt32(skip_waves_126762, + squot32(sext_i64_i32(segred_group_sizze_106099) + + wave_sizze_126736 - 1, + wave_sizze_126736))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46111 = skip_waves_46112 * wave_sizze_46086; - if (slt32(local_tid_46084 + offset_46111, - sext_i64_i32(segred_group_sizze_31965)) && - ((local_tid_46084 - squot32(local_tid_46084, - wave_sizze_46086) * - wave_sizze_46086) == 0 && - 
(squot32(local_tid_46084, wave_sizze_46086) & (2 * - skip_waves_46112 - - 1)) == - 0)) { + offset_126761 = skip_waves_126762 * wave_sizze_126736; + if (slt32(local_tid_126734 + offset_126761, + sext_i64_i32(segred_group_sizze_106099)) && + ((local_tid_126734 - squot32(local_tid_126734, + wave_sizze_126736) * + wave_sizze_126736) == 0 && + (squot32(local_tid_126734, wave_sizze_126736) & + (2 * skip_waves_126762 - 1)) == 0)) { // read array element { - x_46100 = ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084 + - offset_46111)]; + x_126750 = ((__local + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734 + + offset_126761)]; } // apply reduction operation { - float defunc_1_op_res_46101 = x_46099 + x_46100; + double defunc_1_op_res_126751 = x_126749 + + x_126750; - x_46099 = defunc_1_op_res_46101; + x_126749 = defunc_1_op_res_126751; } // write result of operation { ((__local - float *) red_arr_mem_46088)[sext_i32_i64(local_tid_46084)] = - x_46099; + double *) red_arr_mem_126738)[sext_i32_i64(local_tid_126734)] = + x_126749; } } - skip_waves_46112 *= 2; + skip_waves_126762 *= 2; } // and back to memory with the final result { - if (local_tid_46084 == 0) { - ((__global float *) mem_44910)[gtid_31906 * - i32_res_27787 + - gtid_31907] = - x_46099; + if (local_tid_126734 == 0) { + ((__global double *) mem_120938)[gtid_105890] = + x_126749; } } } @@ -27868,404 +32981,425 @@ def sync(self): error_1: return; - #undef segred_group_sizze_31965 + #undef segred_group_sizze_106099 } -__kernel void mainDetailedzisegred_large_32047(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46222_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46220_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_27787, - int64_t num_groups_32094, - int64_t groups_per_segment_46206, - int64_t elements_per_thread_46207, - int64_t virt_num_groups_46208, - int64_t threads_per_segment_46210, - __global - unsigned char *mem_44397, - 
__global - unsigned char *defunc_4_map_res_mem_44916, - __global - unsigned char *mem_45134, - __global - unsigned char *group_res_arr_mem_46211, - __global - unsigned char *mainDetailedzicounter_mem_46213) +__kernel void mainzisegred_large_108614(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127893_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127891_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_110595, + int64_t groups_per_segment_127877, + int64_t elements_per_thread_127878, + int64_t virt_num_groups_127879, + int64_t threads_per_segment_127881, + __global unsigned char *mem_123614, + __global unsigned char *mem_123618, + __global unsigned char *mem_123623, + __global + unsigned char *group_res_arr_mem_127882, + __global + unsigned char *mainzicounter_mem_127884) { - #define segred_group_sizze_32093 (mainDetailedzisegred_group_sizze_32041) + #define segred_group_sizze_110594 (mainzisegred_group_sizze_108608) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46222_backing_1 = + __local volatile char *restrict sync_arr_mem_127893_backing_1 = (__local volatile - char *) sync_arr_mem_46222_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46220_backing_0 = + char *) sync_arr_mem_127893_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127891_backing_0 = (__local volatile - char *) red_arr_mem_46220_backing_aligned_1; + char *) red_arr_mem_127891_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46215; - int32_t local_tid_46216; - int64_t group_sizze_46219; - int32_t wave_sizze_46218; - int32_t group_tid_46217; - - global_tid_46215 = get_global_id(0); - local_tid_46216 = get_local_id(0); - group_sizze_46219 = get_local_size(0); - wave_sizze_46218 = LOCKSTEP_WIDTH; - group_tid_46217 = get_group_id(0); - - int32_t phys_tid_32047; - - phys_tid_32047 = 
global_tid_46215; - - __local char *red_arr_mem_46220; - - red_arr_mem_46220 = (__local char *) red_arr_mem_46220_backing_0; - - __local char *sync_arr_mem_46222; - - sync_arr_mem_46222 = (__local char *) sync_arr_mem_46222_backing_1; - - int32_t phys_group_id_46224; - - phys_group_id_46224 = get_group_id(0); - for (int32_t i_46225 = 0; i_46225 < - sdiv_up32(sext_i64_i32(virt_num_groups_46208) - phys_group_id_46224, - sext_i64_i32(num_groups_32094)); i_46225++) { - int32_t virt_group_id_46226 = phys_group_id_46224 + i_46225 * - sext_i64_i32(num_groups_32094); - int32_t flat_segment_id_46227 = squot32(virt_group_id_46226, - sext_i64_i32(groups_per_segment_46206)); - int64_t global_tid_46228 = srem64(sext_i32_i64(virt_group_id_46226) * - segred_group_sizze_32093 + - sext_i32_i64(local_tid_46216), - segred_group_sizze_32093 * - groups_per_segment_46206); - int64_t gtid_32036 = squot64(sext_i32_i64(flat_segment_id_46227), - N_27771); - int64_t gtid_32037 = sext_i32_i64(flat_segment_id_46227) - - squot64(sext_i32_i64(flat_segment_id_46227), N_27771) * N_27771; - int64_t gtid_32046; - float x_acc_46229; - int64_t chunk_sizze_46230; - - chunk_sizze_46230 = smin64(elements_per_thread_46207, - sdiv_up64(i32_res_27787 - - sext_i32_i64(sext_i64_i32(global_tid_46228)), - threads_per_segment_46210)); - - float x_32097; - float x_32098; + int32_t global_tid_127886; + int32_t local_tid_127887; + int64_t group_sizze_127890; + int32_t wave_sizze_127889; + int32_t group_tid_127888; + + global_tid_127886 = get_global_id(0); + local_tid_127887 = get_local_id(0); + group_sizze_127890 = get_local_size(0); + wave_sizze_127889 = LOCKSTEP_WIDTH; + group_tid_127888 = get_group_id(0); + + int32_t phys_tid_108614; + + phys_tid_108614 = global_tid_127886; + + __local char *red_arr_mem_127891; + + red_arr_mem_127891 = (__local char *) red_arr_mem_127891_backing_0; + + __local char *sync_arr_mem_127893; + + sync_arr_mem_127893 = (__local char *) sync_arr_mem_127893_backing_1; + + int32_t 
phys_group_id_127895; + + phys_group_id_127895 = get_group_id(0); + for (int32_t i_127896 = 0; i_127896 < + sdiv_up32(sext_i64_i32(virt_num_groups_127879) - phys_group_id_127895, + sext_i64_i32(num_groups_110595)); i_127896++) { + int32_t virt_group_id_127897 = phys_group_id_127895 + i_127896 * + sext_i64_i32(num_groups_110595); + int32_t flat_segment_id_127898 = squot32(virt_group_id_127897, + sext_i64_i32(groups_per_segment_127877)); + int64_t global_tid_127899 = srem64(sext_i32_i64(virt_group_id_127897) * + segred_group_sizze_110594 + + sext_i32_i64(local_tid_127887), + segred_group_sizze_110594 * + groups_per_segment_127877); + int64_t gtid_108601 = squot64(sext_i32_i64(flat_segment_id_127898), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_108602 = squot64(sext_i32_i64(flat_segment_id_127898) - + squot64(sext_i32_i64(flat_segment_id_127898), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_108603 = sext_i32_i64(flat_segment_id_127898) - + squot64(sext_i32_i64(flat_segment_id_127898), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(flat_segment_id_127898) - + squot64(sext_i32_i64(flat_segment_id_127898), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t gtid_108613; + double x_acc_127900; + int64_t chunk_sizze_127901; + + chunk_sizze_127901 = smin64(elements_per_thread_127878, + sdiv_up64(k2p2zq_75151 - global_tid_127899, + threads_per_segment_127881)); + + double x_110598; + double x_110599; // neutral-initialise the accumulators { - x_acc_46229 = 0.0F; + x_acc_127900 = 0.0; } - for (int64_t i_46234 = 0; i_46234 < chunk_sizze_46230; i_46234++) { - gtid_32046 = sext_i32_i64(sext_i64_i32(global_tid_46228)) + - threads_per_segment_46210 * i_46234; + for (int64_t i_127905 = 0; i_127905 < chunk_sizze_127901; i_127905++) { + gtid_108613 = global_tid_127899 + threads_per_segment_127881 * + i_127905; // apply map function { 
- float x_32102 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_32036 * - i32_res_27787 + - gtid_32046]; - float x_32103 = ((__global float *) mem_44397)[gtid_32037 * - i32_res_27787 + - gtid_32046]; - float defunc_1_f_res_32104 = x_32102 * x_32103; + double x_110604 = ((__global double *) mem_123614)[gtid_108602 * + (k2p2zq_75151 * + m_75136) + + gtid_108601 * + k2p2zq_75151 + + gtid_108613]; + double x_110605 = ((__global double *) mem_123618)[gtid_108601 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108603 * + k2p2zq_75151 + + gtid_108613]; + double defunc_1_f_res_110606 = x_110604 * x_110605; // save map-out results { } // load accumulator { - x_32097 = x_acc_46229; + x_110598 = x_acc_127900; } // load new values { - x_32098 = defunc_1_f_res_32104; + x_110599 = defunc_1_f_res_110606; } // apply reduction operator { - float defunc_1_op_res_32099 = x_32097 + x_32098; + double defunc_1_op_res_110600 = x_110598 + x_110599; // store in accumulator { - x_acc_46229 = defunc_1_op_res_32099; + x_acc_127900 = defunc_1_op_res_110600; } } } } // to reduce current chunk, first store our result in memory { - x_32097 = x_acc_46229; + x_110598 = x_acc_127900; ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_32097; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_110598; } barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46235; - int32_t skip_waves_46236; + int32_t offset_127906; + int32_t skip_waves_127907; - skip_waves_46236 = 1; + skip_waves_127907 = 1; - float x_46231; - float x_46232; + double x_127902; + double x_127903; - offset_46235 = 0; + offset_127906 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46216, - sext_i64_i32(segred_group_sizze_32093))) { - x_46231 = ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46235)]; - } - } - offset_46235 = 1; - while (slt32(offset_46235, wave_sizze_46218)) { - if (slt32(local_tid_46216 + offset_46235, - 
sext_i64_i32(segred_group_sizze_32093)) && - ((local_tid_46216 - squot32(local_tid_46216, wave_sizze_46218) * - wave_sizze_46218) & (2 * offset_46235 - 1)) == 0) { + if (slt32(local_tid_127887, + sext_i64_i32(segred_group_sizze_110594))) { + x_127902 = ((__local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127906)]; + } + } + offset_127906 = 1; + while (slt32(offset_127906, wave_sizze_127889)) { + if (slt32(local_tid_127887 + offset_127906, + sext_i64_i32(segred_group_sizze_110594)) && + ((local_tid_127887 - squot32(local_tid_127887, + wave_sizze_127889) * + wave_sizze_127889) & (2 * offset_127906 - 1)) == 0) { // read array element { - x_46232 = ((volatile __local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46235)]; + x_127903 = ((volatile __local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127906)]; } // apply reduction operation { - float defunc_1_op_res_46233 = x_46231 + x_46232; + double defunc_1_op_res_127904 = x_127902 + x_127903; - x_46231 = defunc_1_op_res_46233; + x_127902 = defunc_1_op_res_127904; } // write result of operation { ((volatile __local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_46231; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_127902; } } - offset_46235 *= 2; + offset_127906 *= 2; } - while (slt32(skip_waves_46236, - squot32(sext_i64_i32(segred_group_sizze_32093) + - wave_sizze_46218 - 1, wave_sizze_46218))) { + while (slt32(skip_waves_127907, + squot32(sext_i64_i32(segred_group_sizze_110594) + + wave_sizze_127889 - 1, wave_sizze_127889))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46235 = skip_waves_46236 * wave_sizze_46218; - if (slt32(local_tid_46216 + offset_46235, - sext_i64_i32(segred_group_sizze_32093)) && - ((local_tid_46216 - squot32(local_tid_46216, wave_sizze_46218) * - wave_sizze_46218) == 0 && (squot32(local_tid_46216, - wave_sizze_46218) & (2 * - skip_waves_46236 - - 1)) == + offset_127906 = 
skip_waves_127907 * wave_sizze_127889; + if (slt32(local_tid_127887 + offset_127906, + sext_i64_i32(segred_group_sizze_110594)) && + ((local_tid_127887 - squot32(local_tid_127887, + wave_sizze_127889) * + wave_sizze_127889) == 0 && (squot32(local_tid_127887, + wave_sizze_127889) & (2 * + skip_waves_127907 - + 1)) == 0)) { // read array element { - x_46232 = ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46235)]; + x_127903 = ((__local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127906)]; } // apply reduction operation { - float defunc_1_op_res_46233 = x_46231 + x_46232; + double defunc_1_op_res_127904 = x_127902 + x_127903; - x_46231 = defunc_1_op_res_46233; + x_127902 = defunc_1_op_res_127904; } // write result of operation { ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_46231; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_127902; } } - skip_waves_46236 *= 2; + skip_waves_127907 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_46216) == (int64_t) 0) { - x_acc_46229 = x_46231; + if (sext_i32_i64(local_tid_127887) == (int64_t) 0) { + x_acc_127900 = x_127902; } } - if (groups_per_segment_46206 == (int64_t) 1) { + if (groups_per_segment_127877 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46216 == 0) { - ((__global float *) mem_45134)[gtid_32036 * N_27771 + - gtid_32037] = x_acc_46229; + if (local_tid_127887 == 0) { + ((__global double *) mem_123623)[gtid_108601 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108602 * + k2p2zq_75151 + + gtid_108603] = + x_acc_127900; } } } else { - int32_t old_counter_46237; + int32_t old_counter_127908; // first thread in group saves group result to global memory { - if (local_tid_46216 == 0) { + if (local_tid_127887 == 0) { ((__global - float *) group_res_arr_mem_46211)[sext_i32_i64(virt_group_id_46226) * - 
segred_group_sizze_32093] = - x_acc_46229; + double *) group_res_arr_mem_127882)[sext_i32_i64(virt_group_id_127897) * + segred_group_sizze_110594] = + x_acc_127900; mem_fence_global(); - old_counter_46237 = + old_counter_127908 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46213)[sext_i32_i64(srem32(flat_segment_id_46227, - 10240))], + int *) mainzicounter_mem_127884)[sext_i32_i64(srem32(flat_segment_id_127898, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46222)[(int64_t) 0] = - old_counter_46237 == groups_per_segment_46206 - + ((__local bool *) sync_arr_mem_127893)[(int64_t) 0] = + old_counter_127908 == groups_per_segment_127877 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46238; + bool is_last_group_127909; - is_last_group_46238 = ((__local - bool *) sync_arr_mem_46222)[(int64_t) 0]; - if (is_last_group_46238) { - if (local_tid_46216 == 0) { - old_counter_46237 = + is_last_group_127909 = ((__local + bool *) sync_arr_mem_127893)[(int64_t) 0]; + if (is_last_group_127909) { + if (local_tid_127887 == 0) { + old_counter_127908 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46213)[sext_i32_i64(srem32(flat_segment_id_46227, - 10240))], + int *) mainzicounter_mem_127884)[sext_i32_i64(srem32(flat_segment_id_127898, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46206)); + groups_per_segment_127877)); } // read in the per-group-results { - int64_t read_per_thread_46239 = - sdiv_up64(groups_per_segment_46206, - segred_group_sizze_32093); + int64_t read_per_thread_127910 = + sdiv_up64(groups_per_segment_127877, + segred_group_sizze_110594); - x_32097 = 0.0F; - for (int64_t i_46240 = 0; i_46240 < read_per_thread_46239; - i_46240++) { - int64_t group_res_id_46241 = - sext_i32_i64(local_tid_46216) * - read_per_thread_46239 + i_46240; - int64_t index_of_group_res_46242 = - sext_i32_i64(flat_segment_id_46227) * - groups_per_segment_46206 + 
group_res_id_46241; + x_110598 = 0.0; + for (int64_t i_127911 = 0; i_127911 < + read_per_thread_127910; i_127911++) { + int64_t group_res_id_127912 = + sext_i32_i64(local_tid_127887) * + read_per_thread_127910 + i_127911; + int64_t index_of_group_res_127913 = + sext_i32_i64(flat_segment_id_127898) * + groups_per_segment_127877 + group_res_id_127912; - if (slt64(group_res_id_46241, - groups_per_segment_46206)) { - x_32098 = ((__global - float *) group_res_arr_mem_46211)[index_of_group_res_46242 * - segred_group_sizze_32093]; + if (slt64(group_res_id_127912, + groups_per_segment_127877)) { + x_110599 = ((__global + double *) group_res_arr_mem_127882)[index_of_group_res_127913 * + segred_group_sizze_110594]; - float defunc_1_op_res_32099; + double defunc_1_op_res_110600; - defunc_1_op_res_32099 = x_32097 + x_32098; - x_32097 = defunc_1_op_res_32099; + defunc_1_op_res_110600 = x_110598 + x_110599; + x_110598 = defunc_1_op_res_110600; } } } ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_32097; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_110598; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46243; - int32_t skip_waves_46244; + int32_t offset_127914; + int32_t skip_waves_127915; - skip_waves_46244 = 1; + skip_waves_127915 = 1; - float x_46231; - float x_46232; + double x_127902; + double x_127903; - offset_46243 = 0; + offset_127914 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46216, - sext_i64_i32(segred_group_sizze_32093))) { - x_46231 = ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46243)]; - } - } - offset_46243 = 1; - while (slt32(offset_46243, wave_sizze_46218)) { - if (slt32(local_tid_46216 + offset_46243, - sext_i64_i32(segred_group_sizze_32093)) && - ((local_tid_46216 - squot32(local_tid_46216, - wave_sizze_46218) * - wave_sizze_46218) & (2 * offset_46243 - 1)) == + if (slt32(local_tid_127887, + 
sext_i64_i32(segred_group_sizze_110594))) { + x_127902 = ((__local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127914)]; + } + } + offset_127914 = 1; + while (slt32(offset_127914, wave_sizze_127889)) { + if (slt32(local_tid_127887 + offset_127914, + sext_i64_i32(segred_group_sizze_110594)) && + ((local_tid_127887 - squot32(local_tid_127887, + wave_sizze_127889) * + wave_sizze_127889) & (2 * offset_127914 - 1)) == 0) { // read array element { - x_46232 = ((volatile __local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46243)]; + x_127903 = ((volatile __local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127914)]; } // apply reduction operation { - float defunc_1_op_res_46233 = x_46231 + x_46232; + double defunc_1_op_res_127904 = x_127902 + + x_127903; - x_46231 = defunc_1_op_res_46233; + x_127902 = defunc_1_op_res_127904; } // write result of operation { ((volatile __local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_46231; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_127902; } } - offset_46243 *= 2; + offset_127914 *= 2; } - while (slt32(skip_waves_46244, - squot32(sext_i64_i32(segred_group_sizze_32093) + - wave_sizze_46218 - 1, - wave_sizze_46218))) { + while (slt32(skip_waves_127915, + squot32(sext_i64_i32(segred_group_sizze_110594) + + wave_sizze_127889 - 1, + wave_sizze_127889))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46243 = skip_waves_46244 * wave_sizze_46218; - if (slt32(local_tid_46216 + offset_46243, - sext_i64_i32(segred_group_sizze_32093)) && - ((local_tid_46216 - squot32(local_tid_46216, - wave_sizze_46218) * - wave_sizze_46218) == 0 && - (squot32(local_tid_46216, wave_sizze_46218) & (2 * - skip_waves_46244 - - 1)) == - 0)) { + offset_127914 = skip_waves_127915 * wave_sizze_127889; + if (slt32(local_tid_127887 + offset_127914, + sext_i64_i32(segred_group_sizze_110594)) && + ((local_tid_127887 - squot32(local_tid_127887, + 
wave_sizze_127889) * + wave_sizze_127889) == 0 && + (squot32(local_tid_127887, wave_sizze_127889) & + (2 * skip_waves_127915 - 1)) == 0)) { // read array element { - x_46232 = ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216 + - offset_46243)]; + x_127903 = ((__local + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887 + + offset_127914)]; } // apply reduction operation { - float defunc_1_op_res_46233 = x_46231 + x_46232; + double defunc_1_op_res_127904 = x_127902 + + x_127903; - x_46231 = defunc_1_op_res_46233; + x_127902 = defunc_1_op_res_127904; } // write result of operation { ((__local - float *) red_arr_mem_46220)[sext_i32_i64(local_tid_46216)] = - x_46231; + double *) red_arr_mem_127891)[sext_i32_i64(local_tid_127887)] = + x_127902; } } - skip_waves_46244 *= 2; + skip_waves_127915 *= 2; } // and back to memory with the final result { - if (local_tid_46216 == 0) { - ((__global float *) mem_45134)[gtid_32036 * - N_27771 + - gtid_32037] = - x_46231; + if (local_tid_127887 == 0) { + ((__global double *) mem_123623)[gtid_108601 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108602 * + k2p2zq_75151 + + gtid_108603] = + x_127902; } } } @@ -28276,444 +33410,824 @@ def sync(self): error_1: return; - #undef segred_group_sizze_32093 + #undef segred_group_sizze_110594 } -__kernel void mainDetailedzisegred_large_32625(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46474_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46472_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_27781, - int64_t num_groups_32676, - int64_t groups_per_segment_46458, - int64_t elements_per_thread_46459, - int64_t virt_num_groups_46460, - int64_t threads_per_segment_46462, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45232, - __global - unsigned char *mem_45235, - __global - unsigned char *group_res_arr_mem_46463, 
- __global - unsigned char *mainDetailedzicounter_mem_46465) +__kernel void mainzisegred_large_108892(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127749_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127747_backing_aligned_1, + int64_t k2p2zq_75151, int64_t x_110426, + int64_t i_110427, int64_t j_m_i_110431, + int64_t num_groups_110513, + int64_t binop_x_120251, + int64_t groups_per_segment_127733, + int64_t elements_per_thread_127734, + int64_t virt_num_groups_127735, + int64_t threads_per_segment_127737, + __global unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global unsigned char *mem_123338, + __global + unsigned char *group_res_arr_mem_127738, + __global + unsigned char *mainzicounter_mem_127740) { - #define segred_group_sizze_32675 (mainDetailedzisegred_group_sizze_32619) + #define segred_group_sizze_110512 (mainzisegred_group_sizze_108886) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46474_backing_1 = + __local volatile char *restrict sync_arr_mem_127749_backing_1 = (__local volatile - char *) sync_arr_mem_46474_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46472_backing_0 = + char *) sync_arr_mem_127749_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127747_backing_0 = (__local volatile - char *) red_arr_mem_46472_backing_aligned_1; - volatile __local bool local_failure; + char *) red_arr_mem_127747_backing_aligned_1; - if (failure_is_an_option) { - int failed = *global_failure >= 0; + if (*global_failure >= 0) + return; + + int32_t global_tid_127742; + int32_t local_tid_127743; + int64_t group_sizze_127746; + int32_t wave_sizze_127745; + int32_t group_tid_127744; + + global_tid_127742 = get_global_id(0); + local_tid_127743 = get_local_id(0); + group_sizze_127746 = get_local_size(0); + wave_sizze_127745 = LOCKSTEP_WIDTH; + group_tid_127744 = get_group_id(0); + + int32_t 
phys_tid_108892; + + phys_tid_108892 = global_tid_127742; + + __local char *red_arr_mem_127747; + + red_arr_mem_127747 = (__local char *) red_arr_mem_127747_backing_0; + + __local char *sync_arr_mem_127749; + + sync_arr_mem_127749 = (__local char *) sync_arr_mem_127749_backing_1; + + int32_t phys_group_id_127751; + + phys_group_id_127751 = get_group_id(0); + for (int32_t i_127752 = 0; i_127752 < + sdiv_up32(sext_i64_i32(virt_num_groups_127735) - phys_group_id_127751, + sext_i64_i32(num_groups_110513)); i_127752++) { + int32_t virt_group_id_127753 = phys_group_id_127751 + i_127752 * + sext_i64_i32(num_groups_110513); + int32_t flat_segment_id_127754 = squot32(virt_group_id_127753, + sext_i64_i32(groups_per_segment_127733)); + int64_t global_tid_127755 = srem64(sext_i32_i64(virt_group_id_127753) * + segred_group_sizze_110512 + + sext_i32_i64(local_tid_127743), + segred_group_sizze_110512 * + groups_per_segment_127733); + int64_t gtid_108881 = squot64(sext_i32_i64(flat_segment_id_127754), + k2p2zq_75151); + int64_t gtid_108882 = sext_i32_i64(flat_segment_id_127754) - + squot64(sext_i32_i64(flat_segment_id_127754), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_108891; + double x_acc_127756; + int64_t chunk_sizze_127757; + + chunk_sizze_127757 = smin64(elements_per_thread_127734, + sdiv_up64(j_m_i_110431 - global_tid_127755, + threads_per_segment_127737)); + + double x_110516; + double x_110517; - if (failed) - return; + // neutral-initialise the accumulators + { + x_acc_127756 = 0.0; + } + for (int64_t i_127761 = 0; i_127761 < chunk_sizze_127757; i_127761++) { + gtid_108891 = global_tid_127755 + threads_per_segment_127737 * + i_127761; + // apply map function + { + int64_t slice_115165 = gtid_108891 + x_110426; + double x_110523 = ((__global double *) mem_123143)[gtid_108881 * + (k2p2zq_75151 * + k2p2zq_75151) + + slice_115165 * + k2p2zq_75151 + + i_110427]; + bool isnan_res_110524; + + isnan_res_110524 = futrts_isnan64(x_110523); + + double defunc_1_f_res_110525; + + 
if (isnan_res_110524) { + defunc_1_f_res_110525 = 0.0; + } else { + double x_110522 = ((__global + double *) mem_param_123252)[gtid_108881 * + binop_x_120251 + + gtid_108882 * + k2p2zq_75151 + + slice_115165]; + double defunc_1_f_res_f_res_110526 = x_110522 * x_110523; + + defunc_1_f_res_110525 = defunc_1_f_res_f_res_110526; + } + // save map-out results + { } + // load accumulator + { + x_110516 = x_acc_127756; + } + // load new values + { + x_110517 = defunc_1_f_res_110525; + } + // apply reduction operator + { + double defunc_1_op_res_110518 = x_110516 + x_110517; + + // store in accumulator + { + x_acc_127756 = defunc_1_op_res_110518; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_110516 = x_acc_127756; + ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_110516; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127762; + int32_t skip_waves_127763; + + skip_waves_127763 = 1; + + double x_127758; + double x_127759; + + offset_127762 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127743, + sext_i64_i32(segred_group_sizze_110512))) { + x_127758 = ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127762)]; + } + } + offset_127762 = 1; + while (slt32(offset_127762, wave_sizze_127745)) { + if (slt32(local_tid_127743 + offset_127762, + sext_i64_i32(segred_group_sizze_110512)) && + ((local_tid_127743 - squot32(local_tid_127743, + wave_sizze_127745) * + wave_sizze_127745) & (2 * offset_127762 - 1)) == 0) { + // read array element + { + x_127759 = ((volatile __local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127762)]; + } + // apply reduction operation + { + double defunc_1_op_res_127760 = x_127758 + x_127759; + + x_127758 = defunc_1_op_res_127760; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_127758; + } + } + 
offset_127762 *= 2; + } + while (slt32(skip_waves_127763, + squot32(sext_i64_i32(segred_group_sizze_110512) + + wave_sizze_127745 - 1, wave_sizze_127745))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127762 = skip_waves_127763 * wave_sizze_127745; + if (slt32(local_tid_127743 + offset_127762, + sext_i64_i32(segred_group_sizze_110512)) && + ((local_tid_127743 - squot32(local_tid_127743, + wave_sizze_127745) * + wave_sizze_127745) == 0 && (squot32(local_tid_127743, + wave_sizze_127745) & (2 * + skip_waves_127763 - + 1)) == + 0)) { + // read array element + { + x_127759 = ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127762)]; + } + // apply reduction operation + { + double defunc_1_op_res_127760 = x_127758 + x_127759; + + x_127758 = defunc_1_op_res_127760; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_127758; + } + } + skip_waves_127763 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127743) == (int64_t) 0) { + x_acc_127756 = x_127758; + } + } + if (groups_per_segment_127733 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127743 == 0) { + ((__global double *) mem_123338)[gtid_108881 * + k2p2zq_75151 + + gtid_108882] = + x_acc_127756; + } + } + } else { + int32_t old_counter_127764; + + // first thread in group saves group result to global memory + { + if (local_tid_127743 == 0) { + ((__global + double *) group_res_arr_mem_127738)[sext_i32_i64(virt_group_id_127753) * + segred_group_sizze_110512] = + x_acc_127756; + mem_fence_global(); + old_counter_127764 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_127740)[sext_i32_i64(srem32(flat_segment_id_127754, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127749)[(int64_t) 0] = + old_counter_127764 == groups_per_segment_127733 - + (int64_t) 1; + } + } + 
barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127765; + + is_last_group_127765 = ((__local + bool *) sync_arr_mem_127749)[(int64_t) 0]; + if (is_last_group_127765) { + if (local_tid_127743 == 0) { + old_counter_127764 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_127740)[sext_i32_i64(srem32(flat_segment_id_127754, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127733)); + } + // read in the per-group-results + { + int64_t read_per_thread_127766 = + sdiv_up64(groups_per_segment_127733, + segred_group_sizze_110512); + + x_110516 = 0.0; + for (int64_t i_127767 = 0; i_127767 < + read_per_thread_127766; i_127767++) { + int64_t group_res_id_127768 = + sext_i32_i64(local_tid_127743) * + read_per_thread_127766 + i_127767; + int64_t index_of_group_res_127769 = + sext_i32_i64(flat_segment_id_127754) * + groups_per_segment_127733 + group_res_id_127768; + + if (slt64(group_res_id_127768, + groups_per_segment_127733)) { + x_110517 = ((__global + double *) group_res_arr_mem_127738)[index_of_group_res_127769 * + segred_group_sizze_110512]; + + double defunc_1_op_res_110518; + + defunc_1_op_res_110518 = x_110516 + x_110517; + x_110516 = defunc_1_op_res_110518; + } + } + } + ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_110516; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127770; + int32_t skip_waves_127771; + + skip_waves_127771 = 1; + + double x_127758; + double x_127759; + + offset_127770 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127743, + sext_i64_i32(segred_group_sizze_110512))) { + x_127758 = ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127770)]; + } + } + offset_127770 = 1; + while (slt32(offset_127770, wave_sizze_127745)) { + if (slt32(local_tid_127743 + offset_127770, + sext_i64_i32(segred_group_sizze_110512)) && + ((local_tid_127743 - 
squot32(local_tid_127743, + wave_sizze_127745) * + wave_sizze_127745) & (2 * offset_127770 - 1)) == + 0) { + // read array element + { + x_127759 = ((volatile __local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127770)]; + } + // apply reduction operation + { + double defunc_1_op_res_127760 = x_127758 + + x_127759; + + x_127758 = defunc_1_op_res_127760; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_127758; + } + } + offset_127770 *= 2; + } + while (slt32(skip_waves_127771, + squot32(sext_i64_i32(segred_group_sizze_110512) + + wave_sizze_127745 - 1, + wave_sizze_127745))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127770 = skip_waves_127771 * wave_sizze_127745; + if (slt32(local_tid_127743 + offset_127770, + sext_i64_i32(segred_group_sizze_110512)) && + ((local_tid_127743 - squot32(local_tid_127743, + wave_sizze_127745) * + wave_sizze_127745) == 0 && + (squot32(local_tid_127743, wave_sizze_127745) & + (2 * skip_waves_127771 - 1)) == 0)) { + // read array element + { + x_127759 = ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743 + + offset_127770)]; + } + // apply reduction operation + { + double defunc_1_op_res_127760 = x_127758 + + x_127759; + + x_127758 = defunc_1_op_res_127760; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127747)[sext_i32_i64(local_tid_127743)] = + x_127758; + } + } + skip_waves_127771 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127743 == 0) { + ((__global double *) mem_123338)[gtid_108881 * + k2p2zq_75151 + + gtid_108882] = + x_127758; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46467; - int32_t local_tid_46468; - int64_t group_sizze_46471; - int32_t wave_sizze_46470; - int32_t group_tid_46469; + error_1: + return; + #undef 
segred_group_sizze_110512 +} +__kernel void mainzisegred_large_109665(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127524_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127522_backing_aligned_1, + int64_t m_75136, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, int64_t j_109957, + int64_t num_groups_109990, + int64_t groups_per_segment_127508, + int64_t elements_per_thread_127509, + int64_t virt_num_groups_127510, + int64_t threads_per_segment_127512, + __global unsigned char *mem_120246, + __global unsigned char *mem_122730, + __global + unsigned char *group_res_arr_mem_127513, + __global + unsigned char *mainzicounter_mem_127515) +{ + #define segred_group_sizze_109989 (mainzisegred_group_sizze_109659) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127524_backing_1 = + (__local volatile + char *) sync_arr_mem_127524_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127522_backing_0 = + (__local volatile + char *) red_arr_mem_127522_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127517; + int32_t local_tid_127518; + int64_t group_sizze_127521; + int32_t wave_sizze_127520; + int32_t group_tid_127519; - global_tid_46467 = get_global_id(0); - local_tid_46468 = get_local_id(0); - group_sizze_46471 = get_local_size(0); - wave_sizze_46470 = LOCKSTEP_WIDTH; - group_tid_46469 = get_group_id(0); + global_tid_127517 = get_global_id(0); + local_tid_127518 = get_local_id(0); + group_sizze_127521 = get_local_size(0); + wave_sizze_127520 = LOCKSTEP_WIDTH; + group_tid_127519 = get_group_id(0); - int32_t phys_tid_32625; + int32_t phys_tid_109665; - phys_tid_32625 = global_tid_46467; + phys_tid_109665 = global_tid_127517; - __local char *red_arr_mem_46472; + __local char *red_arr_mem_127522; - red_arr_mem_46472 = (__local char *) red_arr_mem_46472_backing_0; + red_arr_mem_127522 = (__local char *) 
red_arr_mem_127522_backing_0; - __local char *sync_arr_mem_46474; + __local char *sync_arr_mem_127524; - sync_arr_mem_46474 = (__local char *) sync_arr_mem_46474_backing_1; + sync_arr_mem_127524 = (__local char *) sync_arr_mem_127524_backing_1; - int32_t phys_group_id_46476; + int32_t phys_group_id_127526; - phys_group_id_46476 = get_group_id(0); - for (int32_t i_46477 = 0; i_46477 < - sdiv_up32(sext_i64_i32(virt_num_groups_46460) - phys_group_id_46476, - sext_i64_i32(num_groups_32676)); i_46477++) { - int32_t virt_group_id_46478 = phys_group_id_46476 + i_46477 * - sext_i64_i32(num_groups_32676); - int32_t flat_segment_id_46479 = squot32(virt_group_id_46478, - sext_i64_i32(groups_per_segment_46458)); - int64_t global_tid_46480 = srem64(sext_i32_i64(virt_group_id_46478) * - segred_group_sizze_32675 + - sext_i32_i64(local_tid_46468), - segred_group_sizze_32675 * - groups_per_segment_46458); - int64_t gtid_32616 = sext_i32_i64(flat_segment_id_46479); - int64_t gtid_32624; - float x_acc_46481; - int64_t chunk_sizze_46482; + phys_group_id_127526 = get_group_id(0); + for (int32_t i_127527 = 0; i_127527 < + sdiv_up32(sext_i64_i32(virt_num_groups_127510) - phys_group_id_127526, + sext_i64_i32(num_groups_109990)); i_127527++) { + int32_t virt_group_id_127528 = phys_group_id_127526 + i_127527 * + sext_i64_i32(num_groups_109990); + int32_t flat_segment_id_127529 = squot32(virt_group_id_127528, + sext_i64_i32(groups_per_segment_127508)); + int64_t global_tid_127530 = srem64(sext_i32_i64(virt_group_id_127528) * + segred_group_sizze_109989 + + sext_i32_i64(local_tid_127518), + segred_group_sizze_109989 * + groups_per_segment_127508); + int64_t gtid_109656 = sext_i32_i64(flat_segment_id_127529); + int64_t gtid_109664; + double x_acc_127531; + int64_t chunk_sizze_127532; - chunk_sizze_46482 = smin64(elements_per_thread_46459, - sdiv_up64(i32_res_27781 - - sext_i32_i64(sext_i64_i32(global_tid_46480)), - threads_per_segment_46462)); + chunk_sizze_127532 = 
smin64(elements_per_thread_127509, + sdiv_up64(rp1_75837 - global_tid_127530, + threads_per_segment_127512)); - float x_32679; - float x_32680; + double x_109993; + double x_109994; // neutral-initialise the accumulators { - x_acc_46481 = 0.0F; + x_acc_127531 = 0.0; } - for (int64_t i_46486 = 0; i_46486 < chunk_sizze_46482; i_46486++) { - gtid_32624 = sext_i32_i64(sext_i64_i32(global_tid_46480)) + - threads_per_segment_46462 * i_46486; + for (int64_t i_127536 = 0; i_127536 < chunk_sizze_127532; i_127536++) { + gtid_109664 = global_tid_127530 + threads_per_segment_127512 * + i_127536; // apply map function { - int32_t defunc_0_f_res_32683 = ((__global - int32_t *) mem_45232)[gtid_32616]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_32624); - bool cond_32685 = slt32(index_primexp_42385, - defunc_0_f_res_32683); - float defunc_0_f_res_32686; - - if (cond_32685) { - int64_t i_32687 = sext_i32_i64(index_primexp_42385); - bool x_32688 = sle64((int64_t) 0, i_32687); - bool y_32689 = slt64(i_32687, N_27771); - bool bounds_check_32690 = x_32688 && y_32689; - bool index_certs_32691; - - if (!bounds_check_32690) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 50) == -1) { - global_failure_args[0] = i_32687; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_32692 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_32616 * - N_27771 + - i_32687]; - - defunc_0_f_res_32686 = defunc_0_f_res_t_res_32692; - } else { - defunc_0_f_res_32686 = 0.0F; - } - - float defunc_0_f_res_32693 = defunc_0_f_res_32686 * - defunc_0_f_res_32686; + double x_109997 = ((__global double *) mem_120246)[j_109957 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109656 * + defunc_2_reduce_res_75260 + + gtid_109664]; + double defunc_1_f_res_109998 = x_109997 * x_109997; // save map-out results { } // load accumulator { - x_32679 = x_acc_46481; + x_109993 = x_acc_127531; } // load new values { - x_32680 = 
defunc_0_f_res_32693; + x_109994 = defunc_1_f_res_109998; } // apply reduction operator { - float defunc_1_op_res_32681 = x_32679 + x_32680; + double defunc_1_op_res_109995 = x_109993 + x_109994; // store in accumulator { - x_acc_46481 = defunc_1_op_res_32681; + x_acc_127531 = defunc_1_op_res_109995; } } } } // to reduce current chunk, first store our result in memory { - x_32679 = x_acc_46481; + x_109993 = x_acc_127531; ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_32679; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_109993; } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46487; - int32_t skip_waves_46488; + int32_t offset_127537; + int32_t skip_waves_127538; - skip_waves_46488 = 1; + skip_waves_127538 = 1; - float x_46483; - float x_46484; + double x_127533; + double x_127534; - offset_46487 = 0; + offset_127537 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46468, - sext_i64_i32(segred_group_sizze_32675))) { - x_46483 = ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46487)]; - } - } - offset_46487 = 1; - while (slt32(offset_46487, wave_sizze_46470)) { - if (slt32(local_tid_46468 + offset_46487, - sext_i64_i32(segred_group_sizze_32675)) && - ((local_tid_46468 - squot32(local_tid_46468, wave_sizze_46470) * - wave_sizze_46470) & (2 * offset_46487 - 1)) == 0) { + if (slt32(local_tid_127518, + sext_i64_i32(segred_group_sizze_109989))) { + x_127533 = ((__local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127537)]; + } + } + offset_127537 = 1; + while (slt32(offset_127537, wave_sizze_127520)) { + if (slt32(local_tid_127518 + offset_127537, + sext_i64_i32(segred_group_sizze_109989)) && + ((local_tid_127518 - squot32(local_tid_127518, + wave_sizze_127520) * + wave_sizze_127520) & (2 * offset_127537 - 1)) == 0) { // read array element { - x_46484 = 
((volatile __local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46487)]; + x_127534 = ((volatile __local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127537)]; } // apply reduction operation { - float defunc_1_op_res_46485 = x_46483 + x_46484; + double defunc_1_op_res_127535 = x_127533 + x_127534; - x_46483 = defunc_1_op_res_46485; + x_127533 = defunc_1_op_res_127535; } // write result of operation { ((volatile __local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_46483; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_127533; } } - offset_46487 *= 2; + offset_127537 *= 2; } - while (slt32(skip_waves_46488, - squot32(sext_i64_i32(segred_group_sizze_32675) + - wave_sizze_46470 - 1, wave_sizze_46470))) { + while (slt32(skip_waves_127538, + squot32(sext_i64_i32(segred_group_sizze_109989) + + wave_sizze_127520 - 1, wave_sizze_127520))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46487 = skip_waves_46488 * wave_sizze_46470; - if (slt32(local_tid_46468 + offset_46487, - sext_i64_i32(segred_group_sizze_32675)) && - ((local_tid_46468 - squot32(local_tid_46468, wave_sizze_46470) * - wave_sizze_46470) == 0 && (squot32(local_tid_46468, - wave_sizze_46470) & (2 * - skip_waves_46488 - - 1)) == + offset_127537 = skip_waves_127538 * wave_sizze_127520; + if (slt32(local_tid_127518 + offset_127537, + sext_i64_i32(segred_group_sizze_109989)) && + ((local_tid_127518 - squot32(local_tid_127518, + wave_sizze_127520) * + wave_sizze_127520) == 0 && (squot32(local_tid_127518, + wave_sizze_127520) & (2 * + skip_waves_127538 - + 1)) == 0)) { // read array element { - x_46484 = ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46487)]; + x_127534 = ((__local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127537)]; } // apply reduction operation { - float defunc_1_op_res_46485 = x_46483 + x_46484; + double defunc_1_op_res_127535 = x_127533 + 
x_127534; - x_46483 = defunc_1_op_res_46485; + x_127533 = defunc_1_op_res_127535; } // write result of operation { ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_46483; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_127533; } } - skip_waves_46488 *= 2; + skip_waves_127538 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_46468) == (int64_t) 0) { - x_acc_46481 = x_46483; + if (sext_i32_i64(local_tid_127518) == (int64_t) 0) { + x_acc_127531 = x_127533; } } - if (groups_per_segment_46458 == (int64_t) 1) { + if (groups_per_segment_127508 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46468 == 0) { - ((__global float *) mem_45235)[gtid_32616] = x_acc_46481; + if (local_tid_127518 == 0) { + ((__global double *) mem_122730)[gtid_109656] = + x_acc_127531; } } } else { - int32_t old_counter_46489; + int32_t old_counter_127539; // first thread in group saves group result to global memory { - if (local_tid_46468 == 0) { + if (local_tid_127518 == 0) { ((__global - float *) group_res_arr_mem_46463)[sext_i32_i64(virt_group_id_46478) * - segred_group_sizze_32675] = - x_acc_46481; + double *) group_res_arr_mem_127513)[sext_i32_i64(virt_group_id_127528) * + segred_group_sizze_109989] = + x_acc_127531; mem_fence_global(); - old_counter_46489 = + old_counter_127539 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46465)[sext_i32_i64(srem32(flat_segment_id_46479, - 10240))], + int *) mainzicounter_mem_127515)[sext_i32_i64(srem32(flat_segment_id_127529, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46474)[(int64_t) 0] = - old_counter_46489 == groups_per_segment_46458 - + ((__local bool *) sync_arr_mem_127524)[(int64_t) 0] = + old_counter_127539 == groups_per_segment_127508 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46490; + bool 
is_last_group_127540; - is_last_group_46490 = ((__local - bool *) sync_arr_mem_46474)[(int64_t) 0]; - if (is_last_group_46490) { - if (local_tid_46468 == 0) { - old_counter_46489 = + is_last_group_127540 = ((__local + bool *) sync_arr_mem_127524)[(int64_t) 0]; + if (is_last_group_127540) { + if (local_tid_127518 == 0) { + old_counter_127539 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46465)[sext_i32_i64(srem32(flat_segment_id_46479, - 10240))], + int *) mainzicounter_mem_127515)[sext_i32_i64(srem32(flat_segment_id_127529, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46458)); + groups_per_segment_127508)); } // read in the per-group-results { - int64_t read_per_thread_46491 = - sdiv_up64(groups_per_segment_46458, - segred_group_sizze_32675); + int64_t read_per_thread_127541 = + sdiv_up64(groups_per_segment_127508, + segred_group_sizze_109989); - x_32679 = 0.0F; - for (int64_t i_46492 = 0; i_46492 < read_per_thread_46491; - i_46492++) { - int64_t group_res_id_46493 = - sext_i32_i64(local_tid_46468) * - read_per_thread_46491 + i_46492; - int64_t index_of_group_res_46494 = - sext_i32_i64(flat_segment_id_46479) * - groups_per_segment_46458 + group_res_id_46493; + x_109993 = 0.0; + for (int64_t i_127542 = 0; i_127542 < + read_per_thread_127541; i_127542++) { + int64_t group_res_id_127543 = + sext_i32_i64(local_tid_127518) * + read_per_thread_127541 + i_127542; + int64_t index_of_group_res_127544 = + sext_i32_i64(flat_segment_id_127529) * + groups_per_segment_127508 + group_res_id_127543; - if (slt64(group_res_id_46493, - groups_per_segment_46458)) { - x_32680 = ((__global - float *) group_res_arr_mem_46463)[index_of_group_res_46494 * - segred_group_sizze_32675]; + if (slt64(group_res_id_127543, + groups_per_segment_127508)) { + x_109994 = ((__global + double *) group_res_arr_mem_127513)[index_of_group_res_127544 * + segred_group_sizze_109989]; - float defunc_1_op_res_32681; + double defunc_1_op_res_109995; - 
defunc_1_op_res_32681 = x_32679 + x_32680; - x_32679 = defunc_1_op_res_32681; + defunc_1_op_res_109995 = x_109993 + x_109994; + x_109993 = defunc_1_op_res_109995; } } } ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_32679; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_109993; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46495; - int32_t skip_waves_46496; + int32_t offset_127545; + int32_t skip_waves_127546; - skip_waves_46496 = 1; + skip_waves_127546 = 1; - float x_46483; - float x_46484; + double x_127533; + double x_127534; - offset_46495 = 0; + offset_127545 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46468, - sext_i64_i32(segred_group_sizze_32675))) { - x_46483 = ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46495)]; - } - } - offset_46495 = 1; - while (slt32(offset_46495, wave_sizze_46470)) { - if (slt32(local_tid_46468 + offset_46495, - sext_i64_i32(segred_group_sizze_32675)) && - ((local_tid_46468 - squot32(local_tid_46468, - wave_sizze_46470) * - wave_sizze_46470) & (2 * offset_46495 - 1)) == + if (slt32(local_tid_127518, + sext_i64_i32(segred_group_sizze_109989))) { + x_127533 = ((__local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127545)]; + } + } + offset_127545 = 1; + while (slt32(offset_127545, wave_sizze_127520)) { + if (slt32(local_tid_127518 + offset_127545, + sext_i64_i32(segred_group_sizze_109989)) && + ((local_tid_127518 - squot32(local_tid_127518, + wave_sizze_127520) * + wave_sizze_127520) & (2 * offset_127545 - 1)) == 0) { // read array element { - x_46484 = ((volatile __local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46495)]; + x_127534 = ((volatile __local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127545)]; } // apply reduction operation { - float defunc_1_op_res_46485 = x_46483 + x_46484; + double 
defunc_1_op_res_127535 = x_127533 + + x_127534; - x_46483 = defunc_1_op_res_46485; + x_127533 = defunc_1_op_res_127535; } // write result of operation { ((volatile __local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_46483; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_127533; } } - offset_46495 *= 2; + offset_127545 *= 2; } - while (slt32(skip_waves_46496, - squot32(sext_i64_i32(segred_group_sizze_32675) + - wave_sizze_46470 - 1, - wave_sizze_46470))) { + while (slt32(skip_waves_127546, + squot32(sext_i64_i32(segred_group_sizze_109989) + + wave_sizze_127520 - 1, + wave_sizze_127520))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46495 = skip_waves_46496 * wave_sizze_46470; - if (slt32(local_tid_46468 + offset_46495, - sext_i64_i32(segred_group_sizze_32675)) && - ((local_tid_46468 - squot32(local_tid_46468, - wave_sizze_46470) * - wave_sizze_46470) == 0 && - (squot32(local_tid_46468, wave_sizze_46470) & (2 * - skip_waves_46496 - - 1)) == - 0)) { + offset_127545 = skip_waves_127546 * wave_sizze_127520; + if (slt32(local_tid_127518 + offset_127545, + sext_i64_i32(segred_group_sizze_109989)) && + ((local_tid_127518 - squot32(local_tid_127518, + wave_sizze_127520) * + wave_sizze_127520) == 0 && + (squot32(local_tid_127518, wave_sizze_127520) & + (2 * skip_waves_127546 - 1)) == 0)) { // read array element { - x_46484 = ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468 + - offset_46495)]; + x_127534 = ((__local + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518 + + offset_127545)]; } // apply reduction operation { - float defunc_1_op_res_46485 = x_46483 + x_46484; + double defunc_1_op_res_127535 = x_127533 + + x_127534; - x_46483 = defunc_1_op_res_46485; + x_127533 = defunc_1_op_res_127535; } // write result of operation { ((__local - float *) red_arr_mem_46472)[sext_i32_i64(local_tid_46468)] = - x_46483; + double *) red_arr_mem_127522)[sext_i32_i64(local_tid_127518)] = + x_127533; } } - 
skip_waves_46496 *= 2; + skip_waves_127546 *= 2; } // and back to memory with the final result { - if (local_tid_46468 == 0) { - ((__global float *) mem_45235)[gtid_32616] = - x_46483; + if (local_tid_127518 == 0) { + ((__global double *) mem_122730)[gtid_109656] = + x_127533; } } } @@ -28724,400 +34238,402 @@ def sync(self): error_1: return; - #undef segred_group_sizze_32675 + #undef segred_group_sizze_109989 } -__kernel void mainDetailedzisegred_large_32650(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46414_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46412_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_27781, - int64_t num_groups_32662, - int64_t groups_per_segment_46398, - int64_t elements_per_thread_46399, - int64_t virt_num_groups_46400, - int64_t threads_per_segment_46402, - __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_45232, - __global - unsigned char *group_res_arr_mem_46403, - __global - unsigned char *mainDetailedzicounter_mem_46405) +__kernel void mainzisegred_large_109818(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127438_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127436_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t r_75826, + int64_t num_groups_109897, + int64_t groups_per_segment_127422, + int64_t elements_per_thread_127423, + int64_t virt_num_groups_127424, + int64_t threads_per_segment_127426, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global unsigned char *mem_122677, + __global + unsigned char *group_res_arr_mem_127427, + __global + unsigned char *mainzicounter_mem_127429) { - #define segred_group_sizze_32661 (mainDetailedzisegred_group_sizze_32644) + #define segred_group_sizze_109896 (mainzisegred_group_sizze_109812) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile 
char *restrict sync_arr_mem_46414_backing_1 = + __local volatile char *restrict sync_arr_mem_127438_backing_1 = (__local volatile - char *) sync_arr_mem_46414_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46412_backing_0 = + char *) sync_arr_mem_127438_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127436_backing_0 = (__local volatile - char *) red_arr_mem_46412_backing_aligned_1; + char *) red_arr_mem_127436_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46407; - int32_t local_tid_46408; - int64_t group_sizze_46411; - int32_t wave_sizze_46410; - int32_t group_tid_46409; + int32_t global_tid_127431; + int32_t local_tid_127432; + int64_t group_sizze_127435; + int32_t wave_sizze_127434; + int32_t group_tid_127433; - global_tid_46407 = get_global_id(0); - local_tid_46408 = get_local_id(0); - group_sizze_46411 = get_local_size(0); - wave_sizze_46410 = LOCKSTEP_WIDTH; - group_tid_46409 = get_group_id(0); + global_tid_127431 = get_global_id(0); + local_tid_127432 = get_local_id(0); + group_sizze_127435 = get_local_size(0); + wave_sizze_127434 = LOCKSTEP_WIDTH; + group_tid_127433 = get_group_id(0); - int32_t phys_tid_32650; + int32_t phys_tid_109818; - phys_tid_32650 = global_tid_46407; + phys_tid_109818 = global_tid_127431; - __local char *red_arr_mem_46412; + __local char *red_arr_mem_127436; - red_arr_mem_46412 = (__local char *) red_arr_mem_46412_backing_0; + red_arr_mem_127436 = (__local char *) red_arr_mem_127436_backing_0; - __local char *sync_arr_mem_46414; + __local char *sync_arr_mem_127438; - sync_arr_mem_46414 = (__local char *) sync_arr_mem_46414_backing_1; + sync_arr_mem_127438 = (__local char *) sync_arr_mem_127438_backing_1; - int32_t phys_group_id_46416; + int32_t phys_group_id_127440; - phys_group_id_46416 = get_group_id(0); - for (int32_t i_46417 = 0; i_46417 < - sdiv_up32(sext_i64_i32(virt_num_groups_46400) - phys_group_id_46416, - sext_i64_i32(num_groups_32662)); i_46417++) { - int32_t 
virt_group_id_46418 = phys_group_id_46416 + i_46417 * - sext_i64_i32(num_groups_32662); - int32_t flat_segment_id_46419 = squot32(virt_group_id_46418, - sext_i64_i32(groups_per_segment_46398)); - int64_t global_tid_46420 = srem64(sext_i32_i64(virt_group_id_46418) * - segred_group_sizze_32661 + - sext_i32_i64(local_tid_46408), - segred_group_sizze_32661 * - groups_per_segment_46398); - int64_t gtid_32641 = sext_i32_i64(flat_segment_id_46419); - int64_t gtid_32649; - int32_t x_acc_46421; - int64_t chunk_sizze_46422; + phys_group_id_127440 = get_group_id(0); + for (int32_t i_127441 = 0; i_127441 < + sdiv_up32(sext_i64_i32(virt_num_groups_127424) - phys_group_id_127440, + sext_i64_i32(num_groups_109897)); i_127441++) { + int32_t virt_group_id_127442 = phys_group_id_127440 + i_127441 * + sext_i64_i32(num_groups_109897); + int32_t flat_segment_id_127443 = squot32(virt_group_id_127442, + sext_i64_i32(groups_per_segment_127422)); + int64_t global_tid_127444 = srem64(sext_i32_i64(virt_group_id_127442) * + segred_group_sizze_109896 + + sext_i32_i64(local_tid_127432), + segred_group_sizze_109896 * + groups_per_segment_127422); + int64_t gtid_109809 = sext_i32_i64(flat_segment_id_127443); + int64_t gtid_109817; + double x_acc_127445; + int64_t chunk_sizze_127446; - chunk_sizze_46422 = smin64(elements_per_thread_46399, - sdiv_up64(i32_res_27781 - - sext_i32_i64(sext_i64_i32(global_tid_46420)), - threads_per_segment_46402)); + chunk_sizze_127446 = smin64(elements_per_thread_127423, + sdiv_up64(k2p2zq_75151 - global_tid_127444, + threads_per_segment_127426)); - int32_t x_32665; - int32_t x_32666; + double x_109900; + double x_109901; // neutral-initialise the accumulators { - x_acc_46421 = 0; + x_acc_127445 = 0.0; } - for (int64_t i_46426 = 0; i_46426 < chunk_sizze_46422; i_46426++) { - gtid_32649 = sext_i32_i64(sext_i64_i32(global_tid_46420)) + - threads_per_segment_46402 * i_46426; + for (int64_t i_127450 = 0; i_127450 < chunk_sizze_127446; i_127450++) { + gtid_109817 = 
global_tid_127444 + threads_per_segment_127426 * + i_127450; // apply map function { - float x_32669 = ((__global - float *) images_mem_44381)[gtid_32641 * - N_27771 + - gtid_32649]; - bool isnan_res_32670; - - isnan_res_32670 = futrts_isnan32(x_32669); - - bool cond_32671 = !isnan_res_32670; - int32_t defunc_0_f_res_32672 = btoi_bool_i32(cond_32671); + double x_109905 = ((__global double *) mem_120246)[gtid_109817 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109809 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_109906 = ((__global + double *) mem_param_121967)[gtid_109809 * + k2p2zq_75151 + + gtid_109817]; + double defunc_1_f_res_109907 = x_109905 * x_109906; // save map-out results { } // load accumulator { - x_32665 = x_acc_46421; + x_109900 = x_acc_127445; } // load new values { - x_32666 = defunc_0_f_res_32672; + x_109901 = defunc_1_f_res_109907; } // apply reduction operator { - int32_t defunc_1_op_res_32667 = add32(x_32665, x_32666); + double defunc_1_op_res_109902 = x_109900 + x_109901; // store in accumulator { - x_acc_46421 = defunc_1_op_res_32667; + x_acc_127445 = defunc_1_op_res_109902; } } } } // to reduce current chunk, first store our result in memory { - x_32665 = x_acc_46421; + x_109900 = x_acc_127445; ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_32665; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_109900; } barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46427; - int32_t skip_waves_46428; + int32_t offset_127451; + int32_t skip_waves_127452; - skip_waves_46428 = 1; + skip_waves_127452 = 1; - int32_t x_46423; - int32_t x_46424; + double x_127447; + double x_127448; - offset_46427 = 0; + offset_127451 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46408, - sext_i64_i32(segred_group_sizze_32661))) { - x_46423 = ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46427)]; - } - } - offset_46427 = 1; - while 
(slt32(offset_46427, wave_sizze_46410)) { - if (slt32(local_tid_46408 + offset_46427, - sext_i64_i32(segred_group_sizze_32661)) && - ((local_tid_46408 - squot32(local_tid_46408, wave_sizze_46410) * - wave_sizze_46410) & (2 * offset_46427 - 1)) == 0) { + if (slt32(local_tid_127432, + sext_i64_i32(segred_group_sizze_109896))) { + x_127447 = ((__local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127451)]; + } + } + offset_127451 = 1; + while (slt32(offset_127451, wave_sizze_127434)) { + if (slt32(local_tid_127432 + offset_127451, + sext_i64_i32(segred_group_sizze_109896)) && + ((local_tid_127432 - squot32(local_tid_127432, + wave_sizze_127434) * + wave_sizze_127434) & (2 * offset_127451 - 1)) == 0) { // read array element { - x_46424 = ((volatile __local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46427)]; + x_127448 = ((volatile __local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127451)]; } // apply reduction operation { - int32_t defunc_1_op_res_46425 = add32(x_46423, x_46424); + double defunc_1_op_res_127449 = x_127447 + x_127448; - x_46423 = defunc_1_op_res_46425; + x_127447 = defunc_1_op_res_127449; } // write result of operation { ((volatile __local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_46423; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_127447; } } - offset_46427 *= 2; + offset_127451 *= 2; } - while (slt32(skip_waves_46428, - squot32(sext_i64_i32(segred_group_sizze_32661) + - wave_sizze_46410 - 1, wave_sizze_46410))) { + while (slt32(skip_waves_127452, + squot32(sext_i64_i32(segred_group_sizze_109896) + + wave_sizze_127434 - 1, wave_sizze_127434))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46427 = skip_waves_46428 * wave_sizze_46410; - if (slt32(local_tid_46408 + offset_46427, - sext_i64_i32(segred_group_sizze_32661)) && - ((local_tid_46408 - squot32(local_tid_46408, wave_sizze_46410) * - wave_sizze_46410) == 0 && 
(squot32(local_tid_46408, - wave_sizze_46410) & (2 * - skip_waves_46428 - - 1)) == + offset_127451 = skip_waves_127452 * wave_sizze_127434; + if (slt32(local_tid_127432 + offset_127451, + sext_i64_i32(segred_group_sizze_109896)) && + ((local_tid_127432 - squot32(local_tid_127432, + wave_sizze_127434) * + wave_sizze_127434) == 0 && (squot32(local_tid_127432, + wave_sizze_127434) & (2 * + skip_waves_127452 - + 1)) == 0)) { // read array element { - x_46424 = ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46427)]; + x_127448 = ((__local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127451)]; } // apply reduction operation { - int32_t defunc_1_op_res_46425 = add32(x_46423, x_46424); + double defunc_1_op_res_127449 = x_127447 + x_127448; - x_46423 = defunc_1_op_res_46425; + x_127447 = defunc_1_op_res_127449; } // write result of operation { ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_46423; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_127447; } } - skip_waves_46428 *= 2; + skip_waves_127452 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_46408) == (int64_t) 0) { - x_acc_46421 = x_46423; + if (sext_i32_i64(local_tid_127432) == (int64_t) 0) { + x_acc_127445 = x_127447; } } - if (groups_per_segment_46398 == (int64_t) 1) { + if (groups_per_segment_127422 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46408 == 0) { - ((__global int32_t *) mem_45232)[gtid_32641] = x_acc_46421; + if (local_tid_127432 == 0) { + ((__global double *) mem_122677)[gtid_109809] = + x_acc_127445; } } } else { - int32_t old_counter_46429; + int32_t old_counter_127453; // first thread in group saves group result to global memory { - if (local_tid_46408 == 0) { + if (local_tid_127432 == 0) { ((__global - int32_t *) group_res_arr_mem_46403)[sext_i32_i64(virt_group_id_46418) 
* - segred_group_sizze_32661] = - x_acc_46421; + double *) group_res_arr_mem_127427)[sext_i32_i64(virt_group_id_127442) * + segred_group_sizze_109896] = + x_acc_127445; mem_fence_global(); - old_counter_46429 = + old_counter_127453 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46405)[sext_i32_i64(srem32(flat_segment_id_46419, - 10240))], + int *) mainzicounter_mem_127429)[sext_i32_i64(srem32(flat_segment_id_127443, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46414)[(int64_t) 0] = - old_counter_46429 == groups_per_segment_46398 - + ((__local bool *) sync_arr_mem_127438)[(int64_t) 0] = + old_counter_127453 == groups_per_segment_127422 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46430; + bool is_last_group_127454; - is_last_group_46430 = ((__local - bool *) sync_arr_mem_46414)[(int64_t) 0]; - if (is_last_group_46430) { - if (local_tid_46408 == 0) { - old_counter_46429 = + is_last_group_127454 = ((__local + bool *) sync_arr_mem_127438)[(int64_t) 0]; + if (is_last_group_127454) { + if (local_tid_127432 == 0) { + old_counter_127453 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46405)[sext_i32_i64(srem32(flat_segment_id_46419, - 10240))], + int *) mainzicounter_mem_127429)[sext_i32_i64(srem32(flat_segment_id_127443, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46398)); + groups_per_segment_127422)); } // read in the per-group-results { - int64_t read_per_thread_46431 = - sdiv_up64(groups_per_segment_46398, - segred_group_sizze_32661); + int64_t read_per_thread_127455 = + sdiv_up64(groups_per_segment_127422, + segred_group_sizze_109896); - x_32665 = 0; - for (int64_t i_46432 = 0; i_46432 < read_per_thread_46431; - i_46432++) { - int64_t group_res_id_46433 = - sext_i32_i64(local_tid_46408) * - read_per_thread_46431 + i_46432; - int64_t index_of_group_res_46434 = - sext_i32_i64(flat_segment_id_46419) * - groups_per_segment_46398 + 
group_res_id_46433; + x_109900 = 0.0; + for (int64_t i_127456 = 0; i_127456 < + read_per_thread_127455; i_127456++) { + int64_t group_res_id_127457 = + sext_i32_i64(local_tid_127432) * + read_per_thread_127455 + i_127456; + int64_t index_of_group_res_127458 = + sext_i32_i64(flat_segment_id_127443) * + groups_per_segment_127422 + group_res_id_127457; - if (slt64(group_res_id_46433, - groups_per_segment_46398)) { - x_32666 = ((__global - int32_t *) group_res_arr_mem_46403)[index_of_group_res_46434 * - segred_group_sizze_32661]; + if (slt64(group_res_id_127457, + groups_per_segment_127422)) { + x_109901 = ((__global + double *) group_res_arr_mem_127427)[index_of_group_res_127458 * + segred_group_sizze_109896]; - int32_t defunc_1_op_res_32667; + double defunc_1_op_res_109902; - defunc_1_op_res_32667 = add32(x_32665, x_32666); - x_32665 = defunc_1_op_res_32667; + defunc_1_op_res_109902 = x_109900 + x_109901; + x_109900 = defunc_1_op_res_109902; } } } ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_32665; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_109900; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46435; - int32_t skip_waves_46436; + int32_t offset_127459; + int32_t skip_waves_127460; - skip_waves_46436 = 1; + skip_waves_127460 = 1; - int32_t x_46423; - int32_t x_46424; + double x_127447; + double x_127448; - offset_46435 = 0; + offset_127459 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46408, - sext_i64_i32(segred_group_sizze_32661))) { - x_46423 = ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46435)]; - } - } - offset_46435 = 1; - while (slt32(offset_46435, wave_sizze_46410)) { - if (slt32(local_tid_46408 + offset_46435, - sext_i64_i32(segred_group_sizze_32661)) && - ((local_tid_46408 - squot32(local_tid_46408, - wave_sizze_46410) * - wave_sizze_46410) & (2 * offset_46435 - 1)) == + if 
(slt32(local_tid_127432, + sext_i64_i32(segred_group_sizze_109896))) { + x_127447 = ((__local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127459)]; + } + } + offset_127459 = 1; + while (slt32(offset_127459, wave_sizze_127434)) { + if (slt32(local_tid_127432 + offset_127459, + sext_i64_i32(segred_group_sizze_109896)) && + ((local_tid_127432 - squot32(local_tid_127432, + wave_sizze_127434) * + wave_sizze_127434) & (2 * offset_127459 - 1)) == 0) { // read array element { - x_46424 = ((volatile __local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46435)]; + x_127448 = ((volatile __local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127459)]; } // apply reduction operation { - int32_t defunc_1_op_res_46425 = add32(x_46423, - x_46424); + double defunc_1_op_res_127449 = x_127447 + + x_127448; - x_46423 = defunc_1_op_res_46425; + x_127447 = defunc_1_op_res_127449; } // write result of operation { ((volatile __local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_46423; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_127447; } } - offset_46435 *= 2; + offset_127459 *= 2; } - while (slt32(skip_waves_46436, - squot32(sext_i64_i32(segred_group_sizze_32661) + - wave_sizze_46410 - 1, - wave_sizze_46410))) { + while (slt32(skip_waves_127460, + squot32(sext_i64_i32(segred_group_sizze_109896) + + wave_sizze_127434 - 1, + wave_sizze_127434))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46435 = skip_waves_46436 * wave_sizze_46410; - if (slt32(local_tid_46408 + offset_46435, - sext_i64_i32(segred_group_sizze_32661)) && - ((local_tid_46408 - squot32(local_tid_46408, - wave_sizze_46410) * - wave_sizze_46410) == 0 && - (squot32(local_tid_46408, wave_sizze_46410) & (2 * - skip_waves_46436 - - 1)) == - 0)) { + offset_127459 = skip_waves_127460 * wave_sizze_127434; + if (slt32(local_tid_127432 + offset_127459, + sext_i64_i32(segred_group_sizze_109896)) && + 
((local_tid_127432 - squot32(local_tid_127432, + wave_sizze_127434) * + wave_sizze_127434) == 0 && + (squot32(local_tid_127432, wave_sizze_127434) & + (2 * skip_waves_127460 - 1)) == 0)) { // read array element { - x_46424 = ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408 + - offset_46435)]; + x_127448 = ((__local + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432 + + offset_127459)]; } // apply reduction operation { - int32_t defunc_1_op_res_46425 = add32(x_46423, - x_46424); + double defunc_1_op_res_127449 = x_127447 + + x_127448; - x_46423 = defunc_1_op_res_46425; + x_127447 = defunc_1_op_res_127449; } // write result of operation { ((__local - int32_t *) red_arr_mem_46412)[sext_i32_i64(local_tid_46408)] = - x_46423; + double *) red_arr_mem_127436)[sext_i32_i64(local_tid_127432)] = + x_127447; } } - skip_waves_46436 *= 2; + skip_waves_127460 *= 2; } // and back to memory with the final result { - if (local_tid_46408 == 0) { - ((__global int32_t *) mem_45232)[gtid_32641] = - x_46423; + if (local_tid_127432 == 0) { + ((__global double *) mem_122677)[gtid_109809] = + x_127447; } } } @@ -29128,446 +34644,431 @@ def sync(self): error_1: return; - #undef segred_group_sizze_32661 + #undef segred_group_sizze_109896 } -__kernel void mainDetailedzisegred_large_32813(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46583_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46581_backing_aligned_1, - int64_t N_27771, - int64_t i32_res_28174, - int64_t num_groups_32835, - int64_t groups_per_segment_46567, - int64_t elements_per_thread_46568, - int64_t virt_num_groups_46569, - int64_t threads_per_segment_46571, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45278, - __global - unsigned char 
*group_res_arr_mem_46572, - __global - unsigned char *mainDetailedzicounter_mem_46574) +__kernel void mainzisegred_large_109847(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127372_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127370_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t r_75826, + int64_t num_groups_109866, + int64_t groups_per_segment_127356, + int64_t elements_per_thread_127357, + int64_t virt_num_groups_127358, + int64_t threads_per_segment_127360, + __global unsigned char *mem_120246, + __global unsigned char *mem_122665, + __global unsigned char *mem_122668, + __global unsigned char *mem_122671, + __global + unsigned char *group_res_arr_mem_127361, + __global + unsigned char *mainzicounter_mem_127363) { - #define segred_group_sizze_32834 (mainDetailedzisegred_group_sizze_32807) + #define segred_group_sizze_109865 (mainzisegred_group_sizze_109841) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46583_backing_1 = + __local volatile char *restrict sync_arr_mem_127372_backing_1 = (__local volatile - char *) sync_arr_mem_46583_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46581_backing_0 = + char *) sync_arr_mem_127372_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127370_backing_0 = (__local volatile - char *) red_arr_mem_46581_backing_aligned_1; - volatile __local bool local_failure; + char *) red_arr_mem_127370_backing_aligned_1; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - int32_t global_tid_46576; - int32_t local_tid_46577; - int64_t group_sizze_46580; - int32_t wave_sizze_46579; - int32_t group_tid_46578; + int32_t global_tid_127365; + int32_t local_tid_127366; + int64_t group_sizze_127369; 
+ int32_t wave_sizze_127368; + int32_t group_tid_127367; - global_tid_46576 = get_global_id(0); - local_tid_46577 = get_local_id(0); - group_sizze_46580 = get_local_size(0); - wave_sizze_46579 = LOCKSTEP_WIDTH; - group_tid_46578 = get_group_id(0); + global_tid_127365 = get_global_id(0); + local_tid_127366 = get_local_id(0); + group_sizze_127369 = get_local_size(0); + wave_sizze_127368 = LOCKSTEP_WIDTH; + group_tid_127367 = get_group_id(0); - int32_t phys_tid_32813; + int32_t phys_tid_109847; - phys_tid_32813 = global_tid_46576; + phys_tid_109847 = global_tid_127365; - __local char *red_arr_mem_46581; + __local char *red_arr_mem_127370; - red_arr_mem_46581 = (__local char *) red_arr_mem_46581_backing_0; + red_arr_mem_127370 = (__local char *) red_arr_mem_127370_backing_0; - __local char *sync_arr_mem_46583; + __local char *sync_arr_mem_127372; - sync_arr_mem_46583 = (__local char *) sync_arr_mem_46583_backing_1; + sync_arr_mem_127372 = (__local char *) sync_arr_mem_127372_backing_1; - int32_t phys_group_id_46585; + int32_t phys_group_id_127374; - phys_group_id_46585 = get_group_id(0); - for (int32_t i_46586 = 0; i_46586 < - sdiv_up32(sext_i64_i32(virt_num_groups_46569) - phys_group_id_46585, - sext_i64_i32(num_groups_32835)); i_46586++) { - int32_t virt_group_id_46587 = phys_group_id_46585 + i_46586 * - sext_i64_i32(num_groups_32835); - int32_t flat_segment_id_46588 = squot32(virt_group_id_46587, - sext_i64_i32(groups_per_segment_46567)); - int64_t global_tid_46589 = srem64(sext_i32_i64(virt_group_id_46587) * - segred_group_sizze_32834 + - sext_i32_i64(local_tid_46577), - segred_group_sizze_32834 * - groups_per_segment_46567); - int64_t gtid_32804 = sext_i32_i64(flat_segment_id_46588); - int64_t gtid_32812; - float x_acc_46590; - int64_t chunk_sizze_46591; + phys_group_id_127374 = get_group_id(0); + for (int32_t i_127375 = 0; i_127375 < + sdiv_up32(sext_i64_i32(virt_num_groups_127358) - phys_group_id_127374, + sext_i64_i32(num_groups_109866)); i_127375++) { + 
int32_t virt_group_id_127376 = phys_group_id_127374 + i_127375 * + sext_i64_i32(num_groups_109866); + int32_t flat_segment_id_127377 = squot32(virt_group_id_127376, + sext_i64_i32(groups_per_segment_127356)); + int64_t global_tid_127378 = srem64(sext_i32_i64(virt_group_id_127376) * + segred_group_sizze_109865 + + sext_i32_i64(local_tid_127366), + segred_group_sizze_109865 * + groups_per_segment_127356); + int64_t gtid_109838 = sext_i32_i64(flat_segment_id_127377); + int64_t gtid_109846; + double x_acc_127379; + int64_t chunk_sizze_127380; - chunk_sizze_46591 = smin64(elements_per_thread_46568, - sdiv_up64(i32_res_28174 - - sext_i32_i64(sext_i64_i32(global_tid_46589)), - threads_per_segment_46571)); + chunk_sizze_127380 = smin64(elements_per_thread_127357, + sdiv_up64(k2p2zq_75151 - global_tid_127378, + threads_per_segment_127360)); - float x_32838; - float x_32839; + double x_109870; + double x_109871; // neutral-initialise the accumulators { - x_acc_46590 = 0.0F; + x_acc_127379 = 0.0; } - for (int64_t i_46595 = 0; i_46595 < chunk_sizze_46591; i_46595++) { - gtid_32812 = sext_i32_i64(sext_i64_i32(global_tid_46589)) + - threads_per_segment_46571 * i_46595; + for (int64_t i_127384 = 0; i_127384 < chunk_sizze_127380; i_127384++) { + gtid_109846 = global_tid_127378 + threads_per_segment_127360 * + i_127384; // apply map function { - int32_t x_32843 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_32804]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_32812); - bool cond_32845 = slt32(index_primexp_42390, x_32843); - float defunc_0_f_res_32846; - - if (cond_32845) { - int32_t x_32842 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_32804]; - int32_t x_32847 = add32(x_32842, index_primexp_42390); - int32_t x_32848 = sub32(x_32847, x_32843); - int32_t i_32849 = add32(1, x_32848); - int64_t i_32850 = sext_i32_i64(i_32849); - bool x_32851 = sle64((int64_t) 0, i_32850); - bool y_32852 = slt64(i_32850, N_27771); - bool bounds_check_32853 = x_32851 && 
y_32852; - bool index_certs_32854; - - if (!bounds_check_32853) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 54) == -1) { - global_failure_args[0] = i_32850; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } + double x_109876 = ((__global double *) mem_120246)[gtid_109846 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109838 * + defunc_2_reduce_res_75260 + + r_75826]; + double defunc_0_f_res_109877; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_75151; + i_119830++) { + double x_109881 = ((__global + double *) mem_120246)[i_119830 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109838 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_109882 = ((__global + double *) mem_122665)[i_119830 * + (k2p2zq_75151 * + m_75136) + + gtid_109838 * + k2p2zq_75151 + + gtid_109846]; + double defunc_1_f_res_109883 = x_109881 * x_109882; + double defunc_1_op_res_109880 = defunc_1_f_res_109883 + + redout_119829; + double redout_tmp_127385 = defunc_1_op_res_109880; - float defunc_0_f_res_t_res_32855 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_32804 * - N_27771 + - i_32850]; - - defunc_0_f_res_32846 = defunc_0_f_res_t_res_32855; - } else { - defunc_0_f_res_32846 = 0.0F; + redout_119829 = redout_tmp_127385; } + defunc_0_f_res_109877 = redout_119829; + + double defunc_1_f_res_109884 = x_109876 * defunc_0_f_res_109877; + // save map-out results - { } + { + ((__global double *) mem_122671)[gtid_109838 * + k2p2zq_75151 + + gtid_109846] = + defunc_0_f_res_109877; + } // load accumulator { - x_32838 = x_acc_46590; + x_109870 = x_acc_127379; } // load new values { - x_32839 = defunc_0_f_res_32846; + x_109871 = defunc_1_f_res_109884; } // apply reduction operator { - float defunc_1_op_res_32840 = x_32838 + x_32839; + double defunc_1_op_res_109872 = x_109870 + x_109871; // store in accumulator { - x_acc_46590 = defunc_1_op_res_32840; + x_acc_127379 = defunc_1_op_res_109872; } } } 
} // to reduce current chunk, first store our result in memory { - x_32838 = x_acc_46590; + x_109870 = x_acc_127379; ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_32838; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_109870; } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; barrier(CLK_LOCAL_MEM_FENCE); - int32_t offset_46596; - int32_t skip_waves_46597; + int32_t offset_127386; + int32_t skip_waves_127387; - skip_waves_46597 = 1; + skip_waves_127387 = 1; - float x_46592; - float x_46593; + double x_127381; + double x_127382; - offset_46596 = 0; + offset_127386 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46577, - sext_i64_i32(segred_group_sizze_32834))) { - x_46592 = ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46596)]; - } - } - offset_46596 = 1; - while (slt32(offset_46596, wave_sizze_46579)) { - if (slt32(local_tid_46577 + offset_46596, - sext_i64_i32(segred_group_sizze_32834)) && - ((local_tid_46577 - squot32(local_tid_46577, wave_sizze_46579) * - wave_sizze_46579) & (2 * offset_46596 - 1)) == 0) { + if (slt32(local_tid_127366, + sext_i64_i32(segred_group_sizze_109865))) { + x_127381 = ((__local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127386)]; + } + } + offset_127386 = 1; + while (slt32(offset_127386, wave_sizze_127368)) { + if (slt32(local_tid_127366 + offset_127386, + sext_i64_i32(segred_group_sizze_109865)) && + ((local_tid_127366 - squot32(local_tid_127366, + wave_sizze_127368) * + wave_sizze_127368) & (2 * offset_127386 - 1)) == 0) { // read array element { - x_46593 = ((volatile __local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46596)]; + x_127382 = ((volatile __local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127386)]; } // apply reduction operation { - float defunc_1_op_res_46594 = x_46592 + x_46593; + double 
defunc_1_op_res_127383 = x_127381 + x_127382; - x_46592 = defunc_1_op_res_46594; + x_127381 = defunc_1_op_res_127383; } // write result of operation { ((volatile __local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_46592; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_127381; } } - offset_46596 *= 2; + offset_127386 *= 2; } - while (slt32(skip_waves_46597, - squot32(sext_i64_i32(segred_group_sizze_32834) + - wave_sizze_46579 - 1, wave_sizze_46579))) { + while (slt32(skip_waves_127387, + squot32(sext_i64_i32(segred_group_sizze_109865) + + wave_sizze_127368 - 1, wave_sizze_127368))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46596 = skip_waves_46597 * wave_sizze_46579; - if (slt32(local_tid_46577 + offset_46596, - sext_i64_i32(segred_group_sizze_32834)) && - ((local_tid_46577 - squot32(local_tid_46577, wave_sizze_46579) * - wave_sizze_46579) == 0 && (squot32(local_tid_46577, - wave_sizze_46579) & (2 * - skip_waves_46597 - - 1)) == + offset_127386 = skip_waves_127387 * wave_sizze_127368; + if (slt32(local_tid_127366 + offset_127386, + sext_i64_i32(segred_group_sizze_109865)) && + ((local_tid_127366 - squot32(local_tid_127366, + wave_sizze_127368) * + wave_sizze_127368) == 0 && (squot32(local_tid_127366, + wave_sizze_127368) & (2 * + skip_waves_127387 - + 1)) == 0)) { // read array element { - x_46593 = ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46596)]; + x_127382 = ((__local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127386)]; } // apply reduction operation { - float defunc_1_op_res_46594 = x_46592 + x_46593; + double defunc_1_op_res_127383 = x_127381 + x_127382; - x_46592 = defunc_1_op_res_46594; + x_127381 = defunc_1_op_res_127383; } // write result of operation { ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_46592; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_127381; } } - skip_waves_46597 *= 2; + 
skip_waves_127387 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); // first thread saves the result in accumulator { - if (sext_i32_i64(local_tid_46577) == (int64_t) 0) { - x_acc_46590 = x_46592; + if (sext_i32_i64(local_tid_127366) == (int64_t) 0) { + x_acc_127379 = x_127381; } } - if (groups_per_segment_46567 == (int64_t) 1) { + if (groups_per_segment_127356 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46577 == 0) { - ((__global float *) mem_45278)[gtid_32804] = x_acc_46590; + if (local_tid_127366 == 0) { + ((__global double *) mem_122668)[gtid_109838] = + x_acc_127379; } } } else { - int32_t old_counter_46598; + int32_t old_counter_127388; // first thread in group saves group result to global memory { - if (local_tid_46577 == 0) { + if (local_tid_127366 == 0) { ((__global - float *) group_res_arr_mem_46572)[sext_i32_i64(virt_group_id_46587) * - segred_group_sizze_32834] = - x_acc_46590; + double *) group_res_arr_mem_127361)[sext_i32_i64(virt_group_id_127376) * + segred_group_sizze_109865] = + x_acc_127379; mem_fence_global(); - old_counter_46598 = + old_counter_127388 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46574)[sext_i32_i64(srem32(flat_segment_id_46588, - 10240))], + int *) mainzicounter_mem_127363)[sext_i32_i64(srem32(flat_segment_id_127377, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46583)[(int64_t) 0] = - old_counter_46598 == groups_per_segment_46567 - + ((__local bool *) sync_arr_mem_127372)[(int64_t) 0] = + old_counter_127388 == groups_per_segment_127356 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46599; + bool is_last_group_127389; - is_last_group_46599 = ((__local - bool *) sync_arr_mem_46583)[(int64_t) 0]; - if (is_last_group_46599) { - if (local_tid_46577 == 0) { - old_counter_46598 = + is_last_group_127389 = ((__local + bool *) sync_arr_mem_127372)[(int64_t) 0]; + if (is_last_group_127389) { + if (local_tid_127366 == 
0) { + old_counter_127388 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46574)[sext_i32_i64(srem32(flat_segment_id_46588, - 10240))], + int *) mainzicounter_mem_127363)[sext_i32_i64(srem32(flat_segment_id_127377, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46567)); + groups_per_segment_127356)); } // read in the per-group-results { - int64_t read_per_thread_46600 = - sdiv_up64(groups_per_segment_46567, - segred_group_sizze_32834); + int64_t read_per_thread_127390 = + sdiv_up64(groups_per_segment_127356, + segred_group_sizze_109865); - x_32838 = 0.0F; - for (int64_t i_46601 = 0; i_46601 < read_per_thread_46600; - i_46601++) { - int64_t group_res_id_46602 = - sext_i32_i64(local_tid_46577) * - read_per_thread_46600 + i_46601; - int64_t index_of_group_res_46603 = - sext_i32_i64(flat_segment_id_46588) * - groups_per_segment_46567 + group_res_id_46602; + x_109870 = 0.0; + for (int64_t i_127391 = 0; i_127391 < + read_per_thread_127390; i_127391++) { + int64_t group_res_id_127392 = + sext_i32_i64(local_tid_127366) * + read_per_thread_127390 + i_127391; + int64_t index_of_group_res_127393 = + sext_i32_i64(flat_segment_id_127377) * + groups_per_segment_127356 + group_res_id_127392; - if (slt64(group_res_id_46602, - groups_per_segment_46567)) { - x_32839 = ((__global - float *) group_res_arr_mem_46572)[index_of_group_res_46603 * - segred_group_sizze_32834]; + if (slt64(group_res_id_127392, + groups_per_segment_127356)) { + x_109871 = ((__global + double *) group_res_arr_mem_127361)[index_of_group_res_127393 * + segred_group_sizze_109865]; - float defunc_1_op_res_32840; + double defunc_1_op_res_109872; - defunc_1_op_res_32840 = x_32838 + x_32839; - x_32838 = defunc_1_op_res_32840; + defunc_1_op_res_109872 = x_109870 + x_109871; + x_109870 = defunc_1_op_res_109872; } } } ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_32838; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_109870; 
barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46604; - int32_t skip_waves_46605; + int32_t offset_127394; + int32_t skip_waves_127395; - skip_waves_46605 = 1; + skip_waves_127395 = 1; - float x_46592; - float x_46593; + double x_127381; + double x_127382; - offset_46604 = 0; + offset_127394 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46577, - sext_i64_i32(segred_group_sizze_32834))) { - x_46592 = ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46604)]; - } - } - offset_46604 = 1; - while (slt32(offset_46604, wave_sizze_46579)) { - if (slt32(local_tid_46577 + offset_46604, - sext_i64_i32(segred_group_sizze_32834)) && - ((local_tid_46577 - squot32(local_tid_46577, - wave_sizze_46579) * - wave_sizze_46579) & (2 * offset_46604 - 1)) == + if (slt32(local_tid_127366, + sext_i64_i32(segred_group_sizze_109865))) { + x_127381 = ((__local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127394)]; + } + } + offset_127394 = 1; + while (slt32(offset_127394, wave_sizze_127368)) { + if (slt32(local_tid_127366 + offset_127394, + sext_i64_i32(segred_group_sizze_109865)) && + ((local_tid_127366 - squot32(local_tid_127366, + wave_sizze_127368) * + wave_sizze_127368) & (2 * offset_127394 - 1)) == 0) { // read array element { - x_46593 = ((volatile __local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46604)]; + x_127382 = ((volatile __local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127394)]; } // apply reduction operation { - float defunc_1_op_res_46594 = x_46592 + x_46593; + double defunc_1_op_res_127383 = x_127381 + + x_127382; - x_46592 = defunc_1_op_res_46594; + x_127381 = defunc_1_op_res_127383; } // write result of operation { ((volatile __local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_46592; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_127381; } } - 
offset_46604 *= 2; + offset_127394 *= 2; } - while (slt32(skip_waves_46605, - squot32(sext_i64_i32(segred_group_sizze_32834) + - wave_sizze_46579 - 1, - wave_sizze_46579))) { + while (slt32(skip_waves_127395, + squot32(sext_i64_i32(segred_group_sizze_109865) + + wave_sizze_127368 - 1, + wave_sizze_127368))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46604 = skip_waves_46605 * wave_sizze_46579; - if (slt32(local_tid_46577 + offset_46604, - sext_i64_i32(segred_group_sizze_32834)) && - ((local_tid_46577 - squot32(local_tid_46577, - wave_sizze_46579) * - wave_sizze_46579) == 0 && - (squot32(local_tid_46577, wave_sizze_46579) & (2 * - skip_waves_46605 - - 1)) == - 0)) { + offset_127394 = skip_waves_127395 * wave_sizze_127368; + if (slt32(local_tid_127366 + offset_127394, + sext_i64_i32(segred_group_sizze_109865)) && + ((local_tid_127366 - squot32(local_tid_127366, + wave_sizze_127368) * + wave_sizze_127368) == 0 && + (squot32(local_tid_127366, wave_sizze_127368) & + (2 * skip_waves_127395 - 1)) == 0)) { // read array element { - x_46593 = ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577 + - offset_46604)]; + x_127382 = ((__local + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366 + + offset_127394)]; } // apply reduction operation { - float defunc_1_op_res_46594 = x_46592 + x_46593; + double defunc_1_op_res_127383 = x_127381 + + x_127382; - x_46592 = defunc_1_op_res_46594; + x_127381 = defunc_1_op_res_127383; } // write result of operation { ((__local - float *) red_arr_mem_46581)[sext_i32_i64(local_tid_46577)] = - x_46592; + double *) red_arr_mem_127370)[sext_i32_i64(local_tid_127366)] = + x_127381; } } - skip_waves_46605 *= 2; + skip_waves_127395 *= 2; } // and back to memory with the final result { - if (local_tid_46577 == 0) { - ((__global float *) mem_45278)[gtid_32804] = - x_46592; + if (local_tid_127366 == 0) { + ((__global double *) mem_122668)[gtid_109838] = + x_127381; } } } @@ -29578,765 +35079,435 @@ def sync(self): error_1: 
return; - #undef segred_group_sizze_32834 + #undef segred_group_sizze_109865 } -__kernel void mainDetailedzisegred_large_33994(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46810_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46808_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46806_backing_aligned_2, - __local volatile - int64_t *red_arr_mem_46804_backing_aligned_3, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, - int64_t num_groups_34139, - int64_t groups_per_segment_46786, - int64_t elements_per_thread_46787, - int64_t virt_num_groups_46788, - __global - unsigned char *mem_45282, - __global - unsigned char *mem_45399, - __global - unsigned char *mem_45403, - __global - unsigned char *mem_45406, - __global - unsigned char *mem_45409, - __global - unsigned char *mem_45411, - __global - unsigned char *mem_45413, - __global - unsigned char *mem_45416, - __global - unsigned char *group_res_arr_mem_46791, - __global - unsigned char *group_res_arr_mem_46793, - __global - unsigned char *group_res_arr_mem_46795, - __global - unsigned char *mainDetailedzicounter_mem_46797) +__kernel void mainzisegred_large_111144(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128247_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128245_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111352, + int64_t binop_x_120251, + int64_t groups_per_segment_128231, + int64_t elements_per_thread_128232, + int64_t virt_num_groups_128233, + int64_t threads_per_segment_128235, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global unsigned char *mem_123907, + __global unsigned char *mem_123910, + __global unsigned char *mem_123944, + __global unsigned char *mem_123948, + __global + unsigned char *group_res_arr_mem_128236, + __global + unsigned char 
*mainzicounter_mem_128238) { - #define segred_group_sizze_34138 (mainDetailedzisegred_group_sizze_33988) + #define segred_group_sizze_111351 (mainzisegred_group_sizze_111138) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46810_backing_3 = + __local volatile char *restrict sync_arr_mem_128247_backing_1 = (__local volatile - char *) sync_arr_mem_46810_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46808_backing_2 = + char *) sync_arr_mem_128247_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128245_backing_0 = (__local volatile - char *) red_arr_mem_46808_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46806_backing_1 = - (__local volatile - char *) red_arr_mem_46806_backing_aligned_2; - __local volatile char *restrict red_arr_mem_46804_backing_0 = - (__local volatile - char *) red_arr_mem_46804_backing_aligned_3; + char *) red_arr_mem_128245_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46799; - int32_t local_tid_46800; - int64_t group_sizze_46803; - int32_t wave_sizze_46802; - int32_t group_tid_46801; - - global_tid_46799 = get_global_id(0); - local_tid_46800 = get_local_id(0); - group_sizze_46803 = get_local_size(0); - wave_sizze_46802 = LOCKSTEP_WIDTH; - group_tid_46801 = get_group_id(0); - - int32_t phys_tid_33994; - - phys_tid_33994 = global_tid_46799; - - __local char *red_arr_mem_46804; - - red_arr_mem_46804 = (__local char *) red_arr_mem_46804_backing_0; - - __local char *red_arr_mem_46806; - - red_arr_mem_46806 = (__local char *) red_arr_mem_46806_backing_1; - - __local char *red_arr_mem_46808; - - red_arr_mem_46808 = (__local char *) red_arr_mem_46808_backing_2; - - __local char *sync_arr_mem_46810; - - sync_arr_mem_46810 = (__local char *) sync_arr_mem_46810_backing_3; - - int32_t phys_group_id_46812; - - phys_group_id_46812 = get_group_id(0); - for (int32_t i_46813 = 0; i_46813 < - 
sdiv_up32(sext_i64_i32(virt_num_groups_46788) - phys_group_id_46812, - sext_i64_i32(num_groups_34139)); i_46813++) { - int32_t virt_group_id_46814 = phys_group_id_46812 + i_46813 * - sext_i64_i32(num_groups_34139); - int32_t flat_segment_id_46815 = squot32(virt_group_id_46814, - sext_i64_i32(groups_per_segment_46786)); - int64_t global_tid_46816 = srem64(sext_i32_i64(virt_group_id_46814) * - segred_group_sizze_34138 + - sext_i32_i64(local_tid_46800), - segred_group_sizze_34138 * - groups_per_segment_46786); - int64_t gtid_33985 = sext_i32_i64(flat_segment_id_46815); - int64_t gtid_33993; - bool x_acc_46817; - int32_t x_acc_46818; - float x_acc_46819; - int64_t chunk_sizze_46820; - int64_t starting_point_46821; - - starting_point_46821 = sext_i32_i64(sext_i64_i32(global_tid_46816)) * - elements_per_thread_46787; - - int64_t remaining_elements_46822; - - remaining_elements_46822 = iota32_arg_28203 - starting_point_46821; - if (sle64(remaining_elements_46822, (int64_t) 0) || - sle64(iota32_arg_28203, starting_point_46821)) { - chunk_sizze_46820 = (int64_t) 0; - } else { - if (slt64(iota32_arg_28203, - (sext_i32_i64(sext_i64_i32(global_tid_46816)) + - (int64_t) 1) * elements_per_thread_46787)) { - chunk_sizze_46820 = iota32_arg_28203 - - sext_i32_i64(sext_i64_i32(global_tid_46816)) * - elements_per_thread_46787; - } else { - chunk_sizze_46820 = elements_per_thread_46787; - } - } - - bool x_34145; - int32_t x_34146; - float x_34147; - bool x_34148; - int32_t x_34149; - float x_34150; + int32_t global_tid_128240; + int32_t local_tid_128241; + int64_t group_sizze_128244; + int32_t wave_sizze_128243; + int32_t group_tid_128242; + + global_tid_128240 = get_global_id(0); + local_tid_128241 = get_local_id(0); + group_sizze_128244 = get_local_size(0); + wave_sizze_128243 = LOCKSTEP_WIDTH; + group_tid_128242 = get_group_id(0); + + int32_t phys_tid_111144; + + phys_tid_111144 = global_tid_128240; + + __local char *red_arr_mem_128245; + + red_arr_mem_128245 = (__local char *) 
red_arr_mem_128245_backing_0; + + __local char *sync_arr_mem_128247; + + sync_arr_mem_128247 = (__local char *) sync_arr_mem_128247_backing_1; + + int32_t phys_group_id_128249; + + phys_group_id_128249 = get_group_id(0); + for (int32_t i_128250 = 0; i_128250 < + sdiv_up32(sext_i64_i32(virt_num_groups_128233) - phys_group_id_128249, + sext_i64_i32(num_groups_111352)); i_128250++) { + int32_t virt_group_id_128251 = phys_group_id_128249 + i_128250 * + sext_i64_i32(num_groups_111352); + int32_t flat_segment_id_128252 = squot32(virt_group_id_128251, + sext_i64_i32(groups_per_segment_128231)); + int64_t global_tid_128253 = srem64(sext_i32_i64(virt_group_id_128251) * + segred_group_sizze_111351 + + sext_i32_i64(local_tid_128241), + segred_group_sizze_111351 * + groups_per_segment_128231); + int64_t gtid_111133 = squot64(sext_i32_i64(flat_segment_id_128252), + k2p2zq_75151); + int64_t gtid_111134 = sext_i32_i64(flat_segment_id_128252) - + squot64(sext_i32_i64(flat_segment_id_128252), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_111143; + double x_acc_128254; + int64_t chunk_sizze_128255; + + chunk_sizze_128255 = smin64(elements_per_thread_128232, + sdiv_up64(k2p2zq_75151 - global_tid_128253, + threads_per_segment_128235)); + + double x_111356; + double x_111357; // neutral-initialise the accumulators { - x_acc_46817 = 0; - x_acc_46818 = -1; - x_acc_46819 = 0.0F; - } - for (int64_t i_46837 = 0; i_46837 < elements_per_thread_46787; - i_46837++) { - gtid_33993 = sext_i32_i64(local_tid_46800) + - (sext_i32_i64(squot32(sext_i64_i32(global_tid_46816), - sext_i64_i32(segred_group_sizze_34138))) * - elements_per_thread_46787 + i_46837) * - segred_group_sizze_34138; - if (slt64(gtid_33993, iota32_arg_28203)) { - // apply map function + x_acc_128254 = 0.0; + } + for (int64_t i_128259 = 0; i_128259 < chunk_sizze_128255; i_128259++) { + gtid_111143 = global_tid_128253 + threads_per_segment_128235 * + i_128259; + // apply map function + { + double fr_111362 = ((__global + double *) 
mem_123910)[gtid_111133]; + double x_111363 = ((__global double *) mem_123907)[gtid_111133 * + k2p2zq_75151 + + gtid_111134]; + double x_111365 = ((__global double *) mem_123907)[gtid_111133 * + k2p2zq_75151 + + gtid_111143]; + double x_111366 = ((__global + double *) mem_param_123778)[gtid_111133 * + binop_x_120251 + + gtid_111134 * + k2p2zq_75151 + + gtid_111143]; + double x_111367 = ((__global double *) mem_120246)[gtid_111143 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111133 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111368 = x_111363 * x_111365; + double y_111369 = x_111368 / fr_111362; + double defunc_1_f_res_111370 = x_111366 - y_111369; + double defunc_1_f_res_111371 = x_111367 * defunc_1_f_res_111370; + + // save map-out results { - int32_t y_34159 = ((__global - int32_t *) mem_45399)[gtid_33985]; - float y_34160 = ((__global float *) mem_45406)[gtid_33985]; - int64_t binop_x_42448 = iota32_arg_28203 * gtid_33985; - int64_t binop_x_42449 = gtid_33993 + binop_x_42448; - int64_t new_index_42450 = squot64(binop_x_42449, - iota32_arg_28233); - int64_t binop_y_42456 = iota32_arg_28233 * new_index_42450; - int64_t new_index_42457 = binop_x_42449 - binop_y_42456; - float x_34162 = ((__global - float *) mem_45403)[new_index_42450 * - iota32_arg_28233 + - new_index_42457]; - float x_34163 = ((__global float *) mem_45282)[gtid_33993]; - int32_t index_primexp_42414 = sext_i64_i32(gtid_33993); - float defunc_0_f_res_34165 = x_34162 / y_34160; - bool cond_34166 = slt32(index_primexp_42414, y_34159); - bool isnan_res_34167; - - isnan_res_34167 = futrts_isnan32(defunc_0_f_res_34165); - - bool cond_t_res_34168 = !isnan_res_34167; - bool x_34169 = cond_34166 && cond_t_res_34168; - float abs_res_34170 = (float) fabs(defunc_0_f_res_34165); - bool defunc_2_f_res_t_res_34171 = x_34163 < abs_res_34170; - bool x_34172 = x_34169 && defunc_2_f_res_t_res_34171; - float defunc_1_f_res_34173; - - if (cond_34166) { - defunc_1_f_res_34173 = 
defunc_0_f_res_34165; - } else { - defunc_1_f_res_34173 = 0.0F; - } - // save map-out results - { - ((__global float *) mem_45416)[gtid_33985 * - iota32_arg_28203 + - gtid_33993] = - defunc_0_f_res_34165; - } - // load accumulator - { - x_34145 = x_acc_46817; - x_34146 = x_acc_46818; - x_34147 = x_acc_46819; - } - // load new values - { - x_34148 = x_34172; - x_34149 = index_primexp_42414; - x_34150 = defunc_1_f_res_34173; - } - // apply reduction operator - { - bool defunc_1_op_res_34151; - int32_t defunc_1_op_res_34152; - - if (x_34145) { - defunc_1_op_res_34151 = x_34145; - defunc_1_op_res_34152 = x_34146; - } else { - bool x_34153 = x_34148 && x_34148; - bool x_34154 = !x_34148; - bool y_34155 = x_34145 && x_34154; - bool defunc_1_op_res_f_res_34156 = x_34153 || - y_34155; - int32_t defunc_1_op_res_f_res_34157; - - if (x_34148) { - defunc_1_op_res_f_res_34157 = x_34149; - } else { - defunc_1_op_res_f_res_34157 = x_34146; - } - defunc_1_op_res_34151 = defunc_1_op_res_f_res_34156; - defunc_1_op_res_34152 = defunc_1_op_res_f_res_34157; - } - - float defunc_1_op_res_34158 = x_34147 + x_34150; - - // store in accumulator - { - x_acc_46817 = defunc_1_op_res_34151; - x_acc_46818 = defunc_1_op_res_34152; - x_acc_46819 = defunc_1_op_res_34158; - } - } + ((__global double *) mem_123948)[gtid_111133 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_111134 * + k2p2zq_75151 + + gtid_111143] = + defunc_1_f_res_111370; } - } - // to reduce current chunk, first store our result in memory - { - x_34145 = x_acc_46817; - x_34146 = x_acc_46818; - x_34147 = x_acc_46819; - ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_34145; - ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_34146; - ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_34147; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46838; - int32_t skip_waves_46839; - - skip_waves_46839 = 1; - - bool x_46823; - int32_t x_46824; - float 
x_46825; - bool x_46826; - int32_t x_46827; - float x_46828; - - offset_46838 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46800, - sext_i64_i32(segred_group_sizze_34138))) { - x_46823 = ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46824 = ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46825 = ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - } - } - offset_46838 = 1; - while (slt32(offset_46838, wave_sizze_46802)) { - if (slt32(local_tid_46800 + offset_46838, - sext_i64_i32(segred_group_sizze_34138)) && - ((local_tid_46800 - squot32(local_tid_46800, - wave_sizze_46802) * - wave_sizze_46802) & (2 * offset_46838 - 1)) == 0) { - // read array element - { - x_46826 = ((volatile __local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46827 = ((volatile __local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46828 = ((volatile __local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46829; - int32_t defunc_1_op_res_46830; - - if (x_46823) { - defunc_1_op_res_46829 = x_46823; - defunc_1_op_res_46830 = x_46824; - } else { - bool x_46831 = x_46826 && x_46826; - bool x_46832 = !x_46826; - bool y_46833 = x_46823 && x_46832; - bool defunc_1_op_res_f_res_46834 = x_46831 || - y_46833; - int32_t defunc_1_op_res_f_res_46835; - - if (x_46826) { - defunc_1_op_res_f_res_46835 = x_46827; - } else { - defunc_1_op_res_f_res_46835 = x_46824; - } - defunc_1_op_res_46829 = defunc_1_op_res_f_res_46834; - defunc_1_op_res_46830 = defunc_1_op_res_f_res_46835; - } - - float defunc_1_op_res_46836 = x_46825 + x_46828; - - x_46823 = defunc_1_op_res_46829; - x_46824 = defunc_1_op_res_46830; - x_46825 = defunc_1_op_res_46836; - } - // write result of operation + // 
load accumulator + { + x_111356 = x_acc_128254; + } + // load new values + { + x_111357 = defunc_1_f_res_111371; + } + // apply reduction operator + { + double defunc_1_op_res_111358 = x_111356 + x_111357; + + // store in accumulator { - ((volatile __local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_46823; - ((volatile __local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_46824; - ((volatile __local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_46825; + x_acc_128254 = defunc_1_op_res_111358; } } - offset_46838 *= 2; } - while (slt32(skip_waves_46839, - squot32(sext_i64_i32(segred_group_sizze_34138) + - wave_sizze_46802 - 1, wave_sizze_46802))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46838 = skip_waves_46839 * wave_sizze_46802; - if (slt32(local_tid_46800 + offset_46838, - sext_i64_i32(segred_group_sizze_34138)) && - ((local_tid_46800 - squot32(local_tid_46800, - wave_sizze_46802) * - wave_sizze_46802) == 0 && (squot32(local_tid_46800, - wave_sizze_46802) & - (2 * skip_waves_46839 - 1)) == - 0)) { - // read array element - { - x_46826 = ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46827 = ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - x_46828 = ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46838)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46829; - int32_t defunc_1_op_res_46830; - - if (x_46823) { - defunc_1_op_res_46829 = x_46823; - defunc_1_op_res_46830 = x_46824; - } else { - bool x_46831 = x_46826 && x_46826; - bool x_46832 = !x_46826; - bool y_46833 = x_46823 && x_46832; - bool defunc_1_op_res_f_res_46834 = x_46831 || - y_46833; - int32_t defunc_1_op_res_f_res_46835; - - if (x_46826) { - defunc_1_op_res_f_res_46835 = x_46827; - } else { - defunc_1_op_res_f_res_46835 = x_46824; - } - defunc_1_op_res_46829 = defunc_1_op_res_f_res_46834; - 
defunc_1_op_res_46830 = defunc_1_op_res_f_res_46835; - } - - float defunc_1_op_res_46836 = x_46825 + x_46828; - - x_46823 = defunc_1_op_res_46829; - x_46824 = defunc_1_op_res_46830; - x_46825 = defunc_1_op_res_46836; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_46823; - ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_46824; - ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_46825; - } + } + // to reduce current chunk, first store our result in memory + { + x_111356 = x_acc_128254; + ((__local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_111356; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128260; + int32_t skip_waves_128261; + + skip_waves_128261 = 1; + + double x_128256; + double x_128257; + + offset_128260 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128241, + sext_i64_i32(segred_group_sizze_111351))) { + x_128256 = ((__local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128260)]; + } + } + offset_128260 = 1; + while (slt32(offset_128260, wave_sizze_128243)) { + if (slt32(local_tid_128241 + offset_128260, + sext_i64_i32(segred_group_sizze_111351)) && + ((local_tid_128241 - squot32(local_tid_128241, + wave_sizze_128243) * + wave_sizze_128243) & (2 * offset_128260 - 1)) == 0) { + // read array element + { + x_128257 = ((volatile __local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128260)]; + } + // apply reduction operation + { + double defunc_1_op_res_128258 = x_128256 + x_128257; + + x_128256 = defunc_1_op_res_128258; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_128256; } - skip_waves_46839 *= 2; } + offset_128260 *= 2; + } + while (slt32(skip_waves_128261, + squot32(sext_i64_i32(segred_group_sizze_111351) + + 
wave_sizze_128243 - 1, wave_sizze_128243))) { barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46800) == (int64_t) 0) { - x_acc_46817 = x_46823; - x_acc_46818 = x_46824; - x_acc_46819 = x_46825; + offset_128260 = skip_waves_128261 * wave_sizze_128243; + if (slt32(local_tid_128241 + offset_128260, + sext_i64_i32(segred_group_sizze_111351)) && + ((local_tid_128241 - squot32(local_tid_128241, + wave_sizze_128243) * + wave_sizze_128243) == 0 && (squot32(local_tid_128241, + wave_sizze_128243) & (2 * + skip_waves_128261 - + 1)) == + 0)) { + // read array element + { + x_128257 = ((__local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128260)]; } - } - // first thread keeps accumulator; others reset to neutral element - { - if (!(sext_i32_i64(local_tid_46800) == (int64_t) 0)) { - x_acc_46817 = 0; - x_acc_46818 = -1; - x_acc_46819 = 0.0F; + // apply reduction operation + { + double defunc_1_op_res_128258 = x_128256 + x_128257; + + x_128256 = defunc_1_op_res_128258; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_128256; } } + skip_waves_128261 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128241) == (int64_t) 0) { + x_acc_128254 = x_128256; + } } - x_34145 = x_acc_46817; - x_34146 = x_acc_46818; - x_34147 = x_acc_46819; - if (groups_per_segment_46786 == (int64_t) 1) { + if (groups_per_segment_128231 == (int64_t) 1) { // first thread in group saves final result to memory { - if (local_tid_46800 == 0) { - ((__global bool *) mem_45409)[gtid_33985] = x_acc_46817; - ((__global int32_t *) mem_45411)[gtid_33985] = x_acc_46818; - ((__global float *) mem_45413)[gtid_33985] = x_acc_46819; + if (local_tid_128241 == 0) { + ((__global double *) mem_123944)[gtid_111133 * + k2p2zq_75151 + + gtid_111134] = + x_acc_128254; } } } else { - int32_t 
old_counter_46840; + int32_t old_counter_128262; // first thread in group saves group result to global memory { - if (local_tid_46800 == 0) { - ((__global - bool *) group_res_arr_mem_46791)[sext_i32_i64(virt_group_id_46814) * - segred_group_sizze_34138] = - x_acc_46817; - ((__global - int32_t *) group_res_arr_mem_46793)[sext_i32_i64(virt_group_id_46814) * - segred_group_sizze_34138] = - x_acc_46818; + if (local_tid_128241 == 0) { ((__global - float *) group_res_arr_mem_46795)[sext_i32_i64(virt_group_id_46814) * - segred_group_sizze_34138] = - x_acc_46819; + double *) group_res_arr_mem_128236)[sext_i32_i64(virt_group_id_128251) * + segred_group_sizze_111351] = + x_acc_128254; mem_fence_global(); - old_counter_46840 = + old_counter_128262 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46797)[sext_i32_i64(srem32(flat_segment_id_46815, - 10240))], + int *) mainzicounter_mem_128238)[sext_i32_i64(srem32(flat_segment_id_128252, + 10240))], (int) 1); - ((__local bool *) sync_arr_mem_46810)[(int64_t) 0] = - old_counter_46840 == groups_per_segment_46786 - + ((__local bool *) sync_arr_mem_128247)[(int64_t) 0] = + old_counter_128262 == groups_per_segment_128231 - (int64_t) 1; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - bool is_last_group_46841; + bool is_last_group_128263; - is_last_group_46841 = ((__local - bool *) sync_arr_mem_46810)[(int64_t) 0]; - if (is_last_group_46841) { - if (local_tid_46800 == 0) { - old_counter_46840 = + is_last_group_128263 = ((__local + bool *) sync_arr_mem_128247)[(int64_t) 0]; + if (is_last_group_128263) { + if (local_tid_128241 == 0) { + old_counter_128262 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46797)[sext_i32_i64(srem32(flat_segment_id_46815, - 10240))], + int *) mainzicounter_mem_128238)[sext_i32_i64(srem32(flat_segment_id_128252, + 10240))], (int) ((int64_t) 0 - - groups_per_segment_46786)); + groups_per_segment_128231)); } // read in the 
per-group-results { - int64_t read_per_thread_46842 = - sdiv_up64(groups_per_segment_46786, - segred_group_sizze_34138); - - x_34145 = 0; - x_34146 = -1; - x_34147 = 0.0F; - for (int64_t i_46843 = 0; i_46843 < read_per_thread_46842; - i_46843++) { - int64_t group_res_id_46844 = - sext_i32_i64(local_tid_46800) * - read_per_thread_46842 + i_46843; - int64_t index_of_group_res_46845 = - sext_i32_i64(flat_segment_id_46815) * - groups_per_segment_46786 + group_res_id_46844; - - if (slt64(group_res_id_46844, - groups_per_segment_46786)) { - x_34148 = ((__global - bool *) group_res_arr_mem_46791)[index_of_group_res_46845 * - segred_group_sizze_34138]; - x_34149 = ((__global - int32_t *) group_res_arr_mem_46793)[index_of_group_res_46845 * - segred_group_sizze_34138]; - x_34150 = ((__global - float *) group_res_arr_mem_46795)[index_of_group_res_46845 * - segred_group_sizze_34138]; - - bool defunc_1_op_res_34151; - int32_t defunc_1_op_res_34152; - - if (x_34145) { - defunc_1_op_res_34151 = x_34145; - defunc_1_op_res_34152 = x_34146; - } else { - bool x_34153 = x_34148 && x_34148; - bool x_34154 = !x_34148; - bool y_34155 = x_34145 && x_34154; - bool defunc_1_op_res_f_res_34156 = x_34153 || - y_34155; - int32_t defunc_1_op_res_f_res_34157; - - if (x_34148) { - defunc_1_op_res_f_res_34157 = x_34149; - } else { - defunc_1_op_res_f_res_34157 = x_34146; - } - defunc_1_op_res_34151 = - defunc_1_op_res_f_res_34156; - defunc_1_op_res_34152 = - defunc_1_op_res_f_res_34157; - } + int64_t read_per_thread_128264 = + sdiv_up64(groups_per_segment_128231, + segred_group_sizze_111351); + + x_111356 = 0.0; + for (int64_t i_128265 = 0; i_128265 < + read_per_thread_128264; i_128265++) { + int64_t group_res_id_128266 = + sext_i32_i64(local_tid_128241) * + read_per_thread_128264 + i_128265; + int64_t index_of_group_res_128267 = + sext_i32_i64(flat_segment_id_128252) * + groups_per_segment_128231 + group_res_id_128266; + + if (slt64(group_res_id_128266, + groups_per_segment_128231)) { + x_111357 
= ((__global + double *) group_res_arr_mem_128236)[index_of_group_res_128267 * + segred_group_sizze_111351]; - float defunc_1_op_res_34158 = x_34147 + x_34150; + double defunc_1_op_res_111358; - x_34145 = defunc_1_op_res_34151; - x_34146 = defunc_1_op_res_34152; - x_34147 = defunc_1_op_res_34158; + defunc_1_op_res_111358 = x_111356 + x_111357; + x_111356 = defunc_1_op_res_111358; } } } ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_34145; - ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_34146; - ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_34147; + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_111356; barrier(CLK_LOCAL_MEM_FENCE); // reduce the per-group results { - int32_t offset_46846; - int32_t skip_waves_46847; + int32_t offset_128268; + int32_t skip_waves_128269; - skip_waves_46847 = 1; + skip_waves_128269 = 1; - bool x_46823; - int32_t x_46824; - float x_46825; - bool x_46826; - int32_t x_46827; - float x_46828; + double x_128256; + double x_128257; - offset_46846 = 0; + offset_128268 = 0; // participating threads read initial accumulator { - if (slt32(local_tid_46800, - sext_i64_i32(segred_group_sizze_34138))) { - x_46823 = ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46824 = ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46825 = ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - } - } - offset_46846 = 1; - while (slt32(offset_46846, wave_sizze_46802)) { - if (slt32(local_tid_46800 + offset_46846, - sext_i64_i32(segred_group_sizze_34138)) && - ((local_tid_46800 - squot32(local_tid_46800, - wave_sizze_46802) * - wave_sizze_46802) & (2 * offset_46846 - 1)) == + if (slt32(local_tid_128241, + sext_i64_i32(segred_group_sizze_111351))) { + x_128256 = ((__local + double *) 
red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128268)]; + } + } + offset_128268 = 1; + while (slt32(offset_128268, wave_sizze_128243)) { + if (slt32(local_tid_128241 + offset_128268, + sext_i64_i32(segred_group_sizze_111351)) && + ((local_tid_128241 - squot32(local_tid_128241, + wave_sizze_128243) * + wave_sizze_128243) & (2 * offset_128268 - 1)) == 0) { // read array element { - x_46826 = ((volatile __local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46827 = ((volatile __local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46828 = ((volatile __local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46846)]; + x_128257 = ((volatile __local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128268)]; } // apply reduction operation { - bool defunc_1_op_res_46829; - int32_t defunc_1_op_res_46830; + double defunc_1_op_res_128258 = x_128256 + + x_128257; - if (x_46823) { - defunc_1_op_res_46829 = x_46823; - defunc_1_op_res_46830 = x_46824; - } else { - bool x_46831 = x_46826 && x_46826; - bool x_46832 = !x_46826; - bool y_46833 = x_46823 && x_46832; - bool defunc_1_op_res_f_res_46834 = - x_46831 || y_46833; - int32_t defunc_1_op_res_f_res_46835; - - if (x_46826) { - defunc_1_op_res_f_res_46835 = x_46827; - } else { - defunc_1_op_res_f_res_46835 = x_46824; - } - defunc_1_op_res_46829 = - defunc_1_op_res_f_res_46834; - defunc_1_op_res_46830 = - defunc_1_op_res_f_res_46835; - } - - float defunc_1_op_res_46836 = x_46825 + x_46828; - - x_46823 = defunc_1_op_res_46829; - x_46824 = defunc_1_op_res_46830; - x_46825 = defunc_1_op_res_46836; + x_128256 = defunc_1_op_res_128258; } // write result of operation { ((volatile __local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_46823; - ((volatile __local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_46824; - ((volatile __local - float *) 
red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_46825; + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_128256; } } - offset_46846 *= 2; + offset_128268 *= 2; } - while (slt32(skip_waves_46847, - squot32(sext_i64_i32(segred_group_sizze_34138) + - wave_sizze_46802 - 1, - wave_sizze_46802))) { + while (slt32(skip_waves_128269, + squot32(sext_i64_i32(segred_group_sizze_111351) + + wave_sizze_128243 - 1, + wave_sizze_128243))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46846 = skip_waves_46847 * wave_sizze_46802; - if (slt32(local_tid_46800 + offset_46846, - sext_i64_i32(segred_group_sizze_34138)) && - ((local_tid_46800 - squot32(local_tid_46800, - wave_sizze_46802) * - wave_sizze_46802) == 0 && - (squot32(local_tid_46800, wave_sizze_46802) & (2 * - skip_waves_46847 - - 1)) == - 0)) { + offset_128268 = skip_waves_128269 * wave_sizze_128243; + if (slt32(local_tid_128241 + offset_128268, + sext_i64_i32(segred_group_sizze_111351)) && + ((local_tid_128241 - squot32(local_tid_128241, + wave_sizze_128243) * + wave_sizze_128243) == 0 && + (squot32(local_tid_128241, wave_sizze_128243) & + (2 * skip_waves_128269 - 1)) == 0)) { // read array element { - x_46826 = ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46827 = ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800 + - offset_46846)]; - x_46828 = ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800 + - offset_46846)]; + x_128257 = ((__local + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241 + + offset_128268)]; } // apply reduction operation { - bool defunc_1_op_res_46829; - int32_t defunc_1_op_res_46830; - - if (x_46823) { - defunc_1_op_res_46829 = x_46823; - defunc_1_op_res_46830 = x_46824; - } else { - bool x_46831 = x_46826 && x_46826; - bool x_46832 = !x_46826; - bool y_46833 = x_46823 && x_46832; - bool defunc_1_op_res_f_res_46834 = - x_46831 || y_46833; - int32_t defunc_1_op_res_f_res_46835; - - if 
(x_46826) { - defunc_1_op_res_f_res_46835 = x_46827; - } else { - defunc_1_op_res_f_res_46835 = x_46824; - } - defunc_1_op_res_46829 = - defunc_1_op_res_f_res_46834; - defunc_1_op_res_46830 = - defunc_1_op_res_f_res_46835; - } + double defunc_1_op_res_128258 = x_128256 + + x_128257; - float defunc_1_op_res_46836 = x_46825 + x_46828; - - x_46823 = defunc_1_op_res_46829; - x_46824 = defunc_1_op_res_46830; - x_46825 = defunc_1_op_res_46836; + x_128256 = defunc_1_op_res_128258; } // write result of operation { ((__local - bool *) red_arr_mem_46804)[sext_i32_i64(local_tid_46800)] = - x_46823; - ((__local - int32_t *) red_arr_mem_46806)[sext_i32_i64(local_tid_46800)] = - x_46824; - ((__local - float *) red_arr_mem_46808)[sext_i32_i64(local_tid_46800)] = - x_46825; + double *) red_arr_mem_128245)[sext_i32_i64(local_tid_128241)] = + x_128256; } } - skip_waves_46847 *= 2; + skip_waves_128269 *= 2; } // and back to memory with the final result { - if (local_tid_46800 == 0) { - ((__global bool *) mem_45409)[gtid_33985] = x_46823; - ((__global int32_t *) mem_45411)[gtid_33985] = - x_46824; - ((__global float *) mem_45413)[gtid_33985] = - x_46825; + if (local_tid_128241 == 0) { + ((__global double *) mem_123944)[gtid_111133 * + k2p2zq_75151 + + gtid_111134] = + x_128256; } } } @@ -30347,16557 +35518,14686 @@ def sync(self): error_1: return; - #undef segred_group_sizze_34138 + #undef segred_group_sizze_111351 } -__kernel void mainDetailedzisegred_nonseg_32723(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46517_backing_aligned_0, - __local volatile - int64_t *sync_arr_mem_46515_backing_aligned_1, - int64_t m_27772, - int64_t num_groups_32718, - int64_t num_threads_46509, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *mem_45249, - __global - unsigned char *mainDetailedzicounter_mem_46505, - __global - unsigned char *group_res_arr_mem_46507) +__kernel void mainzisegred_large_111219(__global int *global_failure, + 
__local volatile + int64_t *sync_arr_mem_128171_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128169_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111293, + int64_t groups_per_segment_128155, + int64_t elements_per_thread_128156, + int64_t virt_num_groups_128157, + int64_t threads_per_segment_128159, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123913, + __global + unsigned char *group_res_arr_mem_128160, + __global + unsigned char *mainzicounter_mem_128162) { - #define segred_group_sizze_32716 (mainDetailedzisegred_group_sizze_32715) + #define segred_group_sizze_111292 (mainzisegred_group_sizze_111213) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46517_backing_1 = + __local volatile char *restrict sync_arr_mem_128171_backing_1 = (__local volatile - char *) red_arr_mem_46517_backing_aligned_0; - __local volatile char *restrict sync_arr_mem_46515_backing_0 = + char *) sync_arr_mem_128171_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128169_backing_0 = (__local volatile - char *) sync_arr_mem_46515_backing_aligned_1; + char *) red_arr_mem_128169_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46510; - int32_t local_tid_46511; - int64_t group_sizze_46514; - int32_t wave_sizze_46513; - int32_t group_tid_46512; - - global_tid_46510 = get_global_id(0); - local_tid_46511 = get_local_id(0); - group_sizze_46514 = get_local_size(0); - wave_sizze_46513 = LOCKSTEP_WIDTH; - group_tid_46512 = get_group_id(0); - - int32_t phys_tid_32723; - - phys_tid_32723 = global_tid_46510; - - __local char *sync_arr_mem_46515; + int32_t global_tid_128164; + int32_t local_tid_128165; + int64_t group_sizze_128168; + int32_t wave_sizze_128167; + int32_t group_tid_128166; - sync_arr_mem_46515 
= (__local char *) sync_arr_mem_46515_backing_0; + global_tid_128164 = get_global_id(0); + local_tid_128165 = get_local_id(0); + group_sizze_128168 = get_local_size(0); + wave_sizze_128167 = LOCKSTEP_WIDTH; + group_tid_128166 = get_group_id(0); - __local char *red_arr_mem_46517; + int32_t phys_tid_111219; - red_arr_mem_46517 = (__local char *) red_arr_mem_46517_backing_1; + phys_tid_111219 = global_tid_128164; - int64_t dummy_32721; + __local char *red_arr_mem_128169; - dummy_32721 = (int64_t) 0; + red_arr_mem_128169 = (__local char *) red_arr_mem_128169_backing_0; - int64_t gtid_32722; + __local char *sync_arr_mem_128171; - gtid_32722 = (int64_t) 0; + sync_arr_mem_128171 = (__local char *) sync_arr_mem_128171_backing_1; - int32_t x_acc_46519; - int64_t chunk_sizze_46520; + int32_t phys_group_id_128173; - chunk_sizze_46520 = smin64(sdiv_up64(m_27772, - sext_i32_i64(sext_i64_i32(segred_group_sizze_32716 * - num_groups_32718))), - sdiv_up64(m_27772 - sext_i32_i64(phys_tid_32723), - num_threads_46509)); - - int32_t x_28170; - int32_t x_28171; - - // neutral-initialise the accumulators - { - x_acc_46519 = 0; - } - for (int64_t i_46524 = 0; i_46524 < chunk_sizze_46520; i_46524++) { - gtid_32722 = sext_i32_i64(phys_tid_32723) + num_threads_46509 * i_46524; - // apply map function + phys_group_id_128173 = get_group_id(0); + for (int32_t i_128174 = 0; i_128174 < + sdiv_up32(sext_i64_i32(virt_num_groups_128157) - phys_group_id_128173, + sext_i64_i32(num_groups_111293)); i_128174++) { + int32_t virt_group_id_128175 = phys_group_id_128173 + i_128174 * + sext_i64_i32(num_groups_111293); + int32_t flat_segment_id_128176 = squot32(virt_group_id_128175, + sext_i64_i32(groups_per_segment_128155)); + int64_t global_tid_128177 = srem64(sext_i32_i64(virt_group_id_128175) * + segred_group_sizze_111292 + + sext_i32_i64(local_tid_128165), + segred_group_sizze_111292 * + groups_per_segment_128155); + int64_t gtid_111210 = sext_i32_i64(flat_segment_id_128176); + int64_t gtid_111218; + 
double x_acc_128178; + int64_t chunk_sizze_128179; + + chunk_sizze_128179 = smin64(elements_per_thread_128156, + sdiv_up64(k2p2zq_75151 - global_tid_128177, + threads_per_segment_128159)); + + double x_111296; + double x_111297; + + // neutral-initialise the accumulators { - int32_t x_28173 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_32722]; - - // save map-out results - { } - // load accumulator - { - x_28170 = x_acc_46519; - } - // load new values - { - x_28171 = x_28173; - } - // apply reduction operator + x_acc_128178 = 0.0; + } + for (int64_t i_128183 = 0; i_128183 < chunk_sizze_128179; i_128183++) { + gtid_111218 = global_tid_128177 + threads_per_segment_128159 * + i_128183; + // apply map function { - int32_t defunc_1_op_res_28172 = smax32(x_28170, x_28171); + double x_111301 = ((__global double *) mem_120246)[gtid_111218 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111210 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111302 = ((__global + double *) mem_param_123786)[gtid_111210 * + k2p2zq_75151 + + gtid_111218]; + double defunc_1_f_res_111303 = x_111301 * x_111302; - // store in accumulator + // save map-out results + { } + // load accumulator { - x_acc_46519 = defunc_1_op_res_28172; + x_111296 = x_acc_128178; + } + // load new values + { + x_111297 = defunc_1_f_res_111303; + } + // apply reduction operator + { + double defunc_1_op_res_111298 = x_111296 + x_111297; + + // store in accumulator + { + x_acc_128178 = defunc_1_op_res_111298; + } } } } - } - // to reduce current chunk, first store our result in memory - { - x_28170 = x_acc_46519; - ((__local int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_28170; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46525; - int32_t skip_waves_46526; - - skip_waves_46526 = 1; - - int32_t x_46521; - int32_t x_46522; - - offset_46525 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46511, 
sext_i64_i32(segred_group_sizze_32716))) { - x_46521 = ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46525)]; - } - } - offset_46525 = 1; - while (slt32(offset_46525, wave_sizze_46513)) { - if (slt32(local_tid_46511 + offset_46525, - sext_i64_i32(segred_group_sizze_32716)) && ((local_tid_46511 - - squot32(local_tid_46511, - wave_sizze_46513) * - wave_sizze_46513) & - (2 * - offset_46525 - - 1)) == 0) { - // read array element - { - x_46522 = ((volatile __local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46525)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46523 = smax32(x_46521, x_46522); - - x_46521 = defunc_1_op_res_46523; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_46521; - } - } - offset_46525 *= 2; - } - while (slt32(skip_waves_46526, - squot32(sext_i64_i32(segred_group_sizze_32716) + - wave_sizze_46513 - 1, wave_sizze_46513))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46525 = skip_waves_46526 * wave_sizze_46513; - if (slt32(local_tid_46511 + offset_46525, - sext_i64_i32(segred_group_sizze_32716)) && ((local_tid_46511 - - squot32(local_tid_46511, - wave_sizze_46513) * - wave_sizze_46513) == - 0 && - (squot32(local_tid_46511, - wave_sizze_46513) & - (2 * - skip_waves_46526 - - 1)) == 0)) { - // read array element - { - x_46522 = ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46525)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46523 = smax32(x_46521, x_46522); - - x_46521 = defunc_1_op_res_46523; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_46521; - } - } - skip_waves_46526 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46511) == (int64_t) 0) { - x_acc_46519 = x_46521; - } - } - - int32_t 
old_counter_46527; - - // first thread in group saves group result to global memory - { - if (local_tid_46511 == 0) { - ((__global - int32_t *) group_res_arr_mem_46507)[sext_i32_i64(group_tid_46512) * - segred_group_sizze_32716] = - x_acc_46519; - mem_fence_global(); - old_counter_46527 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46505)[(int64_t) 0], - (int) 1); - ((__local bool *) sync_arr_mem_46515)[(int64_t) 0] = - old_counter_46527 == num_groups_32718 - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46528; - - is_last_group_46528 = ((__local bool *) sync_arr_mem_46515)[(int64_t) 0]; - if (is_last_group_46528) { - if (local_tid_46511 == 0) { - old_counter_46527 = atomic_add_i32_global(&((volatile __global - int *) mainDetailedzicounter_mem_46505)[(int64_t) 0], - (int) ((int64_t) 0 - - num_groups_32718)); + // to reduce current chunk, first store our result in memory + { + x_111296 = x_acc_128178; + ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_111296; } - // read in the per-group-results + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128184; + int32_t skip_waves_128185; + + skip_waves_128185 = 1; + + double x_128180; + double x_128181; + + offset_128184 = 0; + // participating threads read initial accumulator { - int64_t read_per_thread_46529 = sdiv_up64(num_groups_32718, - segred_group_sizze_32716); - - x_28170 = 0; - for (int64_t i_46530 = 0; i_46530 < read_per_thread_46529; - i_46530++) { - int64_t group_res_id_46531 = sext_i32_i64(local_tid_46511) * - read_per_thread_46529 + i_46530; - int64_t index_of_group_res_46532 = group_res_id_46531; - - if (slt64(group_res_id_46531, num_groups_32718)) { - x_28171 = ((__global - int32_t *) group_res_arr_mem_46507)[index_of_group_res_46532 * - segred_group_sizze_32716]; + if (slt32(local_tid_128165, + sext_i64_i32(segred_group_sizze_111292))) { + x_128180 = ((__local + double *) 
red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128184)]; + } + } + offset_128184 = 1; + while (slt32(offset_128184, wave_sizze_128167)) { + if (slt32(local_tid_128165 + offset_128184, + sext_i64_i32(segred_group_sizze_111292)) && + ((local_tid_128165 - squot32(local_tid_128165, + wave_sizze_128167) * + wave_sizze_128167) & (2 * offset_128184 - 1)) == 0) { + // read array element + { + x_128181 = ((volatile __local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128184)]; + } + // apply reduction operation + { + double defunc_1_op_res_128182 = x_128180 + x_128181; - int32_t defunc_1_op_res_28172; + x_128180 = defunc_1_op_res_128182; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_128180; + } + } + offset_128184 *= 2; + } + while (slt32(skip_waves_128185, + squot32(sext_i64_i32(segred_group_sizze_111292) + + wave_sizze_128167 - 1, wave_sizze_128167))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128184 = skip_waves_128185 * wave_sizze_128167; + if (slt32(local_tid_128165 + offset_128184, + sext_i64_i32(segred_group_sizze_111292)) && + ((local_tid_128165 - squot32(local_tid_128165, + wave_sizze_128167) * + wave_sizze_128167) == 0 && (squot32(local_tid_128165, + wave_sizze_128167) & (2 * + skip_waves_128185 - + 1)) == + 0)) { + // read array element + { + x_128181 = ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128184)]; + } + // apply reduction operation + { + double defunc_1_op_res_128182 = x_128180 + x_128181; - defunc_1_op_res_28172 = smax32(x_28170, x_28171); - x_28170 = defunc_1_op_res_28172; + x_128180 = defunc_1_op_res_128182; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_128180; } } + skip_waves_128185 *= 2; } - ((__local int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_28170; barrier(CLK_LOCAL_MEM_FENCE); - // 
reduce the per-group results + // first thread saves the result in accumulator { - int32_t offset_46533; - int32_t skip_waves_46534; - - skip_waves_46534 = 1; - - int32_t x_46521; - int32_t x_46522; + if (sext_i32_i64(local_tid_128165) == (int64_t) 0) { + x_acc_128178 = x_128180; + } + } + if (groups_per_segment_128155 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128165 == 0) { + ((__global double *) mem_123913)[gtid_111210] = + x_acc_128178; + } + } + } else { + int32_t old_counter_128186; - offset_46533 = 0; - // participating threads read initial accumulator + // first thread in group saves group result to global memory { - if (slt32(local_tid_46511, - sext_i64_i32(segred_group_sizze_32716))) { - x_46521 = ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46533)]; + if (local_tid_128165 == 0) { + ((__global + double *) group_res_arr_mem_128160)[sext_i32_i64(virt_group_id_128175) * + segred_group_sizze_111292] = + x_acc_128178; + mem_fence_global(); + old_counter_128186 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128162)[sext_i32_i64(srem32(flat_segment_id_128176, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128171)[(int64_t) 0] = + old_counter_128186 == groups_per_segment_128155 - + (int64_t) 1; } } - offset_46533 = 1; - while (slt32(offset_46533, wave_sizze_46513)) { - if (slt32(local_tid_46511 + offset_46533, - sext_i64_i32(segred_group_sizze_32716)) && - ((local_tid_46511 - squot32(local_tid_46511, - wave_sizze_46513) * - wave_sizze_46513) & (2 * offset_46533 - 1)) == 0) { - // read array element - { - x_46522 = ((volatile __local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46533)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46523 = smax32(x_46521, - x_46522); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128187; + + is_last_group_128187 = ((__local + bool 
*) sync_arr_mem_128171)[(int64_t) 0]; + if (is_last_group_128187) { + if (local_tid_128165 == 0) { + old_counter_128186 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128162)[sext_i32_i64(srem32(flat_segment_id_128176, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128155)); + } + // read in the per-group-results + { + int64_t read_per_thread_128188 = + sdiv_up64(groups_per_segment_128155, + segred_group_sizze_111292); + + x_111296 = 0.0; + for (int64_t i_128189 = 0; i_128189 < + read_per_thread_128188; i_128189++) { + int64_t group_res_id_128190 = + sext_i32_i64(local_tid_128165) * + read_per_thread_128188 + i_128189; + int64_t index_of_group_res_128191 = + sext_i32_i64(flat_segment_id_128176) * + groups_per_segment_128155 + group_res_id_128190; - x_46521 = defunc_1_op_res_46523; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_46521; + if (slt64(group_res_id_128190, + groups_per_segment_128155)) { + x_111297 = ((__global + double *) group_res_arr_mem_128160)[index_of_group_res_128191 * + segred_group_sizze_111292]; + + double defunc_1_op_res_111298; + + defunc_1_op_res_111298 = x_111296 + x_111297; + x_111296 = defunc_1_op_res_111298; + } } } - offset_46533 *= 2; - } - while (slt32(skip_waves_46534, - squot32(sext_i64_i32(segred_group_sizze_32716) + - wave_sizze_46513 - 1, wave_sizze_46513))) { + ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_111296; barrier(CLK_LOCAL_MEM_FENCE); - offset_46533 = skip_waves_46534 * wave_sizze_46513; - if (slt32(local_tid_46511 + offset_46533, - sext_i64_i32(segred_group_sizze_32716)) && - ((local_tid_46511 - squot32(local_tid_46511, - wave_sizze_46513) * - wave_sizze_46513) == 0 && (squot32(local_tid_46511, - wave_sizze_46513) & - (2 * skip_waves_46534 - 1)) == - 0)) { - // read array element + // reduce the per-group results + { + int32_t offset_128192; + int32_t skip_waves_128193; 
+ + skip_waves_128193 = 1; + + double x_128180; + double x_128181; + + offset_128192 = 0; + // participating threads read initial accumulator { - x_46522 = ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511 + - offset_46533)]; + if (slt32(local_tid_128165, + sext_i64_i32(segred_group_sizze_111292))) { + x_128180 = ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128192)]; + } + } + offset_128192 = 1; + while (slt32(offset_128192, wave_sizze_128167)) { + if (slt32(local_tid_128165 + offset_128192, + sext_i64_i32(segred_group_sizze_111292)) && + ((local_tid_128165 - squot32(local_tid_128165, + wave_sizze_128167) * + wave_sizze_128167) & (2 * offset_128192 - 1)) == + 0) { + // read array element + { + x_128181 = ((volatile __local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128192)]; + } + // apply reduction operation + { + double defunc_1_op_res_128182 = x_128180 + + x_128181; + + x_128180 = defunc_1_op_res_128182; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_128180; + } + } + offset_128192 *= 2; } - // apply reduction operation - { - int32_t defunc_1_op_res_46523 = smax32(x_46521, - x_46522); - - x_46521 = defunc_1_op_res_46523; + while (slt32(skip_waves_128193, + squot32(sext_i64_i32(segred_group_sizze_111292) + + wave_sizze_128167 - 1, + wave_sizze_128167))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128192 = skip_waves_128193 * wave_sizze_128167; + if (slt32(local_tid_128165 + offset_128192, + sext_i64_i32(segred_group_sizze_111292)) && + ((local_tid_128165 - squot32(local_tid_128165, + wave_sizze_128167) * + wave_sizze_128167) == 0 && + (squot32(local_tid_128165, wave_sizze_128167) & + (2 * skip_waves_128193 - 1)) == 0)) { + // read array element + { + x_128181 = ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165 + + offset_128192)]; + } + // apply reduction operation + { + 
double defunc_1_op_res_128182 = x_128180 + + x_128181; + + x_128180 = defunc_1_op_res_128182; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128169)[sext_i32_i64(local_tid_128165)] = + x_128180; + } + } + skip_waves_128193 *= 2; } - // write result of operation + // and back to memory with the final result { - ((__local - int32_t *) red_arr_mem_46517)[sext_i32_i64(local_tid_46511)] = - x_46521; + if (local_tid_128165 == 0) { + ((__global double *) mem_123913)[gtid_111210] = + x_128180; + } } } - skip_waves_46534 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46511 == 0) { - ((__global int32_t *) mem_45249)[(int64_t) 0] = x_46521; - } } } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_32716 + #undef segred_group_sizze_111292 } -__kernel void mainDetailedzisegred_small_30429(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45775_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_27781, - int64_t i32_res_27787, - int64_t num_groups_30568, - int64_t segment_sizze_nonzzero_45768, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44531, - __global - unsigned char *mem_44536) -{ - #define segred_group_sizze_30567 (mainDetailedzisegred_group_sizze_30423) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45775_backing_0 = - (__local volatile - char *) red_arr_mem_45775_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45770; - int32_t local_tid_45771; - int64_t group_sizze_45774; - int32_t wave_sizze_45773; - int32_t group_tid_45772; - - global_tid_45770 = get_global_id(0); - local_tid_45771 = get_local_id(0); - group_sizze_45774 = get_local_size(0); - wave_sizze_45773 = LOCKSTEP_WIDTH; - group_tid_45772 = get_group_id(0); - - int32_t 
phys_tid_30429; - - phys_tid_30429 = global_tid_45770; - - __local char *red_arr_mem_45775; - - red_arr_mem_45775 = (__local char *) red_arr_mem_45775_backing_0; - - int32_t phys_group_id_45777; - - phys_group_id_45777 = get_group_id(0); - for (int32_t i_45778 = 0; i_45778 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * i32_res_27787 * - i32_res_27787, - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768))) - - phys_group_id_45777, sext_i64_i32(num_groups_30568)); - i_45778++) { - int32_t virt_group_id_45779 = phys_group_id_45777 + i_45778 * - sext_i64_i32(num_groups_30568); - int64_t gtid_30416 = squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768), - i32_res_27787 * i32_res_27787); - int64_t gtid_30417 = squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) - - squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787), - i32_res_27787); - int64_t gtid_30418 = squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) - - squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768), i32_res_27787 * - i32_res_27787) * (i32_res_27787 * i32_res_27787) - - squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) - - 
squot64(squot64(sext_i32_i64(local_tid_45771), - segment_sizze_nonzzero_45768) + - sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787), i32_res_27787) * - i32_res_27787; - int64_t gtid_30428 = srem64(sext_i32_i64(local_tid_45771), - i32_res_27781); +__kernel void mainzisegred_large_111246(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128105_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128103_backing_aligned_1, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111265, + int64_t groups_per_segment_128089, + int64_t elements_per_thread_128090, + int64_t virt_num_groups_128091, + int64_t threads_per_segment_128093, + __global unsigned char *mem_120246, + __global unsigned char *mem_123901, + __global unsigned char *mem_123904, + __global unsigned char *mem_123907, + __global + unsigned char *group_res_arr_mem_128094, + __global + unsigned char *mainzicounter_mem_128096) +{ + #define segred_group_sizze_111264 (mainzisegred_group_sizze_111240) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128105_backing_1 = + (__local volatile + char *) sync_arr_mem_128105_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128103_backing_0 = + (__local volatile + char *) red_arr_mem_128103_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128098; + int32_t local_tid_128099; + int64_t group_sizze_128102; + int32_t wave_sizze_128101; + int32_t group_tid_128100; + + global_tid_128098 = get_global_id(0); + local_tid_128099 = get_local_id(0); + group_sizze_128102 = get_local_size(0); + wave_sizze_128101 = LOCKSTEP_WIDTH; + group_tid_128100 = get_group_id(0); + + int32_t phys_tid_111246; + + phys_tid_111246 = 
global_tid_128098; + + __local char *red_arr_mem_128103; + + red_arr_mem_128103 = (__local char *) red_arr_mem_128103_backing_0; + + __local char *sync_arr_mem_128105; + + sync_arr_mem_128105 = (__local char *) sync_arr_mem_128105_backing_1; + + int32_t phys_group_id_128107; + + phys_group_id_128107 = get_group_id(0); + for (int32_t i_128108 = 0; i_128108 < + sdiv_up32(sext_i64_i32(virt_num_groups_128091) - phys_group_id_128107, + sext_i64_i32(num_groups_111265)); i_128108++) { + int32_t virt_group_id_128109 = phys_group_id_128107 + i_128108 * + sext_i64_i32(num_groups_111265); + int32_t flat_segment_id_128110 = squot32(virt_group_id_128109, + sext_i64_i32(groups_per_segment_128089)); + int64_t global_tid_128111 = srem64(sext_i32_i64(virt_group_id_128109) * + segred_group_sizze_111264 + + sext_i32_i64(local_tid_128099), + segred_group_sizze_111264 * + groups_per_segment_128089); + int64_t gtid_111237 = sext_i32_i64(flat_segment_id_128110); + int64_t gtid_111245; + double x_acc_128112; + int64_t chunk_sizze_128113; - // apply map function if in bounds + chunk_sizze_128113 = smin64(elements_per_thread_128090, + sdiv_up64(k2p2zq_75151 - global_tid_128111, + threads_per_segment_128093)); + + double x_111269; + double x_111270; + + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, i32_res_27781) && (((slt64(gtid_30416, - m_27772) && - slt64(gtid_30417, - i32_res_27787)) && - slt64(gtid_30418, - i32_res_27787)) && - slt64(sext_i32_i64(local_tid_45771), - i32_res_27781 * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768)))) { - float x_30577 = ((__global - float *) images_mem_44381)[gtid_30416 * - N_27771 + - gtid_30428]; - float x_30578 = ((__global - float *) binop_p_mem_44390)[gtid_30417 * - N_27771 + - gtid_30428]; - float x_30579 = ((__global float *) mem_44531)[gtid_30418 * - N_27771 + - gtid_30428]; - float x_30580 = x_30578 * x_30579; - bool isnan_res_30581; - - isnan_res_30581 = futrts_isnan32(x_30577); - - float y_30582; - - if 
(isnan_res_30581) { - y_30582 = 0.0F; - } else { - y_30582 = 1.0F; + x_acc_128112 = 0.0; + } + for (int64_t i_128117 = 0; i_128117 < chunk_sizze_128113; i_128117++) { + gtid_111245 = global_tid_128111 + threads_per_segment_128093 * + i_128117; + // apply map function + { + double x_111275 = ((__global double *) mem_120246)[gtid_111245 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111237 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double defunc_0_f_res_111276; + double redout_119889 = 0.0; + + for (int64_t i_119890 = 0; i_119890 < k2p2zq_75151; + i_119890++) { + double x_111280 = ((__global + double *) mem_120246)[i_119890 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111237 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111281 = ((__global + double *) mem_123901)[i_119890 * + (k2p2zq_75151 * + m_75136) + + gtid_111237 * + k2p2zq_75151 + + gtid_111245]; + double defunc_1_f_res_111282 = x_111280 * x_111281; + double defunc_1_op_res_111279 = defunc_1_f_res_111282 + + redout_119889; + double redout_tmp_128118 = defunc_1_op_res_111279; + + redout_119889 = redout_tmp_128118; } + defunc_0_f_res_111276 = redout_119889; - float defunc_2_f_res_30583 = x_30580 * y_30582; + double defunc_1_f_res_111283 = x_111275 * defunc_0_f_res_111276; // save map-out results - { } - // save results to be reduced { - ((__local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - defunc_2_f_res_30583; + ((__global double *) mem_123907)[gtid_111237 * + k2p2zq_75151 + + gtid_111245] = + defunc_0_f_res_111276; + } + // load accumulator + { + x_111269 = x_acc_128112; + } + // load new values + { + x_111270 = defunc_1_f_res_111283; + } + // apply reduction operator + { + double defunc_1_op_res_111271 = x_111269 + x_111270; + + // store in accumulator + { + x_acc_128112 = defunc_1_op_res_111271; + } } - } else { - ((__local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - 0.0F; } } + // to reduce current chunk, first store 
our result in memory + { + x_111269 = x_acc_128112; + ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_111269; + } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27781)) { - // perform segmented scan to imitate reduction - { - float x_30571; - float x_30572; - float x_45780; - float x_45781; - bool ltid_in_bounds_45783; - - ltid_in_bounds_45783 = slt64(sext_i32_i64(local_tid_45771), - i32_res_27781 * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768)); - - int32_t skip_threads_45784; - - // read input for in-block scan + + int32_t offset_128119; + int32_t skip_waves_128120; + + skip_waves_128120 = 1; + + double x_128114; + double x_128115; + + offset_128119 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128099, + sext_i64_i32(segred_group_sizze_111264))) { + x_128114 = ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128119)]; + } + } + offset_128119 = 1; + while (slt32(offset_128119, wave_sizze_128101)) { + if (slt32(local_tid_128099 + offset_128119, + sext_i64_i32(segred_group_sizze_111264)) && + ((local_tid_128099 - squot32(local_tid_128099, + wave_sizze_128101) * + wave_sizze_128101) & (2 * offset_128119 - 1)) == 0) { + // read array element { - if (ltid_in_bounds_45783) { - x_30572 = ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)]; - if ((local_tid_45771 - squot32(local_tid_45771, 32) * - 32) == 0) { - x_30571 = x_30572; - } - } + x_128115 = ((volatile __local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128119)]; } - // in-block scan (hopefully no barriers needed) + // apply reduction operation { - skip_threads_45784 = 1; - while (slt32(skip_threads_45784, 32)) { - if (sle32(skip_threads_45784, local_tid_45771 - - squot32(local_tid_45771, 32) * 32) && - ltid_in_bounds_45783) { - // read operands - { - x_30571 = ((volatile __local - float *) 
red_arr_mem_45775)[sext_i32_i64(local_tid_45771) - - sext_i32_i64(skip_threads_45784)]; - } - // perform operation - { - bool inactive_45785 = - slt64(srem64(sext_i32_i64(local_tid_45771), - i32_res_27781), - sext_i32_i64(local_tid_45771) - - sext_i32_i64(local_tid_45771 - - skip_threads_45784)); - - if (inactive_45785) { - x_30571 = x_30572; - } - if (!inactive_45785) { - float defunc_1_op_res_30573 = x_30571 + - x_30572; - - x_30571 = defunc_1_op_res_30573; - } - } - } - if (sle32(wave_sizze_45773, skip_threads_45784)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45784, local_tid_45771 - - squot32(local_tid_45771, 32) * 32) && - ltid_in_bounds_45783) { - // write result - { - ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - x_30571; - x_30572 = x_30571; - } - } - if (sle32(wave_sizze_45773, skip_threads_45784)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45784 *= 2; - } + double defunc_1_op_res_128116 = x_128114 + x_128115; + + x_128114 = defunc_1_op_res_128116; } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + // write result of operation { - if ((local_tid_45771 - squot32(local_tid_45771, 32) * 32) == - 31 && ltid_in_bounds_45783) { - ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(squot32(local_tid_45771, - 32))] = - x_30571; - } + ((volatile __local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_128114; } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + } + offset_128119 *= 2; + } + while (slt32(skip_waves_128120, + squot32(sext_i64_i32(segred_group_sizze_111264) + + wave_sizze_128101 - 1, wave_sizze_128101))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128119 = skip_waves_128120 * wave_sizze_128101; + if (slt32(local_tid_128099 + offset_128119, + sext_i64_i32(segred_group_sizze_111264)) && + ((local_tid_128099 - squot32(local_tid_128099, + 
wave_sizze_128101) * + wave_sizze_128101) == 0 && (squot32(local_tid_128099, + wave_sizze_128101) & (2 * + skip_waves_128120 - + 1)) == + 0)) { + // read array element + { + x_128115 = ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128119)]; + } + // apply reduction operation { - int32_t skip_threads_45786; + double defunc_1_op_res_128116 = x_128114 + x_128115; - // read input for in-block scan - { - if (squot32(local_tid_45771, 32) == 0 && - ltid_in_bounds_45783) { - x_45781 = ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)]; - if ((local_tid_45771 - squot32(local_tid_45771, - 32) * 32) == 0) { - x_45780 = x_45781; - } + x_128114 = defunc_1_op_res_128116; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_128114; + } + } + skip_waves_128120 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128099) == (int64_t) 0) { + x_acc_128112 = x_128114; + } + } + if (groups_per_segment_128089 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128099 == 0) { + ((__global double *) mem_123904)[gtid_111237] = + x_acc_128112; + } + } + } else { + int32_t old_counter_128121; + + // first thread in group saves group result to global memory + { + if (local_tid_128099 == 0) { + ((__global + double *) group_res_arr_mem_128094)[sext_i32_i64(virt_group_id_128109) * + segred_group_sizze_111264] = + x_acc_128112; + mem_fence_global(); + old_counter_128121 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128096)[sext_i32_i64(srem32(flat_segment_id_128110, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128105)[(int64_t) 0] = + old_counter_128121 == groups_per_segment_128089 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128122; + + 
is_last_group_128122 = ((__local + bool *) sync_arr_mem_128105)[(int64_t) 0]; + if (is_last_group_128122) { + if (local_tid_128099 == 0) { + old_counter_128121 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128096)[sext_i32_i64(srem32(flat_segment_id_128110, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128089)); + } + // read in the per-group-results + { + int64_t read_per_thread_128123 = + sdiv_up64(groups_per_segment_128089, + segred_group_sizze_111264); + + x_111269 = 0.0; + for (int64_t i_128124 = 0; i_128124 < + read_per_thread_128123; i_128124++) { + int64_t group_res_id_128125 = + sext_i32_i64(local_tid_128099) * + read_per_thread_128123 + i_128124; + int64_t index_of_group_res_128126 = + sext_i32_i64(flat_segment_id_128110) * + groups_per_segment_128089 + group_res_id_128125; + + if (slt64(group_res_id_128125, + groups_per_segment_128089)) { + x_111270 = ((__global + double *) group_res_arr_mem_128094)[index_of_group_res_128126 * + segred_group_sizze_111264]; + + double defunc_1_op_res_111271; + + defunc_1_op_res_111271 = x_111269 + x_111270; + x_111269 = defunc_1_op_res_111271; } } - // in-block scan (hopefully no barriers needed) + } + ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_111269; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128127; + int32_t skip_waves_128128; + + skip_waves_128128 = 1; + + double x_128114; + double x_128115; + + offset_128127 = 0; + // participating threads read initial accumulator { - skip_threads_45786 = 1; - while (slt32(skip_threads_45786, 32)) { - if (sle32(skip_threads_45786, local_tid_45771 - - squot32(local_tid_45771, 32) * 32) && - (squot32(local_tid_45771, 32) == 0 && - ltid_in_bounds_45783)) { - // read operands - { - x_45780 = ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771) - - sext_i32_i64(skip_threads_45786)]; - } - // perform operation - { - bool inactive_45787 = - 
slt64(srem64(sext_i32_i64(local_tid_45771 * - 32 + 32 - 1), - i32_res_27781), - sext_i32_i64(local_tid_45771 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_45771 - - skip_threads_45786) * - 32 + 32 - 1)); - - if (inactive_45787) { - x_45780 = x_45781; - } - if (!inactive_45787) { - float defunc_1_op_res_45782 = x_45780 + - x_45781; - - x_45780 = defunc_1_op_res_45782; - } - } - } - if (sle32(wave_sizze_45773, skip_threads_45786)) { - barrier(CLK_LOCAL_MEM_FENCE); + if (slt32(local_tid_128099, + sext_i64_i32(segred_group_sizze_111264))) { + x_128114 = ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128127)]; + } + } + offset_128127 = 1; + while (slt32(offset_128127, wave_sizze_128101)) { + if (slt32(local_tid_128099 + offset_128127, + sext_i64_i32(segred_group_sizze_111264)) && + ((local_tid_128099 - squot32(local_tid_128099, + wave_sizze_128101) * + wave_sizze_128101) & (2 * offset_128127 - 1)) == + 0) { + // read array element + { + x_128115 = ((volatile __local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128127)]; } - if (sle32(skip_threads_45786, local_tid_45771 - - squot32(local_tid_45771, 32) * 32) && - (squot32(local_tid_45771, 32) == 0 && - ltid_in_bounds_45783)) { - // write result - { - ((volatile __local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - x_45780; - x_45781 = x_45780; - } + // apply reduction operation + { + double defunc_1_op_res_128116 = x_128114 + + x_128115; + + x_128114 = defunc_1_op_res_128116; } - if (sle32(wave_sizze_45773, skip_threads_45786)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_128114; } - skip_threads_45786 *= 2; } + offset_128127 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_45771, 32) == 0 || - !ltid_in_bounds_45783)) { - // read operands - { - x_30572 = 
x_30571; - x_30571 = ((__local - float *) red_arr_mem_45775)[sext_i32_i64(squot32(local_tid_45771, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_45788 = - slt64(srem64(sext_i32_i64(local_tid_45771), - i32_res_27781), - sext_i32_i64(local_tid_45771) - - sext_i32_i64(squot32(local_tid_45771, - 32) * 32 - 1)); - - if (inactive_45788) { - x_30571 = x_30572; + while (slt32(skip_waves_128128, + squot32(sext_i64_i32(segred_group_sizze_111264) + + wave_sizze_128101 - 1, + wave_sizze_128101))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128127 = skip_waves_128128 * wave_sizze_128101; + if (slt32(local_tid_128099 + offset_128127, + sext_i64_i32(segred_group_sizze_111264)) && + ((local_tid_128099 - squot32(local_tid_128099, + wave_sizze_128101) * + wave_sizze_128101) == 0 && + (squot32(local_tid_128099, wave_sizze_128101) & + (2 * skip_waves_128128 - 1)) == 0)) { + // read array element + { + x_128115 = ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099 + + offset_128127)]; } - if (!inactive_45788) { - float defunc_1_op_res_30573 = x_30571 + x_30572; + // apply reduction operation + { + double defunc_1_op_res_128116 = x_128114 + + x_128115; - x_30571 = defunc_1_op_res_30573; + x_128114 = defunc_1_op_res_128116; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128103)[sext_i32_i64(local_tid_128099)] = + x_128114; } } - // write final result - { - ((__local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - x_30571; - } + skip_waves_128128 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_45771, 32) == 0) { - ((__local - float *) red_arr_mem_45775)[sext_i32_i64(local_tid_45771)] = - x_30572; + // and back to memory with the final result + { + if (local_tid_128099 == 0) { + ((__global double *) mem_123904)[gtid_111237] = + x_128114; + } } } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final 
values of segments - { - if (slt64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771), m_27772 * i32_res_27787 * - i32_res_27787) && slt64(sext_i32_i64(local_tid_45771), - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768))) { - ((__global - float *) mem_44536)[squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787) + - squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771) - - squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771), - i32_res_27787 * - i32_res_27787) * - (i32_res_27787 * i32_res_27787), - i32_res_27787) * i32_res_27787 + - (sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771) - - squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771), - i32_res_27787 * i32_res_27787) * - (i32_res_27787 * i32_res_27787) - - squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771) - - squot64(sext_i32_i64(virt_group_id_45779) * - squot64(segred_group_sizze_30567, - segment_sizze_nonzzero_45768) + - sext_i32_i64(local_tid_45771), - i32_res_27787 * - i32_res_27787) * - (i32_res_27787 * i32_res_27787), - i32_res_27787) * - i32_res_27787)] = ((__local - float *) red_arr_mem_45775)[(sext_i32_i64(local_tid_45771) + - (int64_t) 1) * - segment_sizze_nonzzero_45768 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: 
return; - #undef segred_group_sizze_30567 + #undef segred_group_sizze_111264 } -__kernel void mainDetailedzisegred_small_31780(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45972_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_27781, - int64_t i32_res_27787, - int64_t num_groups_31833, - int64_t segment_sizze_nonzzero_45965, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44844) +__kernel void mainzisegred_large_111603(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128439_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128437_backing_aligned_1, + int64_t num_recresids_padded_75809, + int64_t num_groups_111685, + int64_t groups_per_segment_128423, + int64_t elements_per_thread_128424, + int64_t virt_num_groups_128425, + int64_t threads_per_segment_128427, + __global unsigned char *mem_124045, + __global unsigned char *mem_124051, + __global unsigned char *mem_124054, + __global + unsigned char *group_res_arr_mem_128428, + __global + unsigned char *mainzicounter_mem_128430) { - #define segred_group_sizze_31832 (mainDetailedzisegred_group_sizze_31774) + #define segred_group_sizze_111684 (mainzisegred_group_sizze_111597) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45972_backing_0 = + __local volatile char *restrict sync_arr_mem_128439_backing_1 = + (__local volatile + char *) sync_arr_mem_128439_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128437_backing_0 = (__local volatile - char *) red_arr_mem_45972_backing_aligned_0; + char *) red_arr_mem_128437_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_45967; - int32_t local_tid_45968; - int64_t group_sizze_45971; - int32_t wave_sizze_45970; - int32_t group_tid_45969; + int32_t global_tid_128432; + int32_t local_tid_128433; + int64_t 
group_sizze_128436; + int32_t wave_sizze_128435; + int32_t group_tid_128434; + + global_tid_128432 = get_global_id(0); + local_tid_128433 = get_local_id(0); + group_sizze_128436 = get_local_size(0); + wave_sizze_128435 = LOCKSTEP_WIDTH; + group_tid_128434 = get_group_id(0); + + int32_t phys_tid_111603; - global_tid_45967 = get_global_id(0); - local_tid_45968 = get_local_id(0); - group_sizze_45971 = get_local_size(0); - wave_sizze_45970 = LOCKSTEP_WIDTH; - group_tid_45969 = get_group_id(0); + phys_tid_111603 = global_tid_128432; - int32_t phys_tid_31780; + __local char *red_arr_mem_128437; - phys_tid_31780 = global_tid_45967; + red_arr_mem_128437 = (__local char *) red_arr_mem_128437_backing_0; - __local char *red_arr_mem_45972; + __local char *sync_arr_mem_128439; - red_arr_mem_45972 = (__local char *) red_arr_mem_45972_backing_0; + sync_arr_mem_128439 = (__local char *) sync_arr_mem_128439_backing_1; - int32_t phys_group_id_45974; + int32_t phys_group_id_128441; - phys_group_id_45974 = get_group_id(0); - for (int32_t i_45975 = 0; i_45975 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * i32_res_27787, - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965))) - - phys_group_id_45974, sext_i64_i32(num_groups_31833)); - i_45975++) { - int32_t virt_group_id_45976 = phys_group_id_45974 + i_45975 * - sext_i64_i32(num_groups_31833); - int64_t gtid_31769 = squot64(squot64(sext_i32_i64(local_tid_45968), - segment_sizze_nonzzero_45965) + - sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965), - i32_res_27787); - int64_t gtid_31770 = squot64(sext_i32_i64(local_tid_45968), - segment_sizze_nonzzero_45965) + - sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965) - - squot64(squot64(sext_i32_i64(local_tid_45968), - segment_sizze_nonzzero_45965) + - sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965), i32_res_27787) * - 
i32_res_27787; - int64_t gtid_31779 = srem64(sext_i32_i64(local_tid_45968), - i32_res_27781); + phys_group_id_128441 = get_group_id(0); + for (int32_t i_128442 = 0; i_128442 < + sdiv_up32(sext_i64_i32(virt_num_groups_128425) - phys_group_id_128441, + sext_i64_i32(num_groups_111685)); i_128442++) { + int32_t virt_group_id_128443 = phys_group_id_128441 + i_128442 * + sext_i64_i32(num_groups_111685); + int32_t flat_segment_id_128444 = squot32(virt_group_id_128443, + sext_i64_i32(groups_per_segment_128423)); + int64_t global_tid_128445 = srem64(sext_i32_i64(virt_group_id_128443) * + segred_group_sizze_111684 + + sext_i32_i64(local_tid_128433), + segred_group_sizze_111684 * + groups_per_segment_128423); + int64_t gtid_111594 = sext_i32_i64(flat_segment_id_128444); + int64_t gtid_111602; + double x_acc_128446; + int64_t chunk_sizze_128447; - // apply map function if in bounds + chunk_sizze_128447 = smin64(elements_per_thread_128424, + sdiv_up64(num_recresids_padded_75809 - + global_tid_128445, + threads_per_segment_128427)); + + double x_111688; + double x_111689; + + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, i32_res_27781) && ((slt64(gtid_31769, - m_27772) && - slt64(gtid_31770, - i32_res_27787)) && - slt64(sext_i32_i64(local_tid_45968), - i32_res_27781 * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965)))) { - float x_31842 = ((__global - float *) images_mem_44381)[gtid_31769 * - N_27771 + - gtid_31779]; - bool isnan_res_31843; - - isnan_res_31843 = futrts_isnan32(x_31842); - - float defunc_1_f_res_31844; - - if (isnan_res_31843) { - defunc_1_f_res_31844 = 0.0F; + x_acc_128446 = 0.0; + } + for (int64_t i_128451 = 0; i_128451 < chunk_sizze_128447; i_128451++) { + gtid_111602 = global_tid_128445 + threads_per_segment_128427 * + i_128451; + // apply map function + { + double x_111693 = ((__global double *) mem_124045)[gtid_111594 * + num_recresids_padded_75809 + + gtid_111602]; + bool isnan_res_111694; + + isnan_res_111694 = 
futrts_isnan64(x_111693); + + double defunc_0_f_res_111695; + + if (isnan_res_111694) { + defunc_0_f_res_111695 = 0.0; } else { - float x_31841 = ((__global - float *) binop_p_mem_44390)[gtid_31770 * - N_27771 + - gtid_31779]; - float defunc_1_f_res_f_res_31845 = x_31841 * x_31842; + double x_mean_111692 = ((__global + double *) mem_124051)[gtid_111594]; + double x_111696 = x_111693 - x_mean_111692; + double defunc_0_f_res_f_res_111697 = fpow64(x_111696, 2.0); - defunc_1_f_res_31844 = defunc_1_f_res_f_res_31845; + defunc_0_f_res_111695 = defunc_0_f_res_f_res_111697; } // save map-out results { } - // save results to be reduced + // load accumulator + { + x_111688 = x_acc_128446; + } + // load new values + { + x_111689 = defunc_0_f_res_111695; + } + // apply reduction operator + { + double defunc_1_op_res_111690 = x_111688 + x_111689; + + // store in accumulator + { + x_acc_128446 = defunc_1_op_res_111690; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_111688 = x_acc_128446; + ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_111688; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128452; + int32_t skip_waves_128453; + + skip_waves_128453 = 1; + + double x_128448; + double x_128449; + + offset_128452 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128433, + sext_i64_i32(segred_group_sizze_111684))) { + x_128448 = ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128452)]; + } + } + offset_128452 = 1; + while (slt32(offset_128452, wave_sizze_128435)) { + if (slt32(local_tid_128433 + offset_128452, + sext_i64_i32(segred_group_sizze_111684)) && + ((local_tid_128433 - squot32(local_tid_128433, + wave_sizze_128435) * + wave_sizze_128435) & (2 * offset_128452 - 1)) == 0) { + // read array element + { + x_128449 = ((volatile __local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128452)]; + } + // apply 
reduction operation + { + double defunc_1_op_res_128450 = x_128448 + x_128449; + + x_128448 = defunc_1_op_res_128450; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_128448; + } + } + offset_128452 *= 2; + } + while (slt32(skip_waves_128453, + squot32(sext_i64_i32(segred_group_sizze_111684) + + wave_sizze_128435 - 1, wave_sizze_128435))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128452 = skip_waves_128453 * wave_sizze_128435; + if (slt32(local_tid_128433 + offset_128452, + sext_i64_i32(segred_group_sizze_111684)) && + ((local_tid_128433 - squot32(local_tid_128433, + wave_sizze_128435) * + wave_sizze_128435) == 0 && (squot32(local_tid_128433, + wave_sizze_128435) & (2 * + skip_waves_128453 - + 1)) == + 0)) { + // read array element + { + x_128449 = ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128452)]; + } + // apply reduction operation + { + double defunc_1_op_res_128450 = x_128448 + x_128449; + + x_128448 = defunc_1_op_res_128450; + } + // write result of operation { ((__local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - defunc_1_f_res_31844; + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_128448; } - } else { - ((__local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - 0.0F; } + skip_waves_128453 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27781)) { - // perform segmented scan to imitate reduction + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128433) == (int64_t) 0) { + x_acc_128446 = x_128448; + } + } + if (groups_per_segment_128423 == (int64_t) 1) { + // first thread in group saves final result to memory { - float x_31836; - float x_31837; - float x_45977; - float x_45978; - bool ltid_in_bounds_45980; - - ltid_in_bounds_45980 = slt64(sext_i32_i64(local_tid_45968), - i32_res_27781 * - squot64(segred_group_sizze_31832, - 
segment_sizze_nonzzero_45965)); - - int32_t skip_threads_45981; - - // read input for in-block scan + if (local_tid_128433 == 0) { + ((__global double *) mem_124054)[gtid_111594] = + x_acc_128446; + } + } + } else { + int32_t old_counter_128454; + + // first thread in group saves group result to global memory + { + if (local_tid_128433 == 0) { + ((__global + double *) group_res_arr_mem_128428)[sext_i32_i64(virt_group_id_128443) * + segred_group_sizze_111684] = + x_acc_128446; + mem_fence_global(); + old_counter_128454 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128430)[sext_i32_i64(srem32(flat_segment_id_128444, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128439)[(int64_t) 0] = + old_counter_128454 == groups_per_segment_128423 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128455; + + is_last_group_128455 = ((__local + bool *) sync_arr_mem_128439)[(int64_t) 0]; + if (is_last_group_128455) { + if (local_tid_128433 == 0) { + old_counter_128454 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128430)[sext_i32_i64(srem32(flat_segment_id_128444, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128423)); + } + // read in the per-group-results { - if (ltid_in_bounds_45980) { - x_31837 = ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)]; - if ((local_tid_45968 - squot32(local_tid_45968, 32) * - 32) == 0) { - x_31836 = x_31837; + int64_t read_per_thread_128456 = + sdiv_up64(groups_per_segment_128423, + segred_group_sizze_111684); + + x_111688 = 0.0; + for (int64_t i_128457 = 0; i_128457 < + read_per_thread_128456; i_128457++) { + int64_t group_res_id_128458 = + sext_i32_i64(local_tid_128433) * + read_per_thread_128456 + i_128457; + int64_t index_of_group_res_128459 = + sext_i32_i64(flat_segment_id_128444) * + groups_per_segment_128423 + group_res_id_128458; + + if (slt64(group_res_id_128458, + 
groups_per_segment_128423)) { + x_111689 = ((__global + double *) group_res_arr_mem_128428)[index_of_group_res_128459 * + segred_group_sizze_111684]; + + double defunc_1_op_res_111690; + + defunc_1_op_res_111690 = x_111688 + x_111689; + x_111688 = defunc_1_op_res_111690; } } } - // in-block scan (hopefully no barriers needed) + ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_111688; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_45981 = 1; - while (slt32(skip_threads_45981, 32)) { - if (sle32(skip_threads_45981, local_tid_45968 - - squot32(local_tid_45968, 32) * 32) && - ltid_in_bounds_45980) { - // read operands + int32_t offset_128460; + int32_t skip_waves_128461; + + skip_waves_128461 = 1; + + double x_128448; + double x_128449; + + offset_128460 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128433, + sext_i64_i32(segred_group_sizze_111684))) { + x_128448 = ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128460)]; + } + } + offset_128460 = 1; + while (slt32(offset_128460, wave_sizze_128435)) { + if (slt32(local_tid_128433 + offset_128460, + sext_i64_i32(segred_group_sizze_111684)) && + ((local_tid_128433 - squot32(local_tid_128433, + wave_sizze_128435) * + wave_sizze_128435) & (2 * offset_128460 - 1)) == + 0) { + // read array element { - x_31836 = ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968) - - sext_i32_i64(skip_threads_45981)]; + x_128449 = ((volatile __local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128460)]; } - // perform operation + // apply reduction operation { - bool inactive_45982 = - slt64(srem64(sext_i32_i64(local_tid_45968), - i32_res_27781), - sext_i32_i64(local_tid_45968) - - sext_i32_i64(local_tid_45968 - - skip_threads_45981)); + double defunc_1_op_res_128450 = x_128448 + + x_128449; - if (inactive_45982) { - x_31836 = x_31837; - } - if 
(!inactive_45982) { - float defunc_1_op_res_31838 = x_31836 + - x_31837; - - x_31836 = defunc_1_op_res_31838; - } + x_128448 = defunc_1_op_res_128450; } - } - if (sle32(wave_sizze_45970, skip_threads_45981)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_45981, local_tid_45968 - - squot32(local_tid_45968, 32) * 32) && - ltid_in_bounds_45980) { - // write result + // write result of operation { ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - x_31836; - x_31837 = x_31836; - } - } - if (sle32(wave_sizze_45970, skip_threads_45981)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_45981 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_45968 - squot32(local_tid_45968, 32) * 32) == - 31 && ltid_in_bounds_45980) { - ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(squot32(local_tid_45968, - 32))] = - x_31836; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_45983; - - // read input for in-block scan - { - if (squot32(local_tid_45968, 32) == 0 && - ltid_in_bounds_45980) { - x_45978 = ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)]; - if ((local_tid_45968 - squot32(local_tid_45968, - 32) * 32) == 0) { - x_45977 = x_45978; + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_128448; } } + offset_128460 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_45983 = 1; - while (slt32(skip_threads_45983, 32)) { - if (sle32(skip_threads_45983, local_tid_45968 - - squot32(local_tid_45968, 32) * 32) && - (squot32(local_tid_45968, 32) == 0 && - ltid_in_bounds_45980)) { - // read operands - { - x_45977 = ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968) - - sext_i32_i64(skip_threads_45983)]; - } - // perform operation - { - bool 
inactive_45984 = - slt64(srem64(sext_i32_i64(local_tid_45968 * - 32 + 32 - 1), - i32_res_27781), - sext_i32_i64(local_tid_45968 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_45968 - - skip_threads_45983) * - 32 + 32 - 1)); - - if (inactive_45984) { - x_45977 = x_45978; - } - if (!inactive_45984) { - float defunc_1_op_res_45979 = x_45977 + - x_45978; - - x_45977 = defunc_1_op_res_45979; - } - } - } - if (sle32(wave_sizze_45970, skip_threads_45983)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_128461, + squot32(sext_i64_i32(segred_group_sizze_111684) + + wave_sizze_128435 - 1, + wave_sizze_128435))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128460 = skip_waves_128461 * wave_sizze_128435; + if (slt32(local_tid_128433 + offset_128460, + sext_i64_i32(segred_group_sizze_111684)) && + ((local_tid_128433 - squot32(local_tid_128433, + wave_sizze_128435) * + wave_sizze_128435) == 0 && + (squot32(local_tid_128433, wave_sizze_128435) & + (2 * skip_waves_128461 - 1)) == 0)) { + // read array element + { + x_128449 = ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433 + + offset_128460)]; } - if (sle32(skip_threads_45983, local_tid_45968 - - squot32(local_tid_45968, 32) * 32) && - (squot32(local_tid_45968, 32) == 0 && - ltid_in_bounds_45980)) { - // write result - { - ((volatile __local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - x_45977; - x_45978 = x_45977; - } + // apply reduction operation + { + double defunc_1_op_res_128450 = x_128448 + + x_128449; + + x_128448 = defunc_1_op_res_128450; } - if (sle32(wave_sizze_45970, skip_threads_45983)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((__local + double *) red_arr_mem_128437)[sext_i32_i64(local_tid_128433)] = + x_128448; } - skip_threads_45983 *= 2; } + skip_waves_128461 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_45968, 32) == 0 || - !ltid_in_bounds_45980)) { - // read 
operands - { - x_31837 = x_31836; - x_31836 = ((__local - float *) red_arr_mem_45972)[sext_i32_i64(squot32(local_tid_45968, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_45985 = - slt64(srem64(sext_i32_i64(local_tid_45968), - i32_res_27781), - sext_i32_i64(local_tid_45968) - - sext_i32_i64(squot32(local_tid_45968, - 32) * 32 - 1)); - - if (inactive_45985) { - x_31836 = x_31837; - } - if (!inactive_45985) { - float defunc_1_op_res_31838 = x_31836 + x_31837; - - x_31836 = defunc_1_op_res_31838; - } - } - // write final result - { - ((__local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - x_31836; + // and back to memory with the final result + { + if (local_tid_128433 == 0) { + ((__global double *) mem_124054)[gtid_111594] = + x_128448; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_45968, 32) == 0) { - ((__local - float *) red_arr_mem_45972)[sext_i32_i64(local_tid_45968)] = - x_31837; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965) + - sext_i32_i64(local_tid_45968), m_27772 * i32_res_27787) && - slt64(sext_i32_i64(local_tid_45968), - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965))) { - ((__global - float *) mem_44844)[squot64(sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965) + - sext_i32_i64(local_tid_45968), - i32_res_27787) * i32_res_27787 + - (sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965) + - sext_i32_i64(local_tid_45968) - - squot64(sext_i32_i64(virt_group_id_45976) * - squot64(segred_group_sizze_31832, - segment_sizze_nonzzero_45965) + - sext_i32_i64(local_tid_45968), - i32_res_27787) * - i32_res_27787)] = ((__local - float *) 
red_arr_mem_45972)[(sext_i32_i64(local_tid_45968) + - (int64_t) 1) * - segment_sizze_nonzzero_45965 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_31832 + #undef segred_group_sizze_111684 } -__kernel void mainDetailedzisegred_small_31917(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46060_backing_aligned_0, - int64_t m_27772, - int64_t i32_res_27787, - int64_t num_groups_31966, - int64_t segment_sizze_nonzzero_46053, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44910) +__kernel void mainzisegred_large_111633(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128368_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128366_backing_aligned_1, + int64_t num_recresids_padded_75809, + int64_t num_groups_111663, + int64_t groups_per_segment_128352, + int64_t elements_per_thread_128353, + int64_t virt_num_groups_128354, __global + unsigned char *mem_124045, __global + unsigned char *mem_124048, __global + unsigned char *group_res_arr_mem_128357, + __global + unsigned char *mainzicounter_mem_128359) { - #define segred_group_sizze_31965 (mainDetailedzisegred_group_sizze_31911) + #define segred_group_sizze_111662 (mainzisegred_group_sizze_111627) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46060_backing_0 = + __local volatile char *restrict sync_arr_mem_128368_backing_1 = + (__local volatile + char *) sync_arr_mem_128368_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128366_backing_0 = (__local volatile - char *) red_arr_mem_46060_backing_aligned_0; + char *) red_arr_mem_128366_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46055; - int32_t local_tid_46056; - int64_t group_sizze_46059; - 
int32_t wave_sizze_46058; - int32_t group_tid_46057; + int32_t global_tid_128361; + int32_t local_tid_128362; + int64_t group_sizze_128365; + int32_t wave_sizze_128364; + int32_t group_tid_128363; + + global_tid_128361 = get_global_id(0); + local_tid_128362 = get_local_id(0); + group_sizze_128365 = get_local_size(0); + wave_sizze_128364 = LOCKSTEP_WIDTH; + group_tid_128363 = get_group_id(0); + + int32_t phys_tid_111633; - global_tid_46055 = get_global_id(0); - local_tid_46056 = get_local_id(0); - group_sizze_46059 = get_local_size(0); - wave_sizze_46058 = LOCKSTEP_WIDTH; - group_tid_46057 = get_group_id(0); + phys_tid_111633 = global_tid_128361; - int32_t phys_tid_31917; + __local char *red_arr_mem_128366; - phys_tid_31917 = global_tid_46055; + red_arr_mem_128366 = (__local char *) red_arr_mem_128366_backing_0; - __local char *red_arr_mem_46060; + __local char *sync_arr_mem_128368; - red_arr_mem_46060 = (__local char *) red_arr_mem_46060_backing_0; + sync_arr_mem_128368 = (__local char *) sync_arr_mem_128368_backing_1; - int32_t phys_group_id_46062; + int32_t phys_group_id_128370; - phys_group_id_46062 = get_group_id(0); - for (int32_t i_46063 = 0; i_46063 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * i32_res_27787, - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053))) - - phys_group_id_46062, sext_i64_i32(num_groups_31966)); - i_46063++) { - int32_t virt_group_id_46064 = phys_group_id_46062 + i_46063 * - sext_i64_i32(num_groups_31966); - int64_t gtid_31906 = squot64(squot64(sext_i32_i64(local_tid_46056), - segment_sizze_nonzzero_46053) + - sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053), - i32_res_27787); - int64_t gtid_31907 = squot64(sext_i32_i64(local_tid_46056), - segment_sizze_nonzzero_46053) + - sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053) - - squot64(squot64(sext_i32_i64(local_tid_46056), - segment_sizze_nonzzero_46053) + - 
sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053), i32_res_27787) * - i32_res_27787; - int64_t gtid_31916 = srem64(sext_i32_i64(local_tid_46056), - i32_res_27787); + phys_group_id_128370 = get_group_id(0); + for (int32_t i_128371 = 0; i_128371 < + sdiv_up32(sext_i64_i32(virt_num_groups_128354) - phys_group_id_128370, + sext_i64_i32(num_groups_111663)); i_128371++) { + int32_t virt_group_id_128372 = phys_group_id_128370 + i_128371 * + sext_i64_i32(num_groups_111663); + int32_t flat_segment_id_128373 = squot32(virt_group_id_128372, + sext_i64_i32(groups_per_segment_128352)); + int64_t global_tid_128374 = srem64(sext_i32_i64(virt_group_id_128372) * + segred_group_sizze_111662 + + sext_i32_i64(local_tid_128362), + segred_group_sizze_111662 * + groups_per_segment_128352); + int64_t gtid_111624 = sext_i32_i64(flat_segment_id_128373); + int64_t gtid_111632; + double x_acc_128375; + int64_t chunk_sizze_128376; + int64_t starting_point_128377; - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_27787) && ((slt64(gtid_31906, - m_27772) && - slt64(gtid_31907, - i32_res_27787)) && - slt64(sext_i32_i64(local_tid_46056), - i32_res_27787 * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053)))) { - float x_31975 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_31906 * - i32_res_27787 + - gtid_31916]; - float x_31976 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_31906 * - (i32_res_27787 * - i32_res_27787) + - gtid_31907 * - i32_res_27787 + - gtid_31916]; - float defunc_1_f_res_31977 = x_31975 * x_31976; - - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - defunc_1_f_res_31977; - } + starting_point_128377 = global_tid_128374 * elements_per_thread_128353; + + int64_t remaining_elements_128378; + + remaining_elements_128378 = num_recresids_padded_75809 - + starting_point_128377; + if 
(sle64(remaining_elements_128378, (int64_t) 0) || + sle64(num_recresids_padded_75809, starting_point_128377)) { + chunk_sizze_128376 = (int64_t) 0; + } else { + if (slt64(num_recresids_padded_75809, (global_tid_128374 + + (int64_t) 1) * + elements_per_thread_128353)) { + chunk_sizze_128376 = num_recresids_padded_75809 - + global_tid_128374 * elements_per_thread_128353; } else { - ((__local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - 0.0F; + chunk_sizze_128376 = elements_per_thread_128353; } } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27787)) { - // perform segmented scan to imitate reduction - { - float x_31969; - float x_31970; - float x_46065; - float x_46066; - bool ltid_in_bounds_46068; - - ltid_in_bounds_46068 = slt64(sext_i32_i64(local_tid_46056), - i32_res_27787 * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053)); - - int32_t skip_threads_46069; - - // read input for in-block scan + + double x_111666; + double x_111667; + + // neutral-initialise the accumulators + { + x_acc_128375 = 0.0; + } + for (int64_t i_128386 = 0; i_128386 < elements_per_thread_128353; + i_128386++) { + gtid_111632 = sext_i32_i64(local_tid_128362) + + (squot64(global_tid_128374, segred_group_sizze_111662) * + elements_per_thread_128353 + i_128386) * + segred_group_sizze_111662; + if (slt64(gtid_111632, num_recresids_padded_75809)) { + // apply map function { - if (ltid_in_bounds_46068) { - x_31970 = ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)]; - if ((local_tid_46056 - squot32(local_tid_46056, 32) * - 32) == 0) { - x_31969 = x_31970; - } + double x_111674 = ((__global + double *) mem_124045)[gtid_111624 * + num_recresids_padded_75809 + + gtid_111632]; + + // save map-out results + { } + // load accumulator + { + x_111666 = x_acc_128375; } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46069 = 1; - while (slt32(skip_threads_46069, 32)) { - if 
(sle32(skip_threads_46069, local_tid_46056 - - squot32(local_tid_46056, 32) * 32) && - ltid_in_bounds_46068) { - // read operands - { - x_31969 = ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056) - - sext_i32_i64(skip_threads_46069)]; - } - // perform operation - { - bool inactive_46070 = - slt64(srem64(sext_i32_i64(local_tid_46056), - i32_res_27787), - sext_i32_i64(local_tid_46056) - - sext_i32_i64(local_tid_46056 - - skip_threads_46069)); + // load new values + { + x_111667 = x_111674; + } + // apply reduction operator + { + bool isnan_res_111668; + + isnan_res_111668 = futrts_isnan64(x_111666); + + double defunc_1_op_res_111669; + + if (isnan_res_111668) { + defunc_1_op_res_111669 = x_111667; + } else { + bool isnan_res_111670; + + isnan_res_111670 = futrts_isnan64(x_111667); + + double defunc_1_op_res_f_res_111671; + + if (isnan_res_111670) { + defunc_1_op_res_f_res_111671 = x_111666; + } else { + double defunc_1_op_res_f_res_f_res_111672 = + x_111666 + x_111667; - if (inactive_46070) { - x_31969 = x_31970; - } - if (!inactive_46070) { - float defunc_1_op_res_31971 = x_31969 + - x_31970; - - x_31969 = defunc_1_op_res_31971; - } + defunc_1_op_res_f_res_111671 = + defunc_1_op_res_f_res_f_res_111672; } + defunc_1_op_res_111669 = + defunc_1_op_res_f_res_111671; } - if (sle32(wave_sizze_46058, skip_threads_46069)) { - barrier(CLK_LOCAL_MEM_FENCE); + // store in accumulator + { + x_acc_128375 = defunc_1_op_res_111669; } - if (sle32(skip_threads_46069, local_tid_46056 - - squot32(local_tid_46056, 32) * 32) && - ltid_in_bounds_46068) { - // write result - { - ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - x_31969; - x_31970 = x_31969; + } + } + } + // to reduce current chunk, first store our result in memory + { + x_111666 = x_acc_128375; + ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = + x_111666; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128387; + int32_t 
skip_waves_128388; + + skip_waves_128388 = 1; + + double x_128379; + double x_128380; + + offset_128387 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128362, + sext_i64_i32(segred_group_sizze_111662))) { + x_128379 = ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128387)]; + } + } + offset_128387 = 1; + while (slt32(offset_128387, wave_sizze_128364)) { + if (slt32(local_tid_128362 + offset_128387, + sext_i64_i32(segred_group_sizze_111662)) && + ((local_tid_128362 - squot32(local_tid_128362, + wave_sizze_128364) * + wave_sizze_128364) & (2 * offset_128387 - 1)) == 0) { + // read array element + { + x_128380 = ((volatile __local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128387)]; + } + // apply reduction operation + { + bool isnan_res_128381; + + isnan_res_128381 = futrts_isnan64(x_128379); + + double defunc_1_op_res_128382; + + if (isnan_res_128381) { + defunc_1_op_res_128382 = x_128380; + } else { + bool isnan_res_128383; + + isnan_res_128383 = futrts_isnan64(x_128380); + + double defunc_1_op_res_f_res_128384; + + if (isnan_res_128383) { + defunc_1_op_res_f_res_128384 = x_128379; + } else { + double defunc_1_op_res_f_res_f_res_128385 = + x_128379 + x_128380; + + defunc_1_op_res_f_res_128384 = + defunc_1_op_res_f_res_f_res_128385; } + defunc_1_op_res_128382 = + defunc_1_op_res_f_res_128384; } - if (sle32(wave_sizze_46058, skip_threads_46069)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46069 *= 2; + x_128379 = defunc_1_op_res_128382; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46056 - squot32(local_tid_46056, 32) * 32) == - 31 && ltid_in_bounds_46068) { + // write result of operation + { ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(squot32(local_tid_46056, - 32))] = - x_31969; + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = + x_128379; } } + 
offset_128387 *= 2; + } + while (slt32(skip_waves_128388, + squot32(sext_i64_i32(segred_group_sizze_111662) + + wave_sizze_128364 - 1, wave_sizze_128364))) { barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46071; - - // read input for in-block scan + offset_128387 = skip_waves_128388 * wave_sizze_128364; + if (slt32(local_tid_128362 + offset_128387, + sext_i64_i32(segred_group_sizze_111662)) && + ((local_tid_128362 - squot32(local_tid_128362, + wave_sizze_128364) * + wave_sizze_128364) == 0 && (squot32(local_tid_128362, + wave_sizze_128364) & + (2 * skip_waves_128388 - + 1)) == 0)) { + // read array element + { + x_128380 = ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128387)]; + } + // apply reduction operation { - if (squot32(local_tid_46056, 32) == 0 && - ltid_in_bounds_46068) { - x_46066 = ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)]; - if ((local_tid_46056 - squot32(local_tid_46056, - 32) * 32) == 0) { - x_46065 = x_46066; + bool isnan_res_128381; + + isnan_res_128381 = futrts_isnan64(x_128379); + + double defunc_1_op_res_128382; + + if (isnan_res_128381) { + defunc_1_op_res_128382 = x_128380; + } else { + bool isnan_res_128383; + + isnan_res_128383 = futrts_isnan64(x_128380); + + double defunc_1_op_res_f_res_128384; + + if (isnan_res_128383) { + defunc_1_op_res_f_res_128384 = x_128379; + } else { + double defunc_1_op_res_f_res_f_res_128385 = + x_128379 + x_128380; + + defunc_1_op_res_f_res_128384 = + defunc_1_op_res_f_res_f_res_128385; } + defunc_1_op_res_128382 = + defunc_1_op_res_f_res_128384; } + x_128379 = defunc_1_op_res_128382; } - // in-block scan (hopefully no barriers needed) + // write result of operation { - skip_threads_46071 = 1; - while (slt32(skip_threads_46071, 32)) { - if (sle32(skip_threads_46071, local_tid_46056 - - squot32(local_tid_46056, 32) * 32) && - 
(squot32(local_tid_46056, 32) == 0 && - ltid_in_bounds_46068)) { - // read operands - { - x_46065 = ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056) - - sext_i32_i64(skip_threads_46071)]; - } - // perform operation - { - bool inactive_46072 = - slt64(srem64(sext_i32_i64(local_tid_46056 * - 32 + 32 - 1), - i32_res_27787), - sext_i32_i64(local_tid_46056 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46056 - - skip_threads_46071) * - 32 + 32 - 1)); - - if (inactive_46072) { - x_46065 = x_46066; - } - if (!inactive_46072) { - float defunc_1_op_res_46067 = x_46065 + - x_46066; - - x_46065 = defunc_1_op_res_46067; - } - } - } - if (sle32(wave_sizze_46058, skip_threads_46071)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46071, local_tid_46056 - - squot32(local_tid_46056, 32) * 32) && - (squot32(local_tid_46056, 32) == 0 && - ltid_in_bounds_46068)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - x_46065; - x_46066 = x_46065; - } - } - if (sle32(wave_sizze_46058, skip_threads_46071)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46071 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46056, 32) == 0 || - !ltid_in_bounds_46068)) { - // read operands - { - x_31970 = x_31969; - x_31969 = ((__local - float *) red_arr_mem_46060)[sext_i32_i64(squot32(local_tid_46056, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46073 = - slt64(srem64(sext_i32_i64(local_tid_46056), - i32_res_27787), - sext_i32_i64(local_tid_46056) - - sext_i32_i64(squot32(local_tid_46056, - 32) * 32 - 1)); - - if (inactive_46073) { - x_31969 = x_31970; - } - if (!inactive_46073) { - float defunc_1_op_res_31971 = x_31969 + x_31970; - - x_31969 = defunc_1_op_res_31971; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - x_31969; - } - } - } 
- barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46056, 32) == 0) { ((__local - float *) red_arr_mem_46060)[sext_i32_i64(local_tid_46056)] = - x_31970; + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = + x_128379; } } - barrier(CLK_LOCAL_MEM_FENCE); + skip_waves_128388 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053) + - sext_i32_i64(local_tid_46056), m_27772 * i32_res_27787) && - slt64(sext_i32_i64(local_tid_46056), - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053))) { - ((__global - float *) mem_44910)[squot64(sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053) + - sext_i32_i64(local_tid_46056), - i32_res_27787) * i32_res_27787 + - (sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053) + - sext_i32_i64(local_tid_46056) - - squot64(sext_i32_i64(virt_group_id_46064) * - squot64(segred_group_sizze_31965, - segment_sizze_nonzzero_46053) + - sext_i32_i64(local_tid_46056), - i32_res_27787) * - i32_res_27787)] = ((__local - float *) red_arr_mem_46060)[(sext_i32_i64(local_tid_46056) + - (int64_t) 1) * - segment_sizze_nonzzero_46053 - - (int64_t) 1]; + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128362) == (int64_t) 0) { + x_acc_128375 = x_128379; + } } - } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_1: - return; - #undef segred_group_sizze_31965 -} -__kernel void mainDetailedzisegred_small_32047(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46192_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_27787, - int64_t num_groups_32094, - int64_t 
segment_sizze_nonzzero_46185, - __global - unsigned char *mem_44397, - __global - unsigned char *defunc_4_map_res_mem_44916, - __global - unsigned char *mem_45134) -{ - #define segred_group_sizze_32093 (mainDetailedzisegred_group_sizze_32041) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46192_backing_0 = - (__local volatile - char *) red_arr_mem_46192_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46187; - int32_t local_tid_46188; - int64_t group_sizze_46191; - int32_t wave_sizze_46190; - int32_t group_tid_46189; - - global_tid_46187 = get_global_id(0); - local_tid_46188 = get_local_id(0); - group_sizze_46191 = get_local_size(0); - wave_sizze_46190 = LOCKSTEP_WIDTH; - group_tid_46189 = get_group_id(0); - - int32_t phys_tid_32047; - - phys_tid_32047 = global_tid_46187; - - __local char *red_arr_mem_46192; - - red_arr_mem_46192 = (__local char *) red_arr_mem_46192_backing_0; - - int32_t phys_group_id_46194; - - phys_group_id_46194 = get_group_id(0); - for (int32_t i_46195 = 0; i_46195 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772 * N_27771, - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185))) - - phys_group_id_46194, sext_i64_i32(num_groups_32094)); - i_46195++) { - int32_t virt_group_id_46196 = phys_group_id_46194 + i_46195 * - sext_i64_i32(num_groups_32094); - int64_t gtid_32036 = squot64(squot64(sext_i32_i64(local_tid_46188), - segment_sizze_nonzzero_46185) + - sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185), - N_27771); - int64_t gtid_32037 = squot64(sext_i32_i64(local_tid_46188), - segment_sizze_nonzzero_46185) + - sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185) - - squot64(squot64(sext_i32_i64(local_tid_46188), - segment_sizze_nonzzero_46185) + - sext_i32_i64(virt_group_id_46196) * - 
squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185), N_27771) * - N_27771; - int64_t gtid_32046 = srem64(sext_i32_i64(local_tid_46188), - i32_res_27787); - - // apply map function if in bounds - { - if (slt64((int64_t) 0, i32_res_27787) && ((slt64(gtid_32036, - m_27772) && - slt64(gtid_32037, - N_27771)) && - slt64(sext_i32_i64(local_tid_46188), - i32_res_27787 * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185)))) { - float x_32102 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_32036 * - i32_res_27787 + - gtid_32046]; - float x_32103 = ((__global float *) mem_44397)[gtid_32037 * - i32_res_27787 + - gtid_32046]; - float defunc_1_f_res_32104 = x_32102 * x_32103; - - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] = - defunc_1_f_res_32104; + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_128362) == (int64_t) 0)) { + x_acc_128375 = 0.0; } - } else { - ((__local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] = - 0.0F; } } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27787)) { - // perform segmented scan to imitate reduction + x_111666 = x_acc_128375; + if (groups_per_segment_128352 == (int64_t) 1) { + // first thread in group saves final result to memory { - float x_32097; - float x_32098; - float x_46197; - float x_46198; - bool ltid_in_bounds_46200; - - ltid_in_bounds_46200 = slt64(sext_i32_i64(local_tid_46188), - i32_res_27787 * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185)); - - int32_t skip_threads_46201; - - // read input for in-block scan - { - if (ltid_in_bounds_46200) { - x_32098 = ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)]; - if ((local_tid_46188 - squot32(local_tid_46188, 32) * - 32) == 0) { - x_32097 = x_32098; - } - } + if (local_tid_128362 == 0) { + ((__global double *) 
mem_124048)[gtid_111624] = + x_acc_128375; } - // in-block scan (hopefully no barriers needed) + } + } else { + int32_t old_counter_128389; + + // first thread in group saves group result to global memory + { + if (local_tid_128362 == 0) { + ((__global + double *) group_res_arr_mem_128357)[sext_i32_i64(virt_group_id_128372) * + segred_group_sizze_111662] = + x_acc_128375; + mem_fence_global(); + old_counter_128389 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128359)[sext_i32_i64(srem32(flat_segment_id_128373, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128368)[(int64_t) 0] = + old_counter_128389 == groups_per_segment_128352 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128390; + + is_last_group_128390 = ((__local + bool *) sync_arr_mem_128368)[(int64_t) 0]; + if (is_last_group_128390) { + if (local_tid_128362 == 0) { + old_counter_128389 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128359)[sext_i32_i64(srem32(flat_segment_id_128373, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128352)); + } + // read in the per-group-results { - skip_threads_46201 = 1; - while (slt32(skip_threads_46201, 32)) { - if (sle32(skip_threads_46201, local_tid_46188 - - squot32(local_tid_46188, 32) * 32) && - ltid_in_bounds_46200) { - // read operands - { - x_32097 = ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188) - - sext_i32_i64(skip_threads_46201)]; - } - // perform operation - { - bool inactive_46202 = - slt64(srem64(sext_i32_i64(local_tid_46188), - i32_res_27787), - sext_i32_i64(local_tid_46188) - - sext_i32_i64(local_tid_46188 - - skip_threads_46201)); + int64_t read_per_thread_128391 = + sdiv_up64(groups_per_segment_128352, + segred_group_sizze_111662); + + x_111666 = 0.0; + for (int64_t i_128392 = 0; i_128392 < + read_per_thread_128391; i_128392++) { + int64_t group_res_id_128393 = + sext_i32_i64(local_tid_128362) * 
+ read_per_thread_128391 + i_128392; + int64_t index_of_group_res_128394 = + sext_i32_i64(flat_segment_id_128373) * + groups_per_segment_128352 + group_res_id_128393; + + if (slt64(group_res_id_128393, + groups_per_segment_128352)) { + x_111667 = ((__global + double *) group_res_arr_mem_128357)[index_of_group_res_128394 * + segred_group_sizze_111662]; + + bool isnan_res_111668; + + isnan_res_111668 = futrts_isnan64(x_111666); + + double defunc_1_op_res_111669; + + if (isnan_res_111668) { + defunc_1_op_res_111669 = x_111667; + } else { + bool isnan_res_111670; - if (inactive_46202) { - x_32097 = x_32098; - } - if (!inactive_46202) { - float defunc_1_op_res_32099 = x_32097 + - x_32098; + isnan_res_111670 = futrts_isnan64(x_111667); + + double defunc_1_op_res_f_res_111671; + + if (isnan_res_111670) { + defunc_1_op_res_f_res_111671 = x_111666; + } else { + double defunc_1_op_res_f_res_f_res_111672 = + x_111666 + x_111667; - x_32097 = defunc_1_op_res_32099; + defunc_1_op_res_f_res_111671 = + defunc_1_op_res_f_res_f_res_111672; } + defunc_1_op_res_111669 = + defunc_1_op_res_f_res_111671; } + x_111666 = defunc_1_op_res_111669; } - if (sle32(wave_sizze_46190, skip_threads_46201)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46201, local_tid_46188 - - squot32(local_tid_46188, 32) * 32) && - ltid_in_bounds_46200) { - // write result - { - ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] = - x_32097; - x_32098 = x_32097; - } - } - if (sle32(wave_sizze_46190, skip_threads_46201)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46201 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46188 - squot32(local_tid_46188, 32) * 32) == - 31 && ltid_in_bounds_46200) { - ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(squot32(local_tid_46188, - 32))] = - x_32097; } } + ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = 
+ x_111666; barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + // reduce the per-group results { - int32_t skip_threads_46203; + int32_t offset_128395; + int32_t skip_waves_128396; - // read input for in-block scan + skip_waves_128396 = 1; + + double x_128379; + double x_128380; + + offset_128395 = 0; + // participating threads read initial accumulator { - if (squot32(local_tid_46188, 32) == 0 && - ltid_in_bounds_46200) { - x_46198 = ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)]; - if ((local_tid_46188 - squot32(local_tid_46188, - 32) * 32) == 0) { - x_46197 = x_46198; + if (slt32(local_tid_128362, + sext_i64_i32(segred_group_sizze_111662))) { + x_128379 = ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128395)]; + } + } + offset_128395 = 1; + while (slt32(offset_128395, wave_sizze_128364)) { + if (slt32(local_tid_128362 + offset_128395, + sext_i64_i32(segred_group_sizze_111662)) && + ((local_tid_128362 - squot32(local_tid_128362, + wave_sizze_128364) * + wave_sizze_128364) & (2 * offset_128395 - 1)) == + 0) { + // read array element + { + x_128380 = ((volatile __local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128395)]; } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46203 = 1; - while (slt32(skip_threads_46203, 32)) { - if (sle32(skip_threads_46203, local_tid_46188 - - squot32(local_tid_46188, 32) * 32) && - (squot32(local_tid_46188, 32) == 0 && - ltid_in_bounds_46200)) { - // read operands - { - x_46197 = ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188) - - sext_i32_i64(skip_threads_46203)]; - } - // perform operation - { - bool inactive_46204 = - slt64(srem64(sext_i32_i64(local_tid_46188 * - 32 + 32 - 1), - i32_res_27787), - sext_i32_i64(local_tid_46188 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46188 - - skip_threads_46203) * - 32 + 32 - 
1)); + // apply reduction operation + { + bool isnan_res_128381; + + isnan_res_128381 = futrts_isnan64(x_128379); + + double defunc_1_op_res_128382; + + if (isnan_res_128381) { + defunc_1_op_res_128382 = x_128380; + } else { + bool isnan_res_128383; - if (inactive_46204) { - x_46197 = x_46198; - } - if (!inactive_46204) { - float defunc_1_op_res_46199 = x_46197 + - x_46198; + isnan_res_128383 = futrts_isnan64(x_128380); + + double defunc_1_op_res_f_res_128384; + + if (isnan_res_128383) { + defunc_1_op_res_f_res_128384 = x_128379; + } else { + double + defunc_1_op_res_f_res_f_res_128385 = + x_128379 + x_128380; - x_46197 = defunc_1_op_res_46199; + defunc_1_op_res_f_res_128384 = + defunc_1_op_res_f_res_f_res_128385; } + defunc_1_op_res_128382 = + defunc_1_op_res_f_res_128384; } + x_128379 = defunc_1_op_res_128382; } - if (sle32(wave_sizze_46190, skip_threads_46203)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46203, local_tid_46188 - - squot32(local_tid_46188, 32) * 32) && - (squot32(local_tid_46188, 32) == 0 && - ltid_in_bounds_46200)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] = - x_46197; - x_46198 = x_46197; - } - } - if (sle32(wave_sizze_46190, skip_threads_46203)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = + x_128379; } - skip_threads_46203 *= 2; } + offset_128395 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46188, 32) == 0 || - !ltid_in_bounds_46200)) { - // read operands - { - x_32098 = x_32097; - x_32097 = ((__local - float *) red_arr_mem_46192)[sext_i32_i64(squot32(local_tid_46188, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46205 = - slt64(srem64(sext_i32_i64(local_tid_46188), - i32_res_27787), - sext_i32_i64(local_tid_46188) - - 
sext_i32_i64(squot32(local_tid_46188, - 32) * 32 - 1)); - - if (inactive_46205) { - x_32097 = x_32098; + while (slt32(skip_waves_128396, + squot32(sext_i64_i32(segred_group_sizze_111662) + + wave_sizze_128364 - 1, + wave_sizze_128364))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128395 = skip_waves_128396 * wave_sizze_128364; + if (slt32(local_tid_128362 + offset_128395, + sext_i64_i32(segred_group_sizze_111662)) && + ((local_tid_128362 - squot32(local_tid_128362, + wave_sizze_128364) * + wave_sizze_128364) == 0 && + (squot32(local_tid_128362, wave_sizze_128364) & + (2 * skip_waves_128396 - 1)) == 0)) { + // read array element + { + x_128380 = ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362 + + offset_128395)]; } - if (!inactive_46205) { - float defunc_1_op_res_32099 = x_32097 + x_32098; + // apply reduction operation + { + bool isnan_res_128381; + + isnan_res_128381 = futrts_isnan64(x_128379); + + double defunc_1_op_res_128382; - x_32097 = defunc_1_op_res_32099; + if (isnan_res_128381) { + defunc_1_op_res_128382 = x_128380; + } else { + bool isnan_res_128383; + + isnan_res_128383 = futrts_isnan64(x_128380); + + double defunc_1_op_res_f_res_128384; + + if (isnan_res_128383) { + defunc_1_op_res_f_res_128384 = x_128379; + } else { + double + defunc_1_op_res_f_res_f_res_128385 = + x_128379 + x_128380; + + defunc_1_op_res_f_res_128384 = + defunc_1_op_res_f_res_f_res_128385; + } + defunc_1_op_res_128382 = + defunc_1_op_res_f_res_128384; + } + x_128379 = defunc_1_op_res_128382; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128366)[sext_i32_i64(local_tid_128362)] = + x_128379; } } - // write final result - { - ((__local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] = - x_32097; - } + skip_waves_128396 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46188, 32) == 0) { - ((__local - float *) red_arr_mem_46192)[sext_i32_i64(local_tid_46188)] 
= - x_32098; + // and back to memory with the final result + { + if (local_tid_128362 == 0) { + ((__global double *) mem_124048)[gtid_111624] = + x_128379; + } } } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185) + - sext_i32_i64(local_tid_46188), m_27772 * N_27771) && - slt64(sext_i32_i64(local_tid_46188), - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185))) { - ((__global - float *) mem_45134)[squot64(sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185) + - sext_i32_i64(local_tid_46188), - N_27771) * N_27771 + - (sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185) + - sext_i32_i64(local_tid_46188) - - squot64(sext_i32_i64(virt_group_id_46196) * - squot64(segred_group_sizze_32093, - segment_sizze_nonzzero_46185) + - sext_i32_i64(local_tid_46188), - N_27771) * N_27771)] = ((__local - float *) red_arr_mem_46192)[(sext_i32_i64(local_tid_46188) + - (int64_t) 1) * - segment_sizze_nonzzero_46185 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_32093 + #undef segred_group_sizze_111662 } -__kernel void mainDetailedzisegred_small_32625(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46444_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_27781, - int64_t num_groups_32676, - int64_t segment_sizze_nonzzero_46437, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45232, - __global - unsigned char *mem_45235) +__kernel void mainzisegred_large_112268(__global int *global_failure, + __local volatile + int64_t 
*sync_arr_mem_128669_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128667_backing_aligned_1, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int64_t num_groups_112545, + int64_t groups_per_segment_128653, + int64_t elements_per_thread_128654, + int64_t virt_num_groups_128655, + int64_t threads_per_segment_128657, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global unsigned char *mem_124078, + __global unsigned char *mem_124130, + __global + unsigned char *group_res_arr_mem_128658, + __global + unsigned char *mainzicounter_mem_128660) { - #define segred_group_sizze_32675 (mainDetailedzisegred_group_sizze_32619) + #define segred_group_sizze_112544 (mainzisegred_group_sizze_112262) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46444_backing_0 = + __local volatile char *restrict sync_arr_mem_128669_backing_1 = (__local volatile - char *) red_arr_mem_46444_backing_aligned_0; - volatile __local bool local_failure; + char *) sync_arr_mem_128669_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128667_backing_0 = + (__local volatile + char *) red_arr_mem_128667_backing_aligned_1; - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); + if (*global_failure >= 0) + return; - int32_t global_tid_46439; - int32_t local_tid_46440; - int64_t group_sizze_46443; - int32_t wave_sizze_46442; - int32_t group_tid_46441; + int32_t global_tid_128662; + int32_t local_tid_128663; + int64_t group_sizze_128666; + int32_t wave_sizze_128665; + int32_t group_tid_128664; - global_tid_46439 = get_global_id(0); - local_tid_46440 = get_local_id(0); - group_sizze_46443 = get_local_size(0); - wave_sizze_46442 = LOCKSTEP_WIDTH; - group_tid_46441 = get_group_id(0); + global_tid_128662 = get_global_id(0); + local_tid_128663 = get_local_id(0); + group_sizze_128666 = 
get_local_size(0); + wave_sizze_128665 = LOCKSTEP_WIDTH; + group_tid_128664 = get_group_id(0); - int32_t phys_tid_32625; + int32_t phys_tid_112268; - phys_tid_32625 = global_tid_46439; + phys_tid_112268 = global_tid_128662; - __local char *red_arr_mem_46444; + __local char *red_arr_mem_128667; - red_arr_mem_46444 = (__local char *) red_arr_mem_46444_backing_0; + red_arr_mem_128667 = (__local char *) red_arr_mem_128667_backing_0; - int32_t phys_group_id_46446; + __local char *sync_arr_mem_128669; - phys_group_id_46446 = get_group_id(0); - for (int32_t i_46447 = 0; i_46447 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437))) - - phys_group_id_46446, sext_i64_i32(num_groups_32676)); - i_46447++) { - int32_t virt_group_id_46448 = phys_group_id_46446 + i_46447 * - sext_i64_i32(num_groups_32676); - int64_t gtid_32616 = squot64(sext_i32_i64(local_tid_46440), - segment_sizze_nonzzero_46437) + - sext_i32_i64(virt_group_id_46448) * - squot64(segred_group_sizze_32675, segment_sizze_nonzzero_46437); - int64_t gtid_32624 = srem64(sext_i32_i64(local_tid_46440), - i32_res_27781); + sync_arr_mem_128669 = (__local char *) sync_arr_mem_128669_backing_1; + + int32_t phys_group_id_128671; + + phys_group_id_128671 = get_group_id(0); + for (int32_t i_128672 = 0; i_128672 < + sdiv_up32(sext_i64_i32(virt_num_groups_128655) - phys_group_id_128671, + sext_i64_i32(num_groups_112545)); i_128672++) { + int32_t virt_group_id_128673 = phys_group_id_128671 + i_128672 * + sext_i64_i32(num_groups_112545); + int32_t flat_segment_id_128674 = squot32(virt_group_id_128673, + sext_i64_i32(groups_per_segment_128653)); + int64_t global_tid_128675 = srem64(sext_i32_i64(virt_group_id_128673) * + segred_group_sizze_112544 + + sext_i32_i64(local_tid_128663), + segred_group_sizze_112544 * + groups_per_segment_128653); + int64_t gtid_112259 = sext_i32_i64(flat_segment_id_128674); + int64_t gtid_112267; + int64_t x_acc_128676; + int64_t 
chunk_sizze_128677; - // apply map function if in bounds + chunk_sizze_128677 = smin64(elements_per_thread_128654, + sdiv_up64(num_recresids_padded_75809 - + global_tid_128675, + threads_per_segment_128657)); + + int64_t x_112548; + int64_t x_112549; + + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, i32_res_27781) && (slt64(gtid_32616, - m_27772) && - slt64(sext_i32_i64(local_tid_46440), - i32_res_27781 * - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437)))) { - int32_t defunc_0_f_res_32683 = ((__global - int32_t *) mem_45232)[gtid_32616]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_32624); - bool cond_32685 = slt32(index_primexp_42385, - defunc_0_f_res_32683); - float defunc_0_f_res_32686; - - if (cond_32685) { - int64_t i_32687 = sext_i32_i64(index_primexp_42385); - bool x_32688 = sle64((int64_t) 0, i_32687); - bool y_32689 = slt64(i_32687, N_27771); - bool bounds_check_32690 = x_32688 && y_32689; - bool index_certs_32691; - - if (!bounds_check_32690) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 49) == -1) { - global_failure_args[0] = i_32687; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_32692 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_32616 * - N_27771 + - i_32687]; - - defunc_0_f_res_32686 = defunc_0_f_res_t_res_32692; + x_acc_128676 = (int64_t) 9223372036854775807; + } + for (int64_t i_128681 = 0; i_128681 < chunk_sizze_128677; i_128681++) { + gtid_112267 = global_tid_128675 + threads_per_segment_128657 * + i_128681; + // apply map function + { + int64_t slice_115288 = (int64_t) 1 + gtid_112267; + double x_112554 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_112259 * + Nmk_76536 + + slice_115288]; + double x_112555 = ((__global double *) mem_124078)[gtid_112259 * + Nmk_76536 + + slice_115288]; + double abs_res_112556 = fabs(x_112554); + bool cond_112557 = x_112555 < abs_res_112556; + int64_t 
defunc_2_f_res_112558; + + if (cond_112557) { + defunc_2_f_res_112558 = gtid_112267; } else { - defunc_0_f_res_32686 = 0.0F; + defunc_2_f_res_112558 = (int64_t) 9223372036854775807; } - - float defunc_0_f_res_32693 = defunc_0_f_res_32686 * - defunc_0_f_res_32686; - // save map-out results { } - // save results to be reduced + // load accumulator { - ((__local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - defunc_0_f_res_32693; + x_112548 = x_acc_128676; + } + // load new values + { + x_112549 = defunc_2_f_res_112558; + } + // apply reduction operator + { + int64_t defunc_1_op_res_112550 = smin64(x_112548, x_112549); + + // store in accumulator + { + x_acc_128676 = defunc_1_op_res_112550; + } } - } else { - ((__local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - 0.0F; } } - - error_0: + // to reduce current chunk, first store our result in memory + { + x_112548 = x_acc_128676; + ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_112548; + } barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + + int32_t offset_128682; + int32_t skip_waves_128683; + + skip_waves_128683 = 1; + + int64_t x_128678; + int64_t x_128679; + + offset_128682 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128663, + sext_i64_i32(segred_group_sizze_112544))) { + x_128678 = ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128682)]; + } + } + offset_128682 = 1; + while (slt32(offset_128682, wave_sizze_128665)) { + if (slt32(local_tid_128663 + offset_128682, + sext_i64_i32(segred_group_sizze_112544)) && + ((local_tid_128663 - squot32(local_tid_128663, + wave_sizze_128665) * + wave_sizze_128665) & (2 * offset_128682 - 1)) == 0) { + // read array element + { + x_128679 = ((volatile __local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128682)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128680 = 
smin64(x_128678, x_128679); + + x_128678 = defunc_1_op_res_128680; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_128678; + } + } + offset_128682 *= 2; + } + while (slt32(skip_waves_128683, + squot32(sext_i64_i32(segred_group_sizze_112544) + + wave_sizze_128665 - 1, wave_sizze_128665))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128682 = skip_waves_128683 * wave_sizze_128665; + if (slt32(local_tid_128663 + offset_128682, + sext_i64_i32(segred_group_sizze_112544)) && + ((local_tid_128663 - squot32(local_tid_128663, + wave_sizze_128665) * + wave_sizze_128665) == 0 && (squot32(local_tid_128663, + wave_sizze_128665) & (2 * + skip_waves_128683 - + 1)) == + 0)) { + // read array element + { + x_128679 = ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128682)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128680 = smin64(x_128678, x_128679); + + x_128678 = defunc_1_op_res_128680; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_128678; + } + } + skip_waves_128683 *= 2; + } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27781)) { - // perform segmented scan to imitate reduction + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128663) == (int64_t) 0) { + x_acc_128676 = x_128678; + } + } + if (groups_per_segment_128653 == (int64_t) 1) { + // first thread in group saves final result to memory { - float x_32679; - float x_32680; - float x_46449; - float x_46450; - bool ltid_in_bounds_46452; - - ltid_in_bounds_46452 = slt64(sext_i32_i64(local_tid_46440), - i32_res_27781 * - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437)); - - int32_t skip_threads_46453; - - // read input for in-block scan + if (local_tid_128663 == 0) { + ((__global int64_t *) mem_124130)[gtid_112259] = + x_acc_128676; + } + } + } else 
{ + int32_t old_counter_128684; + + // first thread in group saves group result to global memory + { + if (local_tid_128663 == 0) { + ((__global + int64_t *) group_res_arr_mem_128658)[sext_i32_i64(virt_group_id_128673) * + segred_group_sizze_112544] = + x_acc_128676; + mem_fence_global(); + old_counter_128684 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128660)[sext_i32_i64(srem32(flat_segment_id_128674, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128669)[(int64_t) 0] = + old_counter_128684 == groups_per_segment_128653 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128685; + + is_last_group_128685 = ((__local + bool *) sync_arr_mem_128669)[(int64_t) 0]; + if (is_last_group_128685) { + if (local_tid_128663 == 0) { + old_counter_128684 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128660)[sext_i32_i64(srem32(flat_segment_id_128674, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128653)); + } + // read in the per-group-results { - if (ltid_in_bounds_46452) { - x_32680 = ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)]; - if ((local_tid_46440 - squot32(local_tid_46440, 32) * - 32) == 0) { - x_32679 = x_32680; + int64_t read_per_thread_128686 = + sdiv_up64(groups_per_segment_128653, + segred_group_sizze_112544); + + x_112548 = (int64_t) 9223372036854775807; + for (int64_t i_128687 = 0; i_128687 < + read_per_thread_128686; i_128687++) { + int64_t group_res_id_128688 = + sext_i32_i64(local_tid_128663) * + read_per_thread_128686 + i_128687; + int64_t index_of_group_res_128689 = + sext_i32_i64(flat_segment_id_128674) * + groups_per_segment_128653 + group_res_id_128688; + + if (slt64(group_res_id_128688, + groups_per_segment_128653)) { + x_112549 = ((__global + int64_t *) group_res_arr_mem_128658)[index_of_group_res_128689 * + segred_group_sizze_112544]; + + int64_t defunc_1_op_res_112550; + + 
defunc_1_op_res_112550 = smin64(x_112548, x_112549); + x_112548 = defunc_1_op_res_112550; } } } - // in-block scan (hopefully no barriers needed) + ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_112548; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_46453 = 1; - while (slt32(skip_threads_46453, 32)) { - if (sle32(skip_threads_46453, local_tid_46440 - - squot32(local_tid_46440, 32) * 32) && - ltid_in_bounds_46452) { - // read operands + int32_t offset_128690; + int32_t skip_waves_128691; + + skip_waves_128691 = 1; + + int64_t x_128678; + int64_t x_128679; + + offset_128690 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128663, + sext_i64_i32(segred_group_sizze_112544))) { + x_128678 = ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128690)]; + } + } + offset_128690 = 1; + while (slt32(offset_128690, wave_sizze_128665)) { + if (slt32(local_tid_128663 + offset_128690, + sext_i64_i32(segred_group_sizze_112544)) && + ((local_tid_128663 - squot32(local_tid_128663, + wave_sizze_128665) * + wave_sizze_128665) & (2 * offset_128690 - 1)) == + 0) { + // read array element { - x_32679 = ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440) - - sext_i32_i64(skip_threads_46453)]; + x_128679 = ((volatile __local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128690)]; } - // perform operation + // apply reduction operation { - bool inactive_46454 = - slt64(srem64(sext_i32_i64(local_tid_46440), - i32_res_27781), - sext_i32_i64(local_tid_46440) - - sext_i32_i64(local_tid_46440 - - skip_threads_46453)); + int64_t defunc_1_op_res_128680 = + smin64(x_128678, x_128679); - if (inactive_46454) { - x_32679 = x_32680; - } - if (!inactive_46454) { - float defunc_1_op_res_32681 = x_32679 + - x_32680; - - x_32679 = defunc_1_op_res_32681; - } + x_128678 = defunc_1_op_res_128680; } - } - if 
(sle32(wave_sizze_46442, skip_threads_46453)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46453, local_tid_46440 - - squot32(local_tid_46440, 32) * 32) && - ltid_in_bounds_46452) { - // write result + // write result of operation { ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - x_32679; - x_32680 = x_32679; - } - } - if (sle32(wave_sizze_46442, skip_threads_46453)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46453 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46440 - squot32(local_tid_46440, 32) * 32) == - 31 && ltid_in_bounds_46452) { - ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(squot32(local_tid_46440, - 32))] = - x_32679; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46455; - - // read input for in-block scan - { - if (squot32(local_tid_46440, 32) == 0 && - ltid_in_bounds_46452) { - x_46450 = ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)]; - if ((local_tid_46440 - squot32(local_tid_46440, - 32) * 32) == 0) { - x_46449 = x_46450; + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_128678; } } + offset_128690 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46455 = 1; - while (slt32(skip_threads_46455, 32)) { - if (sle32(skip_threads_46455, local_tid_46440 - - squot32(local_tid_46440, 32) * 32) && - (squot32(local_tid_46440, 32) == 0 && - ltid_in_bounds_46452)) { - // read operands - { - x_46449 = ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440) - - sext_i32_i64(skip_threads_46455)]; - } - // perform operation - { - bool inactive_46456 = - slt64(srem64(sext_i32_i64(local_tid_46440 * - 32 + 32 - 1), - i32_res_27781), - sext_i32_i64(local_tid_46440 * - 32 + 32 - 1) - - 
sext_i32_i64((local_tid_46440 - - skip_threads_46455) * - 32 + 32 - 1)); - - if (inactive_46456) { - x_46449 = x_46450; - } - if (!inactive_46456) { - float defunc_1_op_res_46451 = x_46449 + - x_46450; - - x_46449 = defunc_1_op_res_46451; - } - } - } - if (sle32(wave_sizze_46442, skip_threads_46455)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_128691, + squot32(sext_i64_i32(segred_group_sizze_112544) + + wave_sizze_128665 - 1, + wave_sizze_128665))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128690 = skip_waves_128691 * wave_sizze_128665; + if (slt32(local_tid_128663 + offset_128690, + sext_i64_i32(segred_group_sizze_112544)) && + ((local_tid_128663 - squot32(local_tid_128663, + wave_sizze_128665) * + wave_sizze_128665) == 0 && + (squot32(local_tid_128663, wave_sizze_128665) & + (2 * skip_waves_128691 - 1)) == 0)) { + // read array element + { + x_128679 = ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663 + + offset_128690)]; } - if (sle32(skip_threads_46455, local_tid_46440 - - squot32(local_tid_46440, 32) * 32) && - (squot32(local_tid_46440, 32) == 0 && - ltid_in_bounds_46452)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - x_46449; - x_46450 = x_46449; - } + // apply reduction operation + { + int64_t defunc_1_op_res_128680 = + smin64(x_128678, x_128679); + + x_128678 = defunc_1_op_res_128680; } - if (sle32(wave_sizze_46442, skip_threads_46455)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128667)[sext_i32_i64(local_tid_128663)] = + x_128678; } - skip_threads_46455 *= 2; } + skip_waves_128691 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46440, 32) == 0 || - !ltid_in_bounds_46452)) { - // read operands - { - x_32680 = x_32679; - x_32679 = ((__local - float *) red_arr_mem_46444)[sext_i32_i64(squot32(local_tid_46440, - 32)) - - 
(int64_t) 1]; - } - // perform operation - { - bool inactive_46457 = - slt64(srem64(sext_i32_i64(local_tid_46440), - i32_res_27781), - sext_i32_i64(local_tid_46440) - - sext_i32_i64(squot32(local_tid_46440, - 32) * 32 - 1)); - - if (inactive_46457) { - x_32679 = x_32680; - } - if (!inactive_46457) { - float defunc_1_op_res_32681 = x_32679 + x_32680; - - x_32679 = defunc_1_op_res_32681; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - x_32679; + // and back to memory with the final result + { + if (local_tid_128663 == 0) { + ((__global int64_t *) mem_124130)[gtid_112259] = + x_128678; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46440, 32) == 0) { - ((__local - float *) red_arr_mem_46444)[sext_i32_i64(local_tid_46440)] = - x_32680; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46448) * - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437) + - sext_i32_i64(local_tid_46440), m_27772) && - slt64(sext_i32_i64(local_tid_46440), - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437))) { - ((__global - float *) mem_45235)[sext_i32_i64(virt_group_id_46448) * - squot64(segred_group_sizze_32675, - segment_sizze_nonzzero_46437) + - sext_i32_i64(local_tid_46440)] = ((__local - float *) red_arr_mem_46444)[(sext_i32_i64(local_tid_46440) + - (int64_t) 1) * - segment_sizze_nonzzero_46437 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_32675 + #undef segred_group_sizze_112544 } -__kernel void mainDetailedzisegred_small_32650(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46384_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_27781, - int64_t num_groups_32662, - 
int64_t segment_sizze_nonzzero_46377, - __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_45232) +__kernel void mainzisegred_large_112393(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128604_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128602_backing_aligned_1, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int64_t num_groups_112425, + int64_t groups_per_segment_128588, + int64_t elements_per_thread_128589, + int64_t virt_num_groups_128590, + int64_t threads_per_segment_128592, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global unsigned char *mem_124121, + __global unsigned char *mem_124124, + __global + unsigned char *group_res_arr_mem_128593, + __global + unsigned char *mainzicounter_mem_128595) { - #define segred_group_sizze_32661 (mainDetailedzisegred_group_sizze_32644) + #define segred_group_sizze_112424 (mainzisegred_group_sizze_112387) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46384_backing_0 = + __local volatile char *restrict sync_arr_mem_128604_backing_1 = (__local volatile - char *) red_arr_mem_46384_backing_aligned_0; + char *) sync_arr_mem_128604_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128602_backing_0 = + (__local volatile + char *) red_arr_mem_128602_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46379; - int32_t local_tid_46380; - int64_t group_sizze_46383; - int32_t wave_sizze_46382; - int32_t group_tid_46381; + int32_t global_tid_128597; + int32_t local_tid_128598; + int64_t group_sizze_128601; + int32_t wave_sizze_128600; + int32_t group_tid_128599; - global_tid_46379 = get_global_id(0); - local_tid_46380 = get_local_id(0); - group_sizze_46383 = get_local_size(0); - wave_sizze_46382 = LOCKSTEP_WIDTH; - group_tid_46381 = get_group_id(0); + global_tid_128597 = get_global_id(0); + local_tid_128598 = get_local_id(0); + 
group_sizze_128601 = get_local_size(0); + wave_sizze_128600 = LOCKSTEP_WIDTH; + group_tid_128599 = get_group_id(0); - int32_t phys_tid_32650; + int32_t phys_tid_112393; - phys_tid_32650 = global_tid_46379; + phys_tid_112393 = global_tid_128597; - __local char *red_arr_mem_46384; + __local char *red_arr_mem_128602; - red_arr_mem_46384 = (__local char *) red_arr_mem_46384_backing_0; + red_arr_mem_128602 = (__local char *) red_arr_mem_128602_backing_0; - int32_t phys_group_id_46386; + __local char *sync_arr_mem_128604; - phys_group_id_46386 = get_group_id(0); - for (int32_t i_46387 = 0; i_46387 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377))) - - phys_group_id_46386, sext_i64_i32(num_groups_32662)); - i_46387++) { - int32_t virt_group_id_46388 = phys_group_id_46386 + i_46387 * - sext_i64_i32(num_groups_32662); - int64_t gtid_32641 = squot64(sext_i32_i64(local_tid_46380), - segment_sizze_nonzzero_46377) + - sext_i32_i64(virt_group_id_46388) * - squot64(segred_group_sizze_32661, segment_sizze_nonzzero_46377); - int64_t gtid_32649 = srem64(sext_i32_i64(local_tid_46380), - i32_res_27781); - - // apply map function if in bounds + sync_arr_mem_128604 = (__local char *) sync_arr_mem_128604_backing_1; + + int32_t phys_group_id_128606; + + phys_group_id_128606 = get_group_id(0); + for (int32_t i_128607 = 0; i_128607 < + sdiv_up32(sext_i64_i32(virt_num_groups_128590) - phys_group_id_128606, + sext_i64_i32(num_groups_112425)); i_128607++) { + int32_t virt_group_id_128608 = phys_group_id_128606 + i_128607 * + sext_i64_i32(num_groups_112425); + int32_t flat_segment_id_128609 = squot32(virt_group_id_128608, + sext_i64_i32(groups_per_segment_128588)); + int64_t global_tid_128610 = srem64(sext_i32_i64(virt_group_id_128608) * + segred_group_sizze_112424 + + sext_i32_i64(local_tid_128598), + segred_group_sizze_112424 * + groups_per_segment_128588); + int64_t gtid_112384 = sext_i32_i64(flat_segment_id_128609); + int64_t 
gtid_112392; + double x_acc_128611; + int64_t chunk_sizze_128612; + + chunk_sizze_128612 = smin64(elements_per_thread_128589, + sdiv_up64(num_recresids_padded_75809 - + global_tid_128610, + threads_per_segment_128592)); + + double x_112428; + double x_112429; + + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, i32_res_27781) && (slt64(gtid_32641, - m_27772) && - slt64(sext_i32_i64(local_tid_46380), - i32_res_27781 * - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377)))) { - float x_32669 = ((__global - float *) images_mem_44381)[gtid_32641 * - N_27771 + - gtid_32649]; - bool isnan_res_32670; - - isnan_res_32670 = futrts_isnan32(x_32669); - - bool cond_32671 = !isnan_res_32670; - int32_t defunc_0_f_res_32672 = btoi_bool_i32(cond_32671); + x_acc_128611 = -INFINITY; + } + for (int64_t i_128616 = 0; i_128616 < chunk_sizze_128612; i_128616++) { + gtid_112392 = global_tid_128610 + threads_per_segment_128592 * + i_128616; + // apply map function + { + double i64_res_112432 = ((__global + double *) mem_124121)[gtid_112384]; + int64_t slice_115286 = (int64_t) 1 + gtid_112392; + double x_112433 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_112384 * + Nmk_76536 + + slice_115286]; + int64_t x_112435 = mul64((int64_t) 2, gtid_112392); + int64_t i64_arg_112436 = add64((int64_t) 2, x_112435); + double i64_res_112437 = sitofp_i64_f64(i64_arg_112436); + double y_112438 = i64_res_112437 / i64_res_112432; + double lifted_div_res_112439 = 1.0 + y_112438; + double abs_arg_112440 = x_112433 / lifted_div_res_112439; + double abs_res_112441 = fabs(abs_arg_112440); // save map-out results { } - // save results to be reduced + // load accumulator + { + x_112428 = x_acc_128611; + } + // load new values + { + x_112429 = abs_res_112441; + } + // apply reduction operator + { + double defunc_1_op_res_112430 = fmax64(x_112428, x_112429); + + // store in accumulator + { + x_acc_128611 = defunc_1_op_res_112430; + } + } + } + } + // to reduce current 
chunk, first store our result in memory + { + x_112428 = x_acc_128611; + ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_112428; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128617; + int32_t skip_waves_128618; + + skip_waves_128618 = 1; + + double x_128613; + double x_128614; + + offset_128617 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128598, + sext_i64_i32(segred_group_sizze_112424))) { + x_128613 = ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128617)]; + } + } + offset_128617 = 1; + while (slt32(offset_128617, wave_sizze_128600)) { + if (slt32(local_tid_128598 + offset_128617, + sext_i64_i32(segred_group_sizze_112424)) && + ((local_tid_128598 - squot32(local_tid_128598, + wave_sizze_128600) * + wave_sizze_128600) & (2 * offset_128617 - 1)) == 0) { + // read array element + { + x_128614 = ((volatile __local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128617)]; + } + // apply reduction operation + { + double defunc_1_op_res_128615 = fmax64(x_128613, x_128614); + + x_128613 = defunc_1_op_res_128615; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_128613; + } + } + offset_128617 *= 2; + } + while (slt32(skip_waves_128618, + squot32(sext_i64_i32(segred_group_sizze_112424) + + wave_sizze_128600 - 1, wave_sizze_128600))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128617 = skip_waves_128618 * wave_sizze_128600; + if (slt32(local_tid_128598 + offset_128617, + sext_i64_i32(segred_group_sizze_112424)) && + ((local_tid_128598 - squot32(local_tid_128598, + wave_sizze_128600) * + wave_sizze_128600) == 0 && (squot32(local_tid_128598, + wave_sizze_128600) & (2 * + skip_waves_128618 - + 1)) == + 0)) { + // read array element + { + x_128614 = ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128617)]; + } + // apply 
reduction operation + { + double defunc_1_op_res_128615 = fmax64(x_128613, x_128614); + + x_128613 = defunc_1_op_res_128615; + } + // write result of operation { ((__local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - defunc_0_f_res_32672; + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_128613; } - } else { - ((__local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - 0; } + skip_waves_128618 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_27781)) { - // perform segmented scan to imitate reduction + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128598) == (int64_t) 0) { + x_acc_128611 = x_128613; + } + } + if (groups_per_segment_128588 == (int64_t) 1) { + // first thread in group saves final result to memory { - int32_t x_32665; - int32_t x_32666; - int32_t x_46389; - int32_t x_46390; - bool ltid_in_bounds_46392; - - ltid_in_bounds_46392 = slt64(sext_i32_i64(local_tid_46380), - i32_res_27781 * - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377)); - - int32_t skip_threads_46393; - - // read input for in-block scan + if (local_tid_128598 == 0) { + ((__global double *) mem_124124)[gtid_112384] = + x_acc_128611; + } + } + } else { + int32_t old_counter_128619; + + // first thread in group saves group result to global memory + { + if (local_tid_128598 == 0) { + ((__global + double *) group_res_arr_mem_128593)[sext_i32_i64(virt_group_id_128608) * + segred_group_sizze_112424] = + x_acc_128611; + mem_fence_global(); + old_counter_128619 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128595)[sext_i32_i64(srem32(flat_segment_id_128609, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128604)[(int64_t) 0] = + old_counter_128619 == groups_per_segment_128588 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128620; + + is_last_group_128620 = ((__local + bool 
*) sync_arr_mem_128604)[(int64_t) 0]; + if (is_last_group_128620) { + if (local_tid_128598 == 0) { + old_counter_128619 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128595)[sext_i32_i64(srem32(flat_segment_id_128609, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128588)); + } + // read in the per-group-results { - if (ltid_in_bounds_46392) { - x_32666 = ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)]; - if ((local_tid_46380 - squot32(local_tid_46380, 32) * - 32) == 0) { - x_32665 = x_32666; + int64_t read_per_thread_128621 = + sdiv_up64(groups_per_segment_128588, + segred_group_sizze_112424); + + x_112428 = -INFINITY; + for (int64_t i_128622 = 0; i_128622 < + read_per_thread_128621; i_128622++) { + int64_t group_res_id_128623 = + sext_i32_i64(local_tid_128598) * + read_per_thread_128621 + i_128622; + int64_t index_of_group_res_128624 = + sext_i32_i64(flat_segment_id_128609) * + groups_per_segment_128588 + group_res_id_128623; + + if (slt64(group_res_id_128623, + groups_per_segment_128588)) { + x_112429 = ((__global + double *) group_res_arr_mem_128593)[index_of_group_res_128624 * + segred_group_sizze_112424]; + + double defunc_1_op_res_112430; + + defunc_1_op_res_112430 = fmax64(x_112428, x_112429); + x_112428 = defunc_1_op_res_112430; } } } - // in-block scan (hopefully no barriers needed) + ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_112428; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_46393 = 1; - while (slt32(skip_threads_46393, 32)) { - if (sle32(skip_threads_46393, local_tid_46380 - - squot32(local_tid_46380, 32) * 32) && - ltid_in_bounds_46392) { - // read operands + int32_t offset_128625; + int32_t skip_waves_128626; + + skip_waves_128626 = 1; + + double x_128613; + double x_128614; + + offset_128625 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128598, + 
sext_i64_i32(segred_group_sizze_112424))) { + x_128613 = ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128625)]; + } + } + offset_128625 = 1; + while (slt32(offset_128625, wave_sizze_128600)) { + if (slt32(local_tid_128598 + offset_128625, + sext_i64_i32(segred_group_sizze_112424)) && + ((local_tid_128598 - squot32(local_tid_128598, + wave_sizze_128600) * + wave_sizze_128600) & (2 * offset_128625 - 1)) == + 0) { + // read array element { - x_32665 = ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380) - - sext_i32_i64(skip_threads_46393)]; + x_128614 = ((volatile __local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128625)]; } - // perform operation + // apply reduction operation { - bool inactive_46394 = - slt64(srem64(sext_i32_i64(local_tid_46380), - i32_res_27781), - sext_i32_i64(local_tid_46380) - - sext_i32_i64(local_tid_46380 - - skip_threads_46393)); + double defunc_1_op_res_128615 = fmax64(x_128613, + x_128614); - if (inactive_46394) { - x_32665 = x_32666; - } - if (!inactive_46394) { - int32_t defunc_1_op_res_32667 = - add32(x_32665, x_32666); - - x_32665 = defunc_1_op_res_32667; - } + x_128613 = defunc_1_op_res_128615; } - } - if (sle32(wave_sizze_46382, skip_threads_46393)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46393, local_tid_46380 - - squot32(local_tid_46380, 32) * 32) && - ltid_in_bounds_46392) { - // write result + // write result of operation { ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - x_32665; - x_32666 = x_32665; - } - } - if (sle32(wave_sizze_46382, skip_threads_46393)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46393 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46380 - squot32(local_tid_46380, 32) * 32) == - 31 && ltid_in_bounds_46392) { - ((volatile __local - int32_t *) 
red_arr_mem_46384)[sext_i32_i64(squot32(local_tid_46380, - 32))] = - x_32665; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46395; - - // read input for in-block scan - { - if (squot32(local_tid_46380, 32) == 0 && - ltid_in_bounds_46392) { - x_46390 = ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)]; - if ((local_tid_46380 - squot32(local_tid_46380, - 32) * 32) == 0) { - x_46389 = x_46390; + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_128613; } } + offset_128625 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46395 = 1; - while (slt32(skip_threads_46395, 32)) { - if (sle32(skip_threads_46395, local_tid_46380 - - squot32(local_tid_46380, 32) * 32) && - (squot32(local_tid_46380, 32) == 0 && - ltid_in_bounds_46392)) { - // read operands - { - x_46389 = ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380) - - sext_i32_i64(skip_threads_46395)]; - } - // perform operation - { - bool inactive_46396 = - slt64(srem64(sext_i32_i64(local_tid_46380 * - 32 + 32 - 1), - i32_res_27781), - sext_i32_i64(local_tid_46380 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46380 - - skip_threads_46395) * - 32 + 32 - 1)); - - if (inactive_46396) { - x_46389 = x_46390; - } - if (!inactive_46396) { - int32_t defunc_1_op_res_46391 = - add32(x_46389, x_46390); - - x_46389 = defunc_1_op_res_46391; - } - } - } - if (sle32(wave_sizze_46382, skip_threads_46395)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_128626, + squot32(sext_i64_i32(segred_group_sizze_112424) + + wave_sizze_128600 - 1, + wave_sizze_128600))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128625 = skip_waves_128626 * wave_sizze_128600; + if (slt32(local_tid_128598 + offset_128625, + sext_i64_i32(segred_group_sizze_112424)) && + ((local_tid_128598 - squot32(local_tid_128598, + wave_sizze_128600) * + 
wave_sizze_128600) == 0 && + (squot32(local_tid_128598, wave_sizze_128600) & + (2 * skip_waves_128626 - 1)) == 0)) { + // read array element + { + x_128614 = ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598 + + offset_128625)]; } - if (sle32(skip_threads_46395, local_tid_46380 - - squot32(local_tid_46380, 32) * 32) && - (squot32(local_tid_46380, 32) == 0 && - ltid_in_bounds_46392)) { - // write result - { - ((volatile __local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - x_46389; - x_46390 = x_46389; - } + // apply reduction operation + { + double defunc_1_op_res_128615 = fmax64(x_128613, + x_128614); + + x_128613 = defunc_1_op_res_128615; } - if (sle32(wave_sizze_46382, skip_threads_46395)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((__local + double *) red_arr_mem_128602)[sext_i32_i64(local_tid_128598)] = + x_128613; } - skip_threads_46395 *= 2; } + skip_waves_128626 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46380, 32) == 0 || - !ltid_in_bounds_46392)) { - // read operands - { - x_32666 = x_32665; - x_32665 = ((__local - int32_t *) red_arr_mem_46384)[sext_i32_i64(squot32(local_tid_46380, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46397 = - slt64(srem64(sext_i32_i64(local_tid_46380), - i32_res_27781), - sext_i32_i64(local_tid_46380) - - sext_i32_i64(squot32(local_tid_46380, - 32) * 32 - 1)); - - if (inactive_46397) { - x_32665 = x_32666; - } - if (!inactive_46397) { - int32_t defunc_1_op_res_32667 = add32(x_32665, - x_32666); - - x_32665 = defunc_1_op_res_32667; - } - } - // write final result - { - ((__local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - x_32665; + // and back to memory with the final result + { + if (local_tid_128598 == 0) { + ((__global double *) mem_124124)[gtid_112384] = + x_128613; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct 
values for first block - { - if (squot32(local_tid_46380, 32) == 0) { - ((__local - int32_t *) red_arr_mem_46384)[sext_i32_i64(local_tid_46380)] = - x_32666; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46388) * - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377) + - sext_i32_i64(local_tid_46380), m_27772) && - slt64(sext_i32_i64(local_tid_46380), - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377))) { - ((__global - int32_t *) mem_45232)[sext_i32_i64(virt_group_id_46388) * - squot64(segred_group_sizze_32661, - segment_sizze_nonzzero_46377) + - sext_i32_i64(local_tid_46380)] = - ((__local - int32_t *) red_arr_mem_46384)[(sext_i32_i64(local_tid_46380) + - (int64_t) 1) * - segment_sizze_nonzzero_46377 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_32661 + #undef segred_group_sizze_112424 } -__kernel void mainDetailedzisegred_small_32813(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46553_backing_aligned_0, - int64_t N_27771, int64_t m_27772, - int64_t i32_res_28174, - int64_t num_groups_32835, - int64_t segment_sizze_nonzzero_46546, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45278) +__kernel void mainzisegred_large_112741(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128803_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128801_backing_aligned_1, + int64_t N_75135, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t num_groups_112880, + int64_t groups_per_segment_128787, + int64_t elements_per_thread_128788, + int64_t 
virt_num_groups_128789, + int64_t threads_per_segment_128791, + __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124142, + __global unsigned char *mem_124276, + __global unsigned char *mem_124281, + __global + unsigned char *group_res_arr_mem_128792, + __global + unsigned char *mainzicounter_mem_128794) { - #define segred_group_sizze_32834 (mainDetailedzisegred_group_sizze_32807) + #define segred_group_sizze_112879 (mainzisegred_group_sizze_112735) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46553_backing_0 = + __local volatile char *restrict sync_arr_mem_128803_backing_1 = (__local volatile - char *) red_arr_mem_46553_backing_aligned_0; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46548; - int32_t local_tid_46549; - int64_t group_sizze_46552; - int32_t wave_sizze_46551; - int32_t group_tid_46550; - - global_tid_46548 = get_global_id(0); - local_tid_46549 = get_local_id(0); - group_sizze_46552 = get_local_size(0); - wave_sizze_46551 = LOCKSTEP_WIDTH; - group_tid_46550 = get_group_id(0); - - int32_t phys_tid_32813; - - phys_tid_32813 = global_tid_46548; - - __local char *red_arr_mem_46553; - - red_arr_mem_46553 = (__local char *) red_arr_mem_46553_backing_0; + char *) sync_arr_mem_128803_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128801_backing_0 = + (__local volatile + char *) red_arr_mem_128801_backing_aligned_1; - int32_t phys_group_id_46555; + if (*global_failure >= 0) + return; - phys_group_id_46555 = get_group_id(0); - for (int32_t i_46556 = 0; i_46556 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546))) - - phys_group_id_46555, sext_i64_i32(num_groups_32835)); - i_46556++) { - int32_t 
virt_group_id_46557 = phys_group_id_46555 + i_46556 * - sext_i64_i32(num_groups_32835); - int64_t gtid_32804 = squot64(sext_i32_i64(local_tid_46549), - segment_sizze_nonzzero_46546) + - sext_i32_i64(virt_group_id_46557) * - squot64(segred_group_sizze_32834, segment_sizze_nonzzero_46546); - int64_t gtid_32812 = srem64(sext_i32_i64(local_tid_46549), - i32_res_28174); + int32_t global_tid_128796; + int32_t local_tid_128797; + int64_t group_sizze_128800; + int32_t wave_sizze_128799; + int32_t group_tid_128798; + + global_tid_128796 = get_global_id(0); + local_tid_128797 = get_local_id(0); + group_sizze_128800 = get_local_size(0); + wave_sizze_128799 = LOCKSTEP_WIDTH; + group_tid_128798 = get_group_id(0); + + int32_t phys_tid_112741; + + phys_tid_112741 = global_tid_128796; + + __local char *red_arr_mem_128801; + + red_arr_mem_128801 = (__local char *) red_arr_mem_128801_backing_0; + + __local char *sync_arr_mem_128803; + + sync_arr_mem_128803 = (__local char *) sync_arr_mem_128803_backing_1; + + int32_t phys_group_id_128805; + + phys_group_id_128805 = get_group_id(0); + for (int32_t i_128806 = 0; i_128806 < + sdiv_up32(sext_i64_i32(virt_num_groups_128789) - phys_group_id_128805, + sext_i64_i32(num_groups_112880)); i_128806++) { + int32_t virt_group_id_128807 = phys_group_id_128805 + i_128806 * + sext_i64_i32(num_groups_112880); + int32_t flat_segment_id_128808 = squot32(virt_group_id_128807, + sext_i64_i32(groups_per_segment_128787)); + int64_t global_tid_128809 = srem64(sext_i32_i64(virt_group_id_128807) * + segred_group_sizze_112879 + + sext_i32_i64(local_tid_128797), + segred_group_sizze_112879 * + groups_per_segment_128787); + int64_t gtid_112728 = squot64(sext_i32_i64(flat_segment_id_128808), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_112729 = squot64(sext_i32_i64(flat_segment_id_128808) - + squot64(sext_i32_i64(flat_segment_id_128808), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_112730 = 
sext_i32_i64(flat_segment_id_128808) - + squot64(sext_i32_i64(flat_segment_id_128808), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(flat_segment_id_128808) - + squot64(sext_i32_i64(flat_segment_id_128808), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t gtid_112740; + double x_acc_128810; + int64_t chunk_sizze_128811; + + chunk_sizze_128811 = smin64(elements_per_thread_128788, + sdiv_up64(n_75139 - global_tid_128809, + threads_per_segment_128791)); + + double x_112883; + double x_112884; - // apply map function if in bounds + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, i32_res_28174) && (slt64(gtid_32804, - m_27772) && - slt64(sext_i32_i64(local_tid_46549), - i32_res_28174 * - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546)))) { - int32_t x_32843 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_32804]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_32812); - bool cond_32845 = slt32(index_primexp_42390, x_32843); - float defunc_0_f_res_32846; - - if (cond_32845) { - int32_t x_32842 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_32804]; - int32_t x_32847 = add32(x_32842, index_primexp_42390); - int32_t x_32848 = sub32(x_32847, x_32843); - int32_t i_32849 = add32(1, x_32848); - int64_t i_32850 = sext_i32_i64(i_32849); - bool x_32851 = sle64((int64_t) 0, i_32850); - bool y_32852 = slt64(i_32850, N_27771); - bool bounds_check_32853 = x_32851 && y_32852; - bool index_certs_32854; - - if (!bounds_check_32853) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 53) == -1) { - global_failure_args[0] = i_32850; - global_failure_args[1] = N_27771; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_32855 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_32804 * - N_27771 + - i_32850]; - - defunc_0_f_res_32846 = defunc_0_f_res_t_res_32855; + x_acc_128810 = 0.0; + 
} + for (int64_t i_128815 = 0; i_128815 < chunk_sizze_128811; i_128815++) { + gtid_112740 = global_tid_128809 + threads_per_segment_128791 * + i_128815; + // apply map function + { + double x_112889 = ((__global double *) mem_124142)[gtid_112728 * + N_75135 + + gtid_112740]; + double x_112890 = ((__global + double *) binop_p_mem_120117)[gtid_112729 * + N_75135 + + gtid_112740]; + double x_112891 = ((__global double *) mem_124276)[gtid_112730 * + N_75135 + + gtid_112740]; + double x_112892 = x_112890 * x_112891; + bool isnan_res_112893; + + isnan_res_112893 = futrts_isnan64(x_112889); + + double y_112894; + + if (isnan_res_112893) { + y_112894 = 0.0; } else { - defunc_0_f_res_32846 = 0.0F; + y_112894 = 1.0; } + + double defunc_2_f_res_112895 = x_112892 * y_112894; + // save map-out results { } - // save results to be reduced + // load accumulator { - ((__local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - defunc_0_f_res_32846; + x_112883 = x_acc_128810; + } + // load new values + { + x_112884 = defunc_2_f_res_112895; + } + // apply reduction operator + { + double defunc_1_op_res_112885 = x_112883 + x_112884; + + // store in accumulator + { + x_acc_128810 = defunc_1_op_res_112885; + } } - } else { - ((__local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - 0.0F; } } - - error_0: + // to reduce current chunk, first store our result in memory + { + x_112883 = x_acc_128810; + ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_112883; + } barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + + int32_t offset_128816; + int32_t skip_waves_128817; + + skip_waves_128817 = 1; + + double x_128812; + double x_128813; + + offset_128816 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128797, + sext_i64_i32(segred_group_sizze_112879))) { + x_128812 = ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128816)]; + } + } + offset_128816 = 1; + 
while (slt32(offset_128816, wave_sizze_128799)) { + if (slt32(local_tid_128797 + offset_128816, + sext_i64_i32(segred_group_sizze_112879)) && + ((local_tid_128797 - squot32(local_tid_128797, + wave_sizze_128799) * + wave_sizze_128799) & (2 * offset_128816 - 1)) == 0) { + // read array element + { + x_128813 = ((volatile __local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128816)]; + } + // apply reduction operation + { + double defunc_1_op_res_128814 = x_128812 + x_128813; + + x_128812 = defunc_1_op_res_128814; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_128812; + } + } + offset_128816 *= 2; + } + while (slt32(skip_waves_128817, + squot32(sext_i64_i32(segred_group_sizze_112879) + + wave_sizze_128799 - 1, wave_sizze_128799))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128816 = skip_waves_128817 * wave_sizze_128799; + if (slt32(local_tid_128797 + offset_128816, + sext_i64_i32(segred_group_sizze_112879)) && + ((local_tid_128797 - squot32(local_tid_128797, + wave_sizze_128799) * + wave_sizze_128799) == 0 && (squot32(local_tid_128797, + wave_sizze_128799) & (2 * + skip_waves_128817 - + 1)) == + 0)) { + // read array element + { + x_128813 = ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128816)]; + } + // apply reduction operation + { + double defunc_1_op_res_128814 = x_128812 + x_128813; + + x_128812 = defunc_1_op_res_128814; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_128812; + } + } + skip_waves_128817 *= 2; + } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28174)) { - // perform segmented scan to imitate reduction + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128797) == (int64_t) 0) { + x_acc_128810 = x_128812; + } + } + if (groups_per_segment_128787 == (int64_t) 1) { + // first thread in 
group saves final result to memory { - float x_32838; - float x_32839; - float x_46558; - float x_46559; - bool ltid_in_bounds_46561; - - ltid_in_bounds_46561 = slt64(sext_i32_i64(local_tid_46549), - i32_res_28174 * - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546)); - - int32_t skip_threads_46562; - - // read input for in-block scan + if (local_tid_128797 == 0) { + ((__global double *) mem_124281)[gtid_112728 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_112729 * + k2p2zq_75151 + + gtid_112730] = + x_acc_128810; + } + } + } else { + int32_t old_counter_128818; + + // first thread in group saves group result to global memory + { + if (local_tid_128797 == 0) { + ((__global + double *) group_res_arr_mem_128792)[sext_i32_i64(virt_group_id_128807) * + segred_group_sizze_112879] = + x_acc_128810; + mem_fence_global(); + old_counter_128818 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128794)[sext_i32_i64(srem32(flat_segment_id_128808, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128803)[(int64_t) 0] = + old_counter_128818 == groups_per_segment_128787 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128819; + + is_last_group_128819 = ((__local + bool *) sync_arr_mem_128803)[(int64_t) 0]; + if (is_last_group_128819) { + if (local_tid_128797 == 0) { + old_counter_128818 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128794)[sext_i32_i64(srem32(flat_segment_id_128808, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128787)); + } + // read in the per-group-results { - if (ltid_in_bounds_46561) { - x_32839 = ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)]; - if ((local_tid_46549 - squot32(local_tid_46549, 32) * - 32) == 0) { - x_32838 = x_32839; + int64_t read_per_thread_128820 = + sdiv_up64(groups_per_segment_128787, + segred_group_sizze_112879); + + x_112883 = 0.0; + for (int64_t i_128821 = 0; 
i_128821 < + read_per_thread_128820; i_128821++) { + int64_t group_res_id_128822 = + sext_i32_i64(local_tid_128797) * + read_per_thread_128820 + i_128821; + int64_t index_of_group_res_128823 = + sext_i32_i64(flat_segment_id_128808) * + groups_per_segment_128787 + group_res_id_128822; + + if (slt64(group_res_id_128822, + groups_per_segment_128787)) { + x_112884 = ((__global + double *) group_res_arr_mem_128792)[index_of_group_res_128823 * + segred_group_sizze_112879]; + + double defunc_1_op_res_112885; + + defunc_1_op_res_112885 = x_112883 + x_112884; + x_112883 = defunc_1_op_res_112885; } } } - // in-block scan (hopefully no barriers needed) + ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_112883; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_46562 = 1; - while (slt32(skip_threads_46562, 32)) { - if (sle32(skip_threads_46562, local_tid_46549 - - squot32(local_tid_46549, 32) * 32) && - ltid_in_bounds_46561) { - // read operands + int32_t offset_128824; + int32_t skip_waves_128825; + + skip_waves_128825 = 1; + + double x_128812; + double x_128813; + + offset_128824 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128797, + sext_i64_i32(segred_group_sizze_112879))) { + x_128812 = ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128824)]; + } + } + offset_128824 = 1; + while (slt32(offset_128824, wave_sizze_128799)) { + if (slt32(local_tid_128797 + offset_128824, + sext_i64_i32(segred_group_sizze_112879)) && + ((local_tid_128797 - squot32(local_tid_128797, + wave_sizze_128799) * + wave_sizze_128799) & (2 * offset_128824 - 1)) == + 0) { + // read array element { - x_32838 = ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549) - - sext_i32_i64(skip_threads_46562)]; + x_128813 = ((volatile __local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128824)]; } - // perform operation + // 
apply reduction operation { - bool inactive_46563 = - slt64(srem64(sext_i32_i64(local_tid_46549), - i32_res_28174), - sext_i32_i64(local_tid_46549) - - sext_i32_i64(local_tid_46549 - - skip_threads_46562)); + double defunc_1_op_res_128814 = x_128812 + + x_128813; - if (inactive_46563) { - x_32838 = x_32839; - } - if (!inactive_46563) { - float defunc_1_op_res_32840 = x_32838 + - x_32839; - - x_32838 = defunc_1_op_res_32840; - } + x_128812 = defunc_1_op_res_128814; } - } - if (sle32(wave_sizze_46551, skip_threads_46562)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46562, local_tid_46549 - - squot32(local_tid_46549, 32) * 32) && - ltid_in_bounds_46561) { - // write result + // write result of operation { ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - x_32838; - x_32839 = x_32838; - } - } - if (sle32(wave_sizze_46551, skip_threads_46562)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46562 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46549 - squot32(local_tid_46549, 32) * 32) == - 31 && ltid_in_bounds_46561) { - ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(squot32(local_tid_46549, - 32))] = - x_32838; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46564; - - // read input for in-block scan - { - if (squot32(local_tid_46549, 32) == 0 && - ltid_in_bounds_46561) { - x_46559 = ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)]; - if ((local_tid_46549 - squot32(local_tid_46549, - 32) * 32) == 0) { - x_46558 = x_46559; + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_128812; } } + offset_128824 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46564 = 1; - while (slt32(skip_threads_46564, 32)) { - if (sle32(skip_threads_46564, 
local_tid_46549 - - squot32(local_tid_46549, 32) * 32) && - (squot32(local_tid_46549, 32) == 0 && - ltid_in_bounds_46561)) { - // read operands - { - x_46558 = ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549) - - sext_i32_i64(skip_threads_46564)]; - } - // perform operation - { - bool inactive_46565 = - slt64(srem64(sext_i32_i64(local_tid_46549 * - 32 + 32 - 1), - i32_res_28174), - sext_i32_i64(local_tid_46549 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46549 - - skip_threads_46564) * - 32 + 32 - 1)); - - if (inactive_46565) { - x_46558 = x_46559; - } - if (!inactive_46565) { - float defunc_1_op_res_46560 = x_46558 + - x_46559; - - x_46558 = defunc_1_op_res_46560; - } - } - } - if (sle32(wave_sizze_46551, skip_threads_46564)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_128825, + squot32(sext_i64_i32(segred_group_sizze_112879) + + wave_sizze_128799 - 1, + wave_sizze_128799))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128824 = skip_waves_128825 * wave_sizze_128799; + if (slt32(local_tid_128797 + offset_128824, + sext_i64_i32(segred_group_sizze_112879)) && + ((local_tid_128797 - squot32(local_tid_128797, + wave_sizze_128799) * + wave_sizze_128799) == 0 && + (squot32(local_tid_128797, wave_sizze_128799) & + (2 * skip_waves_128825 - 1)) == 0)) { + // read array element + { + x_128813 = ((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797 + + offset_128824)]; } - if (sle32(skip_threads_46564, local_tid_46549 - - squot32(local_tid_46549, 32) * 32) && - (squot32(local_tid_46549, 32) == 0 && - ltid_in_bounds_46561)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - x_46558; - x_46559 = x_46558; - } + // apply reduction operation + { + double defunc_1_op_res_128814 = x_128812 + + x_128813; + + x_128812 = defunc_1_op_res_128814; } - if (sle32(wave_sizze_46551, skip_threads_46564)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + 
((__local + double *) red_arr_mem_128801)[sext_i32_i64(local_tid_128797)] = + x_128812; } - skip_threads_46564 *= 2; } + skip_waves_128825 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46549, 32) == 0 || - !ltid_in_bounds_46561)) { - // read operands - { - x_32839 = x_32838; - x_32838 = ((__local - float *) red_arr_mem_46553)[sext_i32_i64(squot32(local_tid_46549, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46566 = - slt64(srem64(sext_i32_i64(local_tid_46549), - i32_res_28174), - sext_i32_i64(local_tid_46549) - - sext_i32_i64(squot32(local_tid_46549, - 32) * 32 - 1)); - - if (inactive_46566) { - x_32838 = x_32839; - } - if (!inactive_46566) { - float defunc_1_op_res_32840 = x_32838 + x_32839; - - x_32838 = defunc_1_op_res_32840; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - x_32838; + // and back to memory with the final result + { + if (local_tid_128797 == 0) { + ((__global double *) mem_124281)[gtid_112728 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_112729 * + k2p2zq_75151 + + gtid_112730] = + x_128812; } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46549, 32) == 0) { - ((__local - float *) red_arr_mem_46553)[sext_i32_i64(local_tid_46549)] = - x_32839; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46557) * - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546) + - sext_i32_i64(local_tid_46549), m_27772) && - slt64(sext_i32_i64(local_tid_46549), - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546))) { - ((__global - float *) mem_45278)[sext_i32_i64(virt_group_id_46557) * - squot64(segred_group_sizze_32834, - segment_sizze_nonzzero_46546) + - sext_i32_i64(local_tid_46549)] = ((__local - float *) 
red_arr_mem_46553)[(sext_i32_i64(local_tid_46549) + - (int64_t) 1) * - segment_sizze_nonzzero_46546 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_32834 + #undef segred_group_sizze_112879 } -__kernel void mainDetailedzisegred_small_33994(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46761_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46759_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46757_backing_aligned_2, - int64_t m_27772, - int64_t iota32_arg_28203, - int64_t iota32_arg_28233, - int64_t num_groups_34139, - int64_t segment_sizze_nonzzero_46750, - __global - unsigned char *mem_45282, - __global - unsigned char *mem_45399, - __global - unsigned char *mem_45403, - __global - unsigned char *mem_45406, - __global - unsigned char *mem_45409, - __global - unsigned char *mem_45411, - __global - unsigned char *mem_45413, - __global - unsigned char *mem_45416) +__kernel void mainzisegred_large_113625(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128993_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128991_backing_aligned_1, + int64_t N_75135, int64_t n_75139, + int64_t k2p2zq_75151, + int64_t num_groups_113678, + int64_t groups_per_segment_128977, + int64_t elements_per_thread_128978, + int64_t virt_num_groups_128979, + int64_t threads_per_segment_128981, + __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124142, + __global unsigned char *mem_124587, + __global + unsigned char *group_res_arr_mem_128982, + __global + unsigned char *mainzicounter_mem_128984) { - #define segred_group_sizze_34138 (mainDetailedzisegred_group_sizze_33988) + #define segred_group_sizze_113677 (mainzisegred_group_sizze_113619) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46761_backing_2 = + 
__local volatile char *restrict sync_arr_mem_128993_backing_1 = (__local volatile - char *) red_arr_mem_46761_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46759_backing_1 = + char *) sync_arr_mem_128993_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128991_backing_0 = (__local volatile - char *) red_arr_mem_46759_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46757_backing_0 = - (__local volatile - char *) red_arr_mem_46757_backing_aligned_2; + char *) red_arr_mem_128991_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46752; - int32_t local_tid_46753; - int64_t group_sizze_46756; - int32_t wave_sizze_46755; - int32_t group_tid_46754; - - global_tid_46752 = get_global_id(0); - local_tid_46753 = get_local_id(0); - group_sizze_46756 = get_local_size(0); - wave_sizze_46755 = LOCKSTEP_WIDTH; - group_tid_46754 = get_group_id(0); - - int32_t phys_tid_33994; - - phys_tid_33994 = global_tid_46752; - - __local char *red_arr_mem_46757; - - red_arr_mem_46757 = (__local char *) red_arr_mem_46757_backing_0; - - __local char *red_arr_mem_46759; - - red_arr_mem_46759 = (__local char *) red_arr_mem_46759_backing_1; - - __local char *red_arr_mem_46761; - - red_arr_mem_46761 = (__local char *) red_arr_mem_46761_backing_2; - - int32_t phys_group_id_46763; - - phys_group_id_46763 = get_group_id(0); - for (int32_t i_46764 = 0; i_46764 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_27772, - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750))) - - phys_group_id_46763, sext_i64_i32(num_groups_34139)); - i_46764++) { - int32_t virt_group_id_46765 = phys_group_id_46763 + i_46764 * - sext_i64_i32(num_groups_34139); - int64_t gtid_33985 = squot64(sext_i32_i64(local_tid_46753), - segment_sizze_nonzzero_46750) + - sext_i32_i64(virt_group_id_46765) * - squot64(segred_group_sizze_34138, segment_sizze_nonzzero_46750); - int64_t gtid_33993 = srem64(sext_i32_i64(local_tid_46753), - iota32_arg_28203); + int32_t 
global_tid_128986; + int32_t local_tid_128987; + int64_t group_sizze_128990; + int32_t wave_sizze_128989; + int32_t group_tid_128988; + + global_tid_128986 = get_global_id(0); + local_tid_128987 = get_local_id(0); + group_sizze_128990 = get_local_size(0); + wave_sizze_128989 = LOCKSTEP_WIDTH; + group_tid_128988 = get_group_id(0); + + int32_t phys_tid_113625; + + phys_tid_113625 = global_tid_128986; + + __local char *red_arr_mem_128991; + + red_arr_mem_128991 = (__local char *) red_arr_mem_128991_backing_0; + + __local char *sync_arr_mem_128993; + + sync_arr_mem_128993 = (__local char *) sync_arr_mem_128993_backing_1; + + int32_t phys_group_id_128995; + + phys_group_id_128995 = get_group_id(0); + for (int32_t i_128996 = 0; i_128996 < + sdiv_up32(sext_i64_i32(virt_num_groups_128979) - phys_group_id_128995, + sext_i64_i32(num_groups_113678)); i_128996++) { + int32_t virt_group_id_128997 = phys_group_id_128995 + i_128996 * + sext_i64_i32(num_groups_113678); + int32_t flat_segment_id_128998 = squot32(virt_group_id_128997, + sext_i64_i32(groups_per_segment_128977)); + int64_t global_tid_128999 = srem64(sext_i32_i64(virt_group_id_128997) * + segred_group_sizze_113677 + + sext_i32_i64(local_tid_128987), + segred_group_sizze_113677 * + groups_per_segment_128977); + int64_t gtid_113614 = squot64(sext_i32_i64(flat_segment_id_128998), + k2p2zq_75151); + int64_t gtid_113615 = sext_i32_i64(flat_segment_id_128998) - + squot64(sext_i32_i64(flat_segment_id_128998), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_113624; + double x_acc_129000; + int64_t chunk_sizze_129001; + + chunk_sizze_129001 = smin64(elements_per_thread_128978, + sdiv_up64(n_75139 - global_tid_128999, + threads_per_segment_128981)); + + double x_113681; + double x_113682; - // apply map function if in bounds + // neutral-initialise the accumulators { - if (slt64((int64_t) 0, iota32_arg_28203) && (slt64(gtid_33985, - m_27772) && - slt64(sext_i32_i64(local_tid_46753), - iota32_arg_28203 * - 
squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750)))) { - int32_t y_34159 = ((__global int32_t *) mem_45399)[gtid_33985]; - float y_34160 = ((__global float *) mem_45406)[gtid_33985]; - int64_t binop_x_42448 = iota32_arg_28203 * gtid_33985; - int64_t binop_x_42449 = gtid_33993 + binop_x_42448; - int64_t new_index_42450 = squot64(binop_x_42449, - iota32_arg_28233); - int64_t binop_y_42456 = iota32_arg_28233 * new_index_42450; - int64_t new_index_42457 = binop_x_42449 - binop_y_42456; - float x_34162 = ((__global float *) mem_45403)[new_index_42450 * - iota32_arg_28233 + - new_index_42457]; - float x_34163 = ((__global float *) mem_45282)[gtid_33993]; - int32_t index_primexp_42414 = sext_i64_i32(gtid_33993); - float defunc_0_f_res_34165 = x_34162 / y_34160; - bool cond_34166 = slt32(index_primexp_42414, y_34159); - bool isnan_res_34167; - - isnan_res_34167 = futrts_isnan32(defunc_0_f_res_34165); - - bool cond_t_res_34168 = !isnan_res_34167; - bool x_34169 = cond_34166 && cond_t_res_34168; - float abs_res_34170 = (float) fabs(defunc_0_f_res_34165); - bool defunc_2_f_res_t_res_34171 = x_34163 < abs_res_34170; - bool x_34172 = x_34169 && defunc_2_f_res_t_res_34171; - float defunc_1_f_res_34173; - - if (cond_34166) { - defunc_1_f_res_34173 = defunc_0_f_res_34165; + x_acc_129000 = 0.0; + } + for (int64_t i_129005 = 0; i_129005 < chunk_sizze_129001; i_129005++) { + gtid_113624 = global_tid_128999 + threads_per_segment_128981 * + i_129005; + // apply map function + { + double x_113687 = ((__global double *) mem_124142)[gtid_113614 * + N_75135 + + gtid_113624]; + bool isnan_res_113688; + + isnan_res_113688 = futrts_isnan64(x_113687); + + double defunc_1_f_res_113689; + + if (isnan_res_113688) { + defunc_1_f_res_113689 = 0.0; } else { - defunc_1_f_res_34173 = 0.0F; + double x_113686 = ((__global + double *) binop_p_mem_120117)[gtid_113615 * + N_75135 + + gtid_113624]; + double defunc_1_f_res_f_res_113690 = x_113686 * x_113687; + + defunc_1_f_res_113689 = 
defunc_1_f_res_f_res_113690; } // save map-out results + { } + // load accumulator { - ((__global float *) mem_45416)[gtid_33985 * - iota32_arg_28203 + - gtid_33993] = - defunc_0_f_res_34165; + x_113681 = x_acc_129000; } - // save results to be reduced + // load new values { - ((__local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = - x_34172; - ((__local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - index_primexp_42414; - ((__local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - defunc_1_f_res_34173; + x_113682 = defunc_1_f_res_113689; } - } else { - ((__local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = 0; - ((__local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - -1; - ((__local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - 0.0F; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, iota32_arg_28203)) { - // perform segmented scan to imitate reduction - { - bool x_34145; - int32_t x_34146; - float x_34147; - bool x_34148; - int32_t x_34149; - float x_34150; - bool x_46766; - int32_t x_46767; - float x_46768; - bool x_46769; - int32_t x_46770; - float x_46771; - bool ltid_in_bounds_46780; - - ltid_in_bounds_46780 = slt64(sext_i32_i64(local_tid_46753), - iota32_arg_28203 * - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750)); - - int32_t skip_threads_46781; - - // read input for in-block scan + // apply reduction operator { - if (ltid_in_bounds_46780) { - x_34148 = ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)]; - x_34149 = ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)]; - x_34150 = ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)]; - if ((local_tid_46753 - squot32(local_tid_46753, 32) * - 32) == 0) { - x_34145 = x_34148; - x_34146 = x_34149; - x_34147 = x_34150; - } + double defunc_1_op_res_113683 = x_113681 + x_113682; + + // store 
in accumulator + { + x_acc_129000 = defunc_1_op_res_113683; } } - // in-block scan (hopefully no barriers needed) + } + } + // to reduce current chunk, first store our result in memory + { + x_113681 = x_acc_129000; + ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_113681; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129006; + int32_t skip_waves_129007; + + skip_waves_129007 = 1; + + double x_129002; + double x_129003; + + offset_129006 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128987, + sext_i64_i32(segred_group_sizze_113677))) { + x_129002 = ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129006)]; + } + } + offset_129006 = 1; + while (slt32(offset_129006, wave_sizze_128989)) { + if (slt32(local_tid_128987 + offset_129006, + sext_i64_i32(segred_group_sizze_113677)) && + ((local_tid_128987 - squot32(local_tid_128987, + wave_sizze_128989) * + wave_sizze_128989) & (2 * offset_129006 - 1)) == 0) { + // read array element { - skip_threads_46781 = 1; - while (slt32(skip_threads_46781, 32)) { - if (sle32(skip_threads_46781, local_tid_46753 - - squot32(local_tid_46753, 32) * 32) && - ltid_in_bounds_46780) { - // read operands - { - x_34145 = ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46781)]; - x_34146 = ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46781)]; - x_34147 = ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46781)]; - } - // perform operation - { - bool inactive_46782 = - slt64(srem64(sext_i32_i64(local_tid_46753), - iota32_arg_28203), - sext_i32_i64(local_tid_46753) - - sext_i32_i64(local_tid_46753 - - skip_threads_46781)); - - if (inactive_46782) { - x_34145 = x_34148; - x_34146 = x_34149; - x_34147 = x_34150; - } - if (!inactive_46782) { - bool 
defunc_1_op_res_34151; - int32_t defunc_1_op_res_34152; - - if (x_34145) { - defunc_1_op_res_34151 = x_34145; - defunc_1_op_res_34152 = x_34146; - } else { - bool x_34153 = x_34148 && x_34148; - bool x_34154 = !x_34148; - bool y_34155 = x_34145 && x_34154; - bool defunc_1_op_res_f_res_34156 = - x_34153 || y_34155; - int32_t defunc_1_op_res_f_res_34157; - - if (x_34148) { - defunc_1_op_res_f_res_34157 = - x_34149; - } else { - defunc_1_op_res_f_res_34157 = - x_34146; - } - defunc_1_op_res_34151 = - defunc_1_op_res_f_res_34156; - defunc_1_op_res_34152 = - defunc_1_op_res_f_res_34157; - } - - float defunc_1_op_res_34158 = x_34147 + - x_34150; - - x_34145 = defunc_1_op_res_34151; - x_34146 = defunc_1_op_res_34152; - x_34147 = defunc_1_op_res_34158; - } - } - } - if (sle32(wave_sizze_46755, skip_threads_46781)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46781, local_tid_46753 - - squot32(local_tid_46753, 32) * 32) && - ltid_in_bounds_46780) { - // write result - { - ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = - x_34145; - x_34148 = x_34145; - ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - x_34146; - x_34149 = x_34146; - ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - x_34147; - x_34150 = x_34147; - } - } - if (sle32(wave_sizze_46755, skip_threads_46781)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46781 *= 2; - } + x_129003 = ((volatile __local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129006)]; } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + // apply reduction operation { - if ((local_tid_46753 - squot32(local_tid_46753, 32) * 32) == - 31 && ltid_in_bounds_46780) { - ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(squot32(local_tid_46753, - 32))] = - x_34145; - ((volatile __local - int32_t *) 
red_arr_mem_46759)[sext_i32_i64(squot32(local_tid_46753, - 32))] = - x_34146; - ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(squot32(local_tid_46753, - 32))] = - x_34147; - } + double defunc_1_op_res_129004 = x_129002 + x_129003; + + x_129002 = defunc_1_op_res_129004; } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_129002; + } + } + offset_129006 *= 2; + } + while (slt32(skip_waves_129007, + squot32(sext_i64_i32(segred_group_sizze_113677) + + wave_sizze_128989 - 1, wave_sizze_128989))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129006 = skip_waves_129007 * wave_sizze_128989; + if (slt32(local_tid_128987 + offset_129006, + sext_i64_i32(segred_group_sizze_113677)) && + ((local_tid_128987 - squot32(local_tid_128987, + wave_sizze_128989) * + wave_sizze_128989) == 0 && (squot32(local_tid_128987, + wave_sizze_128989) & (2 * + skip_waves_129007 - + 1)) == + 0)) { + // read array element { - int32_t skip_threads_46783; + x_129003 = ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129006)]; + } + // apply reduction operation + { + double defunc_1_op_res_129004 = x_129002 + x_129003; - // read input for in-block scan - { - if (squot32(local_tid_46753, 32) == 0 && - ltid_in_bounds_46780) { - x_46769 = ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)]; - x_46770 = ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)]; - x_46771 = ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)]; - if ((local_tid_46753 - squot32(local_tid_46753, - 32) * 32) == 0) { - x_46766 = x_46769; - x_46767 = x_46770; - x_46768 = x_46771; - } + x_129002 = defunc_1_op_res_129004; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_129002; + } + } + skip_waves_129007 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128987) == (int64_t) 0) { + x_acc_129000 = x_129002; + } + } + if (groups_per_segment_128977 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128987 == 0) { + ((__global double *) mem_124587)[gtid_113614 * + k2p2zq_75151 + + gtid_113615] = + x_acc_129000; + } + } + } else { + int32_t old_counter_129008; + + // first thread in group saves group result to global memory + { + if (local_tid_128987 == 0) { + ((__global + double *) group_res_arr_mem_128982)[sext_i32_i64(virt_group_id_128997) * + segred_group_sizze_113677] = + x_acc_129000; + mem_fence_global(); + old_counter_129008 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128984)[sext_i32_i64(srem32(flat_segment_id_128998, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128993)[(int64_t) 0] = + old_counter_129008 == groups_per_segment_128977 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129009; + + is_last_group_129009 = ((__local + bool *) sync_arr_mem_128993)[(int64_t) 0]; + if (is_last_group_129009) { + if (local_tid_128987 == 0) { + old_counter_129008 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_128984)[sext_i32_i64(srem32(flat_segment_id_128998, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128977)); + } + // read in the per-group-results + { + int64_t read_per_thread_129010 = + sdiv_up64(groups_per_segment_128977, + segred_group_sizze_113677); + + x_113681 = 0.0; + for (int64_t i_129011 = 0; i_129011 < + read_per_thread_129010; i_129011++) { + int64_t group_res_id_129012 = + sext_i32_i64(local_tid_128987) * + read_per_thread_129010 + i_129011; + int64_t index_of_group_res_129013 = + sext_i32_i64(flat_segment_id_128998) * 
+ groups_per_segment_128977 + group_res_id_129012; + + if (slt64(group_res_id_129012, + groups_per_segment_128977)) { + x_113682 = ((__global + double *) group_res_arr_mem_128982)[index_of_group_res_129013 * + segred_group_sizze_113677]; + + double defunc_1_op_res_113683; + + defunc_1_op_res_113683 = x_113681 + x_113682; + x_113681 = defunc_1_op_res_113683; } } - // in-block scan (hopefully no barriers needed) + } + ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_113681; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129014; + int32_t skip_waves_129015; + + skip_waves_129015 = 1; + + double x_129002; + double x_129003; + + offset_129014 = 0; + // participating threads read initial accumulator { - skip_threads_46783 = 1; - while (slt32(skip_threads_46783, 32)) { - if (sle32(skip_threads_46783, local_tid_46753 - - squot32(local_tid_46753, 32) * 32) && - (squot32(local_tid_46753, 32) == 0 && - ltid_in_bounds_46780)) { - // read operands - { - x_46766 = ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46783)]; - x_46767 = ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46783)]; - x_46768 = ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753) - - sext_i32_i64(skip_threads_46783)]; - } - // perform operation - { - bool inactive_46784 = - slt64(srem64(sext_i32_i64(local_tid_46753 * - 32 + 32 - 1), - iota32_arg_28203), - sext_i32_i64(local_tid_46753 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46753 - - skip_threads_46783) * - 32 + 32 - 1)); - - if (inactive_46784) { - x_46766 = x_46769; - x_46767 = x_46770; - x_46768 = x_46771; - } - if (!inactive_46784) { - bool defunc_1_op_res_46772; - int32_t defunc_1_op_res_46773; - - if (x_46766) { - defunc_1_op_res_46772 = x_46766; - defunc_1_op_res_46773 = x_46767; - } else { - bool x_46774 = x_46769 && x_46769; - 
bool x_46775 = !x_46769; - bool y_46776 = x_46766 && x_46775; - bool defunc_1_op_res_f_res_46777 = - x_46774 || y_46776; - int32_t defunc_1_op_res_f_res_46778; - - if (x_46769) { - defunc_1_op_res_f_res_46778 = - x_46770; - } else { - defunc_1_op_res_f_res_46778 = - x_46767; - } - defunc_1_op_res_46772 = - defunc_1_op_res_f_res_46777; - defunc_1_op_res_46773 = - defunc_1_op_res_f_res_46778; - } - - float defunc_1_op_res_46779 = x_46768 + - x_46771; - - x_46766 = defunc_1_op_res_46772; - x_46767 = defunc_1_op_res_46773; - x_46768 = defunc_1_op_res_46779; - } - } - } - if (sle32(wave_sizze_46755, skip_threads_46783)) { - barrier(CLK_LOCAL_MEM_FENCE); + if (slt32(local_tid_128987, + sext_i64_i32(segred_group_sizze_113677))) { + x_129002 = ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129014)]; + } + } + offset_129014 = 1; + while (slt32(offset_129014, wave_sizze_128989)) { + if (slt32(local_tid_128987 + offset_129014, + sext_i64_i32(segred_group_sizze_113677)) && + ((local_tid_128987 - squot32(local_tid_128987, + wave_sizze_128989) * + wave_sizze_128989) & (2 * offset_129014 - 1)) == + 0) { + // read array element + { + x_129003 = ((volatile __local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129014)]; } - if (sle32(skip_threads_46783, local_tid_46753 - - squot32(local_tid_46753, 32) * 32) && - (squot32(local_tid_46753, 32) == 0 && - ltid_in_bounds_46780)) { - // write result - { - ((volatile __local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = - x_46766; - x_46769 = x_46766; - ((volatile __local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - x_46767; - x_46770 = x_46767; - ((volatile __local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - x_46768; - x_46771 = x_46768; - } + // apply reduction operation + { + double defunc_1_op_res_129004 = x_129002 + + x_129003; + + x_129002 = defunc_1_op_res_129004; } - if (sle32(wave_sizze_46755, 
skip_threads_46783)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_129002; } - skip_threads_46783 *= 2; } + offset_129014 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46753, 32) == 0 || - !ltid_in_bounds_46780)) { - // read operands - { - x_34148 = x_34145; - x_34149 = x_34146; - x_34150 = x_34147; - x_34145 = ((__local - bool *) red_arr_mem_46757)[sext_i32_i64(squot32(local_tid_46753, - 32)) - - (int64_t) 1]; - x_34146 = ((__local - int32_t *) red_arr_mem_46759)[sext_i32_i64(squot32(local_tid_46753, - 32)) - - (int64_t) 1]; - x_34147 = ((__local - float *) red_arr_mem_46761)[sext_i32_i64(squot32(local_tid_46753, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46785 = - slt64(srem64(sext_i32_i64(local_tid_46753), - iota32_arg_28203), - sext_i32_i64(local_tid_46753) - - sext_i32_i64(squot32(local_tid_46753, - 32) * 32 - 1)); - - if (inactive_46785) { - x_34145 = x_34148; - x_34146 = x_34149; - x_34147 = x_34150; + while (slt32(skip_waves_129015, + squot32(sext_i64_i32(segred_group_sizze_113677) + + wave_sizze_128989 - 1, + wave_sizze_128989))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129014 = skip_waves_129015 * wave_sizze_128989; + if (slt32(local_tid_128987 + offset_129014, + sext_i64_i32(segred_group_sizze_113677)) && + ((local_tid_128987 - squot32(local_tid_128987, + wave_sizze_128989) * + wave_sizze_128989) == 0 && + (squot32(local_tid_128987, wave_sizze_128989) & + (2 * skip_waves_129015 - 1)) == 0)) { + // read array element + { + x_129003 = ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987 + + offset_129014)]; } - if (!inactive_46785) { - bool defunc_1_op_res_34151; - int32_t defunc_1_op_res_34152; - - if (x_34145) { - defunc_1_op_res_34151 = x_34145; - defunc_1_op_res_34152 = x_34146; - } else { - bool x_34153 = x_34148 && 
x_34148; - bool x_34154 = !x_34148; - bool y_34155 = x_34145 && x_34154; - bool defunc_1_op_res_f_res_34156 = - x_34153 || y_34155; - int32_t defunc_1_op_res_f_res_34157; - - if (x_34148) { - defunc_1_op_res_f_res_34157 = x_34149; - } else { - defunc_1_op_res_f_res_34157 = x_34146; - } - defunc_1_op_res_34151 = - defunc_1_op_res_f_res_34156; - defunc_1_op_res_34152 = - defunc_1_op_res_f_res_34157; - } - - float defunc_1_op_res_34158 = x_34147 + x_34150; + // apply reduction operation + { + double defunc_1_op_res_129004 = x_129002 + + x_129003; - x_34145 = defunc_1_op_res_34151; - x_34146 = defunc_1_op_res_34152; - x_34147 = defunc_1_op_res_34158; + x_129002 = defunc_1_op_res_129004; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128991)[sext_i32_i64(local_tid_128987)] = + x_129002; } } - // write final result - { - ((__local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = - x_34145; - ((__local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - x_34146; - ((__local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - x_34147; - } + skip_waves_129015 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46753, 32) == 0) { - ((__local - bool *) red_arr_mem_46757)[sext_i32_i64(local_tid_46753)] = - x_34148; - ((__local - int32_t *) red_arr_mem_46759)[sext_i32_i64(local_tid_46753)] = - x_34149; - ((__local - float *) red_arr_mem_46761)[sext_i32_i64(local_tid_46753)] = - x_34150; + // and back to memory with the final result + { + if (local_tid_128987 == 0) { + ((__global double *) mem_124587)[gtid_113614 * + k2p2zq_75151 + + gtid_113615] = + x_129002; + } } } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46765) * - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750) + - sext_i32_i64(local_tid_46753), m_27772) && - 
slt64(sext_i32_i64(local_tid_46753), - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750))) { - ((__global - bool *) mem_45409)[sext_i32_i64(virt_group_id_46765) * - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750) + - sext_i32_i64(local_tid_46753)] = ((__local - bool *) red_arr_mem_46757)[(sext_i32_i64(local_tid_46753) + - (int64_t) 1) * - segment_sizze_nonzzero_46750 - - (int64_t) 1]; - ((__global - int32_t *) mem_45411)[sext_i32_i64(virt_group_id_46765) * - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750) + - sext_i32_i64(local_tid_46753)] = - ((__local - int32_t *) red_arr_mem_46759)[(sext_i32_i64(local_tid_46753) + - (int64_t) 1) * - segment_sizze_nonzzero_46750 - - (int64_t) 1]; - ((__global - float *) mem_45413)[sext_i32_i64(virt_group_id_46765) * - squot64(segred_group_sizze_34138, - segment_sizze_nonzzero_46750) + - sext_i32_i64(local_tid_46753)] = ((__local - float *) red_arr_mem_46761)[(sext_i32_i64(local_tid_46753) + - (int64_t) 1) * - segment_sizze_nonzzero_46750 - - (int64_t) 1]; } } - barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_34138 -} -__kernel void mainMagnitudezicopy_45850(int64_t m_28478, int64_t nm_28626, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, __global - unsigned char *mem_44590) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - int32_t copy_gtid_45850; - int32_t copy_ltid_45851; - int32_t copy_gid_45852; - - copy_gtid_45850 = get_global_id(0); - copy_ltid_45851 = get_local_id(0); - copy_gid_45852 = get_group_id(0); - if (slt64(sext_i32_i64(copy_gtid_45850), m_28478 * nm_28626)) { - ((__global float *) mem_44590)[(sext_i32_i64(copy_gtid_45850) - - squot64(sext_i32_i64(copy_gtid_45850), - nm_28626) * nm_28626) * - m_28478 + - squot64(sext_i32_i64(copy_gtid_45850), - nm_28626)] = 
((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (squot64(sext_i32_i64(copy_gtid_45850), - nm_28626) * - ctx_param_ext_44581 + - (sext_i32_i64(copy_gtid_45850) - - squot64(sext_i32_i64(copy_gtid_45850), - nm_28626) * - nm_28626) * - ctx_param_ext_44583)]; - } - - error_0: - return; + #undef segred_group_sizze_113677 } -__kernel void mainMagnitudeziscan_stage1_36724(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46262_backing_aligned_0, - int64_t N_28477, int64_t m_28478, - int32_t num_threads_46256, - __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_3_map_res_mem_45140, - __global - unsigned char *mem_45163, - __global - unsigned char *mem_45166) +__kernel void mainzisegred_large_113762(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129081_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129079_backing_aligned_1, + int64_t k2p2zq_75151, + int64_t num_groups_113811, + int64_t groups_per_segment_129065, + int64_t elements_per_thread_129066, + int64_t virt_num_groups_129067, + int64_t threads_per_segment_129069, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global unsigned char *mem_124653, + __global + unsigned char *group_res_arr_mem_129070, + __global + unsigned char *mainzicounter_mem_129072) { - #define segscan_group_sizze_36741 (mainMagnitudezisegscan_group_sizze_36718) + #define segred_group_sizze_113810 (mainzisegred_group_sizze_113756) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46262_backing_0 = + __local volatile char *restrict sync_arr_mem_129081_backing_1 = + (__local volatile + char *) sync_arr_mem_129081_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129079_backing_0 = (__local volatile - char *) scan_arr_mem_46262_backing_aligned_0; + char *) 
red_arr_mem_129079_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46257; - int32_t local_tid_46258; - int64_t group_sizze_46261; - int32_t wave_sizze_46260; - int32_t group_tid_46259; - - global_tid_46257 = get_global_id(0); - local_tid_46258 = get_local_id(0); - group_sizze_46261 = get_local_size(0); - wave_sizze_46260 = LOCKSTEP_WIDTH; - group_tid_46259 = get_group_id(0); - - int32_t phys_tid_36724; - - phys_tid_36724 = global_tid_46257; - - __local char *scan_arr_mem_46262; - - scan_arr_mem_46262 = (__local char *) scan_arr_mem_46262_backing_0; - - int64_t x_36746; - int64_t x_36747; - - x_36746 = (int64_t) 0; - for (int64_t j_46264 = 0; j_46264 < sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256)); - j_46264++) { - int64_t chunk_offset_46265 = segscan_group_sizze_36741 * j_46264 + - sext_i32_i64(group_tid_46259) * (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))); - int64_t flat_idx_46266 = chunk_offset_46265 + - sext_i32_i64(local_tid_46258); - int64_t gtid_36715 = squot64(flat_idx_46266, N_28477); - int64_t gtid_36723 = flat_idx_46266 - squot64(flat_idx_46266, N_28477) * - N_28477; + int32_t global_tid_129074; + int32_t local_tid_129075; + int64_t group_sizze_129078; + int32_t wave_sizze_129077; + int32_t group_tid_129076; + + global_tid_129074 = get_global_id(0); + local_tid_129075 = get_local_id(0); + group_sizze_129078 = get_local_size(0); + wave_sizze_129077 = LOCKSTEP_WIDTH; + group_tid_129076 = get_group_id(0); + + int32_t phys_tid_113762; + + phys_tid_113762 = global_tid_129074; + + __local char *red_arr_mem_129079; + + red_arr_mem_129079 = (__local char *) red_arr_mem_129079_backing_0; + + __local char *sync_arr_mem_129081; + + sync_arr_mem_129081 = (__local char *) sync_arr_mem_129081_backing_1; + + int32_t phys_group_id_129083; + + phys_group_id_129083 = get_group_id(0); + for (int32_t i_129084 = 0; i_129084 < + sdiv_up32(sext_i64_i32(virt_num_groups_129067) - 
phys_group_id_129083, + sext_i64_i32(num_groups_113811)); i_129084++) { + int32_t virt_group_id_129085 = phys_group_id_129083 + i_129084 * + sext_i64_i32(num_groups_113811); + int32_t flat_segment_id_129086 = squot32(virt_group_id_129085, + sext_i64_i32(groups_per_segment_129065)); + int64_t global_tid_129087 = srem64(sext_i32_i64(virt_group_id_129085) * + segred_group_sizze_113810 + + sext_i32_i64(local_tid_129075), + segred_group_sizze_113810 * + groups_per_segment_129065); + int64_t gtid_113751 = squot64(sext_i32_i64(flat_segment_id_129086), + k2p2zq_75151); + int64_t gtid_113752 = sext_i32_i64(flat_segment_id_129086) - + squot64(sext_i32_i64(flat_segment_id_129086), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_113761; + double x_acc_129088; + int64_t chunk_sizze_129089; + + chunk_sizze_129089 = smin64(elements_per_thread_129066, + sdiv_up64(k2p2zq_75151 - global_tid_129087, + threads_per_segment_129069)); + + double x_113814; + double x_113815; - // threads in bounds read input + // neutral-initialise the accumulators { - if (slt64(gtid_36715, m_28478) && slt64(gtid_36723, N_28477)) { - float x_36751 = ((__global - float *) images_mem_44381)[gtid_36715 * - N_28477 + - gtid_36723]; - bool isnan_res_36753; - - isnan_res_36753 = futrts_isnan32(x_36751); - - bool cond_36754 = !isnan_res_36753; - float defunc_1_f_res_36755; + x_acc_129088 = 0.0; + } + for (int64_t i_129093 = 0; i_129093 < chunk_sizze_129089; i_129093++) { + gtid_113761 = global_tid_129087 + threads_per_segment_129069 * + i_129093; + // apply map function + { + double x_113820 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_113751 * + k2p2zq_75151 + + gtid_113761]; + double x_113821 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_113751 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_113752 * + k2p2zq_75151 + + gtid_113761]; + double defunc_1_f_res_113822 = x_113820 * x_113821; - if (cond_36754) { - float x_36752 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_36715 * - 
N_28477 + - gtid_36723]; - float defunc_1_f_res_t_res_36756 = x_36751 - x_36752; - - defunc_1_f_res_36755 = defunc_1_f_res_t_res_36756; - } else { - defunc_1_f_res_36755 = NAN; + // save map-out results + { } + // load accumulator + { + x_113814 = x_acc_129088; } - - bool isnan_res_36757; - - isnan_res_36757 = futrts_isnan32(defunc_1_f_res_36755); - - bool defunc_0_p_res_36758 = !isnan_res_36757; - int64_t defunc_0_f_res_36759 = - btoi_bool_i64(defunc_0_p_res_36758); - - // write to-scan values to parameters + // load new values { - x_36747 = defunc_0_f_res_36759; + x_113815 = defunc_1_f_res_113822; } - // write mapped values results to global memory + // apply reduction operator { - ((__global float *) mem_45166)[gtid_36715 * N_28477 + - gtid_36723] = - defunc_1_f_res_36755; + double defunc_1_op_res_113816 = x_113814 + x_113815; + + // store in accumulator + { + x_acc_129088 = defunc_1_op_res_113816; + } } } } - // do one intra-group scan operation + // to reduce current chunk, first store our result in memory { - // maybe restore some to-scan values to parameters, or read neutral - { - if (!(slt64(gtid_36715, m_28478) && slt64(gtid_36723, - N_28477))) { - x_36747 = (int64_t) 0; + x_113814 = x_acc_129088; + ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_113814; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129094; + int32_t skip_waves_129095; + + skip_waves_129095 = 1; + + double x_129090; + double x_129091; + + offset_129094 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129075, + sext_i64_i32(segred_group_sizze_113810))) { + x_129090 = ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129094)]; + } + } + offset_129094 = 1; + while (slt32(offset_129094, wave_sizze_129077)) { + if (slt32(local_tid_129075 + offset_129094, + sext_i64_i32(segred_group_sizze_113810)) && + ((local_tid_129075 - squot32(local_tid_129075, + wave_sizze_129077) * + 
wave_sizze_129077) & (2 * offset_129094 - 1)) == 0) { + // read array element + { + x_129091 = ((volatile __local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129094)]; + } + // apply reduction operation + { + double defunc_1_op_res_129092 = x_129090 + x_129091; + + x_129090 = defunc_1_op_res_129092; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_129090; } } - // combine with carry and write to local memory - { - int64_t defunc_1_op_res_36748 = add64(x_36746, x_36747); - - ((__local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)] = - defunc_1_op_res_36748; - } + offset_129094 *= 2; + } + while (slt32(skip_waves_129095, + squot32(sext_i64_i32(segred_group_sizze_113810) + + wave_sizze_129077 - 1, wave_sizze_129077))) { barrier(CLK_LOCAL_MEM_FENCE); - - int64_t x_46267; - int64_t x_46268; - int64_t x_46270; - int64_t x_46271; - bool ltid_in_bounds_46273; - - ltid_in_bounds_46273 = slt64(sext_i32_i64(local_tid_46258), - segscan_group_sizze_36741); - - int32_t skip_threads_46274; - - // read input for in-block scan - { - if (ltid_in_bounds_46273) { - x_46268 = ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)]; - if ((local_tid_46258 - squot32(local_tid_46258, 32) * 32) == - 0) { - x_46267 = x_46268; - } + offset_129094 = skip_waves_129095 * wave_sizze_129077; + if (slt32(local_tid_129075 + offset_129094, + sext_i64_i32(segred_group_sizze_113810)) && + ((local_tid_129075 - squot32(local_tid_129075, + wave_sizze_129077) * + wave_sizze_129077) == 0 && (squot32(local_tid_129075, + wave_sizze_129077) & (2 * + skip_waves_129095 - + 1)) == + 0)) { + // read array element + { + x_129091 = ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129094)]; + } + // apply reduction operation + { + double defunc_1_op_res_129092 = x_129090 + x_129091; + + x_129090 = defunc_1_op_res_129092; + } + // 
write result of operation + { + ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_129090; } } - // in-block scan (hopefully no barriers needed) + skip_waves_129095 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129075) == (int64_t) 0) { + x_acc_129088 = x_129090; + } + } + if (groups_per_segment_129065 == (int64_t) 1) { + // first thread in group saves final result to memory { - skip_threads_46274 = 1; - while (slt32(skip_threads_46274, 32)) { - if (sle32(skip_threads_46274, local_tid_46258 - - squot32(local_tid_46258, 32) * 32) && - ltid_in_bounds_46273) { - // read operands - { - x_46267 = ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258) - - sext_i32_i64(skip_threads_46274)]; - } - // perform operation - { - bool inactive_46275 = - slt64(srem64(sext_i32_i64(local_tid_46258) + - chunk_offset_46265, N_28477), - sext_i32_i64(local_tid_46258) + - chunk_offset_46265 - - (sext_i32_i64(local_tid_46258 - - skip_threads_46274) + - chunk_offset_46265)); - - if (inactive_46275) { - x_46267 = x_46268; - } - if (!inactive_46275) { - int64_t defunc_1_op_res_46269 = add64(x_46267, - x_46268); - - x_46267 = defunc_1_op_res_46269; - } - } - } - if (sle32(wave_sizze_46260, skip_threads_46274)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46274, local_tid_46258 - - squot32(local_tid_46258, 32) * 32) && - ltid_in_bounds_46273) { - // write result - { - ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)] = - x_46267; - x_46268 = x_46267; - } - } - if (sle32(wave_sizze_46260, skip_threads_46274)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46274 *= 2; + if (local_tid_129075 == 0) { + ((__global double *) mem_124653)[gtid_113751 * + k2p2zq_75151 + + gtid_113752] = + x_acc_129088; } } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + } else { + 
int32_t old_counter_129096; + + // first thread in group saves group result to global memory { - if ((local_tid_46258 - squot32(local_tid_46258, 32) * 32) == - 31 && ltid_in_bounds_46273) { - ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(squot32(local_tid_46258, - 32))] = - x_46267; + if (local_tid_129075 == 0) { + ((__global + double *) group_res_arr_mem_129070)[sext_i32_i64(virt_group_id_129085) * + segred_group_sizze_113810] = + x_acc_129088; + mem_fence_global(); + old_counter_129096 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129072)[sext_i32_i64(srem32(flat_segment_id_129086, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129081)[(int64_t) 0] = + old_counter_129096 == groups_per_segment_129065 - + (int64_t) 1; } } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46276; - - // read input for in-block scan + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129097; + + is_last_group_129097 = ((__local + bool *) sync_arr_mem_129081)[(int64_t) 0]; + if (is_last_group_129097) { + if (local_tid_129075 == 0) { + old_counter_129096 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129072)[sext_i32_i64(srem32(flat_segment_id_129086, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129065)); + } + // read in the per-group-results { - if (squot32(local_tid_46258, 32) == 0 && - ltid_in_bounds_46273) { - x_46271 = ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)]; - if ((local_tid_46258 - squot32(local_tid_46258, 32) * - 32) == 0) { - x_46270 = x_46271; + int64_t read_per_thread_129098 = + sdiv_up64(groups_per_segment_129065, + segred_group_sizze_113810); + + x_113814 = 0.0; + for (int64_t i_129099 = 0; i_129099 < + read_per_thread_129098; i_129099++) { + int64_t group_res_id_129100 = + sext_i32_i64(local_tid_129075) * + 
read_per_thread_129098 + i_129099; + int64_t index_of_group_res_129101 = + sext_i32_i64(flat_segment_id_129086) * + groups_per_segment_129065 + group_res_id_129100; + + if (slt64(group_res_id_129100, + groups_per_segment_129065)) { + x_113815 = ((__global + double *) group_res_arr_mem_129070)[index_of_group_res_129101 * + segred_group_sizze_113810]; + + double defunc_1_op_res_113816; + + defunc_1_op_res_113816 = x_113814 + x_113815; + x_113814 = defunc_1_op_res_113816; } } } - // in-block scan (hopefully no barriers needed) + ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_113814; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_46276 = 1; - while (slt32(skip_threads_46276, 32)) { - if (sle32(skip_threads_46276, local_tid_46258 - - squot32(local_tid_46258, 32) * 32) && - (squot32(local_tid_46258, 32) == 0 && - ltid_in_bounds_46273)) { - // read operands + int32_t offset_129102; + int32_t skip_waves_129103; + + skip_waves_129103 = 1; + + double x_129090; + double x_129091; + + offset_129102 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129075, + sext_i64_i32(segred_group_sizze_113810))) { + x_129090 = ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129102)]; + } + } + offset_129102 = 1; + while (slt32(offset_129102, wave_sizze_129077)) { + if (slt32(local_tid_129075 + offset_129102, + sext_i64_i32(segred_group_sizze_113810)) && + ((local_tid_129075 - squot32(local_tid_129075, + wave_sizze_129077) * + wave_sizze_129077) & (2 * offset_129102 - 1)) == + 0) { + // read array element { - x_46270 = ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258) - - sext_i32_i64(skip_threads_46276)]; + x_129091 = ((volatile __local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129102)]; } - // perform operation + // apply reduction operation { - bool inactive_46277 = - 
slt64(srem64(sext_i32_i64(local_tid_46258 * - 32 + 32 - 1) + - chunk_offset_46265, N_28477), - sext_i32_i64(local_tid_46258 * 32 + - 32 - 1) + chunk_offset_46265 - - (sext_i32_i64((local_tid_46258 - - skip_threads_46276) * - 32 + 32 - 1) + chunk_offset_46265)); + double defunc_1_op_res_129092 = x_129090 + + x_129091; - if (inactive_46277) { - x_46270 = x_46271; - } - if (!inactive_46277) { - int64_t defunc_1_op_res_46272 = - add64(x_46270, x_46271); - - x_46270 = defunc_1_op_res_46272; - } + x_129090 = defunc_1_op_res_129092; } - } - if (sle32(wave_sizze_46260, skip_threads_46276)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46276, local_tid_46258 - - squot32(local_tid_46258, 32) * 32) && - (squot32(local_tid_46258, 32) == 0 && - ltid_in_bounds_46273)) { - // write result + // write result of operation { ((volatile __local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)] = - x_46270; - x_46271 = x_46270; + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_129090; } } - if (sle32(wave_sizze_46260, skip_threads_46276)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46276 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46258, 32) == 0 || - !ltid_in_bounds_46273)) { - // read operands - { - x_46268 = x_46267; - x_46267 = ((__local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(squot32(local_tid_46258, - 32)) - - (int64_t) 1]; + offset_129102 *= 2; } - // perform operation - { - bool inactive_46278 = - slt64(srem64(sext_i32_i64(local_tid_46258) + - chunk_offset_46265, N_28477), - sext_i32_i64(local_tid_46258) + - chunk_offset_46265 - - (sext_i32_i64(squot32(local_tid_46258, 32) * - 32 - 1) + chunk_offset_46265)); - - if (inactive_46278) { - x_46267 = x_46268; - } - if (!inactive_46278) { - int64_t defunc_1_op_res_46269 = add64(x_46267, - x_46268); - - x_46267 = defunc_1_op_res_46269; + while (slt32(skip_waves_129103, + 
squot32(sext_i64_i32(segred_group_sizze_113810) + + wave_sizze_129077 - 1, + wave_sizze_129077))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129102 = skip_waves_129103 * wave_sizze_129077; + if (slt32(local_tid_129075 + offset_129102, + sext_i64_i32(segred_group_sizze_113810)) && + ((local_tid_129075 - squot32(local_tid_129075, + wave_sizze_129077) * + wave_sizze_129077) == 0 && + (squot32(local_tid_129075, wave_sizze_129077) & + (2 * skip_waves_129103 - 1)) == 0)) { + // read array element + { + x_129091 = ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075 + + offset_129102)]; + } + // apply reduction operation + { + double defunc_1_op_res_129092 = x_129090 + + x_129091; + + x_129090 = defunc_1_op_res_129092; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129079)[sext_i32_i64(local_tid_129075)] = + x_129090; + } } + skip_waves_129103 *= 2; } - // write final result + // and back to memory with the final result { - ((__local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)] = - x_46267; + if (local_tid_129075 == 0) { + ((__global double *) mem_124653)[gtid_113751 * + k2p2zq_75151 + + gtid_113752] = + x_129090; + } } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46258, 32) == 0) { - ((__local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)] = - x_46268; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write partial scan result - { - if (slt64(gtid_36715, m_28478) && slt64(gtid_36723, N_28477)) { - ((__global int64_t *) mem_45163)[gtid_36715 * N_28477 + - gtid_36723] = ((__local - int64_t *) scan_arr_mem_46262)[sext_i32_i64(local_tid_46258)]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread reads last element as carry-in for next iteration - { - bool crosses_segment_46279 = slt64(srem64(chunk_offset_46265 + - segscan_group_sizze_36741, - N_28477), - chunk_offset_46265 + - segscan_group_sizze_36741 - - 
(chunk_offset_46265 + - segscan_group_sizze_36741 - - (int64_t) 1)); - bool should_load_carry_46280 = local_tid_46258 == 0 && - !crosses_segment_46279; - - if (should_load_carry_46280) { - x_36746 = ((__local - int64_t *) scan_arr_mem_46262)[segscan_group_sizze_36741 - - (int64_t) 1]; - } - if (!should_load_carry_46280) { - x_36746 = (int64_t) 0; - } - } - barrier(CLK_LOCAL_MEM_FENCE); } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segscan_group_sizze_36741 + #undef segred_group_sizze_113810 } -__kernel void mainMagnitudeziscan_stage1_38354(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *scan_arr_mem_46665_backing_aligned_0, - int64_t N_28477, int64_t m_28478, - int64_t iota32_arg_28909, - int32_t num_threads_46659, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global - unsigned char *mem_45339, - __global - unsigned char *mem_45343) +__kernel void mainzisegred_large_113892(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129213_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129211_backing_aligned_1, + int64_t N_75135, int64_t k2p2zq_75151, + int64_t num_groups_113939, + int64_t groups_per_segment_129197, + int64_t elements_per_thread_129198, + int64_t virt_num_groups_129199, + int64_t threads_per_segment_129201, + __global unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global unsigned char *mem_124877, + __global + unsigned char *group_res_arr_mem_129202, + __global + unsigned char *mainzicounter_mem_129204) { - #define segscan_group_sizze_38440 (mainMagnitudezisegscan_group_sizze_38348) + #define segred_group_sizze_113938 (mainzisegred_group_sizze_113886) const int block_dim0 = 0; const int 
block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46665_backing_0 = + __local volatile char *restrict sync_arr_mem_129213_backing_1 = (__local volatile - char *) scan_arr_mem_46665_backing_aligned_0; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46660; - int32_t local_tid_46661; - int64_t group_sizze_46664; - int32_t wave_sizze_46663; - int32_t group_tid_46662; - - global_tid_46660 = get_global_id(0); - local_tid_46661 = get_local_id(0); - group_sizze_46664 = get_local_size(0); - wave_sizze_46663 = LOCKSTEP_WIDTH; - group_tid_46662 = get_group_id(0); - - int32_t phys_tid_38354; - - phys_tid_38354 = global_tid_46660; - - __local char *scan_arr_mem_46665; - - scan_arr_mem_46665 = (__local char *) scan_arr_mem_46665_backing_0; + char *) sync_arr_mem_129213_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129211_backing_0 = + (__local volatile + char *) red_arr_mem_129211_backing_aligned_1; - float x_38444; - float x_38445; + if (*global_failure >= 0) + return; - x_38444 = 0.0F; - for (int64_t j_46667 = 0; j_46667 < sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659)); - j_46667++) { - int64_t chunk_offset_46668 = segscan_group_sizze_38440 * j_46667 + - sext_i32_i64(group_tid_46662) * (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))); - int64_t flat_idx_46669 = chunk_offset_46668 + - sext_i32_i64(local_tid_46661); - int64_t gtid_38345 = squot64(flat_idx_46669, iota32_arg_28909); - int64_t gtid_38353 = flat_idx_46669 - squot64(flat_idx_46669, - iota32_arg_28909) * - iota32_arg_28909; + int32_t global_tid_129206; + int32_t local_tid_129207; + int64_t group_sizze_129210; + int32_t wave_sizze_129209; + int32_t group_tid_129208; + + global_tid_129206 = get_global_id(0); + 
local_tid_129207 = get_local_id(0); + group_sizze_129210 = get_local_size(0); + wave_sizze_129209 = LOCKSTEP_WIDTH; + group_tid_129208 = get_group_id(0); + + int32_t phys_tid_113892; + + phys_tid_113892 = global_tid_129206; + + __local char *red_arr_mem_129211; + + red_arr_mem_129211 = (__local char *) red_arr_mem_129211_backing_0; + + __local char *sync_arr_mem_129213; + + sync_arr_mem_129213 = (__local char *) sync_arr_mem_129213_backing_1; + + int32_t phys_group_id_129215; + + phys_group_id_129215 = get_group_id(0); + for (int32_t i_129216 = 0; i_129216 < + sdiv_up32(sext_i64_i32(virt_num_groups_129199) - phys_group_id_129215, + sext_i64_i32(num_groups_113939)); i_129216++) { + int32_t virt_group_id_129217 = phys_group_id_129215 + i_129216 * + sext_i64_i32(num_groups_113939); + int32_t flat_segment_id_129218 = squot32(virt_group_id_129217, + sext_i64_i32(groups_per_segment_129197)); + int64_t global_tid_129219 = srem64(sext_i32_i64(virt_group_id_129217) * + segred_group_sizze_113938 + + sext_i32_i64(local_tid_129207), + segred_group_sizze_113938 * + groups_per_segment_129197); + int64_t gtid_113881 = squot64(sext_i32_i64(flat_segment_id_129218), + N_75135); + int64_t gtid_113882 = sext_i32_i64(flat_segment_id_129218) - + squot64(sext_i32_i64(flat_segment_id_129218), N_75135) * + N_75135; + int64_t gtid_113891; + double x_acc_129220; + int64_t chunk_sizze_129221; + + chunk_sizze_129221 = smin64(elements_per_thread_129198, + sdiv_up64(k2p2zq_75151 - global_tid_129219, + threads_per_segment_129201)); + + double x_113942; + double x_113943; - // threads in bounds read input + // neutral-initialise the accumulators { - if (slt64(gtid_38345, m_28478) && slt64(gtid_38353, - iota32_arg_28909)) { - int32_t y_38451 = ((__global int32_t *) mem_45339)[gtid_38345]; - int32_t index_primexp_42406 = sext_i64_i32(gtid_38353); - bool cond_38454 = sle32(y_38451, index_primexp_42406); - float defunc_0_f_res_38455; + x_acc_129220 = 0.0; + } + for (int64_t i_129225 = 0; i_129225 < 
chunk_sizze_129221; i_129225++) { + gtid_113891 = global_tid_129219 + threads_per_segment_129201 * + i_129225; + // apply map function + { + double x_113947 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_113881 * + k2p2zq_75151 + + gtid_113891]; + double x_113948 = ((__global double *) mem_120124)[gtid_113882 * + k2p2zq_75151 + + gtid_113891]; + double defunc_1_f_res_113949 = x_113947 * x_113948; - if (cond_38454) { - defunc_0_f_res_38455 = 0.0F; - } else { - int32_t x_38447 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_38345]; - int32_t x_38448 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_38345]; - float x_38449 = ((__global - float *) defunc_0_f_res_mem_45279)[gtid_38345]; - bool cond_38456 = index_primexp_42406 == 0; - float defunc_0_f_res_f_res_38457; - - if (cond_38456) { - defunc_0_f_res_f_res_38457 = x_38449; - } else { - int32_t i_38458 = add32(x_38447, index_primexp_42406); - int64_t i_38459 = sext_i32_i64(i_38458); - bool x_38460 = sle64((int64_t) 0, i_38459); - bool y_38461 = slt64(i_38459, N_28477); - bool bounds_check_38462 = x_38460 && y_38461; - bool index_certs_38463; - - if (!bounds_check_38462) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 118) == -1) { - global_failure_args[0] = i_38459; - global_failure_args[1] = N_28477; - ; - } - local_failure = true; - goto error_0; - } - } - - float x_38464 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_38345 * - N_28477 + - i_38459]; - int32_t x_38465 = sub32(x_38447, x_38448); - int32_t i_38466 = add32(x_38465, index_primexp_42406); - int64_t i_38467 = sext_i32_i64(i_38466); - bool x_38468 = sle64((int64_t) 0, i_38467); - bool y_38469 = slt64(i_38467, N_28477); - bool bounds_check_38470 = x_38468 && y_38469; - bool index_certs_38471; - - if (!bounds_check_38470) { - { - if (atomic_cmpxchg_i32_global(global_failure, - -1, 119) == -1) { - global_failure_args[0] = i_38467; - global_failure_args[1] = N_28477; - ; - } - local_failure = true; - goto 
error_0; - } - } - - float y_38472 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_38345 * - N_28477 + - i_38467]; - float defunc_0_f_res_f_res_f_res_38473 = x_38464 - - y_38472; - - defunc_0_f_res_f_res_38457 = - defunc_0_f_res_f_res_f_res_38473; - } - defunc_0_f_res_38455 = defunc_0_f_res_f_res_38457; + // save map-out results + { } + // load accumulator + { + x_113942 = x_acc_129220; } - // write to-scan values to parameters + // load new values { - x_38445 = defunc_0_f_res_38455; + x_113943 = defunc_1_f_res_113949; + } + // apply reduction operator + { + double defunc_1_op_res_113944 = x_113942 + x_113943; + + // store in accumulator + { + x_acc_129220 = defunc_1_op_res_113944; + } } - // write mapped values results to global memory - { } } } - // do one intra-group scan operation + // to reduce current chunk, first store our result in memory { - // maybe restore some to-scan values to parameters, or read neutral - { - if (!(slt64(gtid_38345, m_28478) && slt64(gtid_38353, - iota32_arg_28909))) { - x_38445 = 0.0F; + x_113942 = x_acc_129220; + ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_113942; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129226; + int32_t skip_waves_129227; + + skip_waves_129227 = 1; + + double x_129222; + double x_129223; + + offset_129226 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129207, + sext_i64_i32(segred_group_sizze_113938))) { + x_129222 = ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129226)]; + } + } + offset_129226 = 1; + while (slt32(offset_129226, wave_sizze_129209)) { + if (slt32(local_tid_129207 + offset_129226, + sext_i64_i32(segred_group_sizze_113938)) && + ((local_tid_129207 - squot32(local_tid_129207, + wave_sizze_129209) * + wave_sizze_129209) & (2 * offset_129226 - 1)) == 0) { + // read array element + { + x_129223 = ((volatile __local + double *) 
red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129226)]; + } + // apply reduction operation + { + double defunc_1_op_res_129224 = x_129222 + x_129223; + + x_129222 = defunc_1_op_res_129224; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_129222; } } - // combine with carry and write to local memory - { - float defunc_1_op_res_38446 = x_38444 + x_38445; - - ((__local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)] = - defunc_1_op_res_38446; - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + offset_129226 *= 2; + } + while (slt32(skip_waves_129227, + squot32(sext_i64_i32(segred_group_sizze_113938) + + wave_sizze_129209 - 1, wave_sizze_129209))) { barrier(CLK_LOCAL_MEM_FENCE); - - float x_46670; - float x_46671; - float x_46673; - float x_46674; - bool ltid_in_bounds_46676; - - ltid_in_bounds_46676 = slt64(sext_i32_i64(local_tid_46661), - segscan_group_sizze_38440); - - int32_t skip_threads_46677; - - // read input for in-block scan - { - if (ltid_in_bounds_46676) { - x_46671 = ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)]; - if ((local_tid_46661 - squot32(local_tid_46661, 32) * 32) == - 0) { - x_46670 = x_46671; - } + offset_129226 = skip_waves_129227 * wave_sizze_129209; + if (slt32(local_tid_129207 + offset_129226, + sext_i64_i32(segred_group_sizze_113938)) && + ((local_tid_129207 - squot32(local_tid_129207, + wave_sizze_129209) * + wave_sizze_129209) == 0 && (squot32(local_tid_129207, + wave_sizze_129209) & (2 * + skip_waves_129227 - + 1)) == + 0)) { + // read array element + { + x_129223 = ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129226)]; + } + // apply reduction operation + { + double defunc_1_op_res_129224 = x_129222 + x_129223; + + x_129222 = defunc_1_op_res_129224; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_129222; } } - // in-block scan (hopefully no barriers needed) + skip_waves_129227 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129207) == (int64_t) 0) { + x_acc_129220 = x_129222; + } + } + if (groups_per_segment_129197 == (int64_t) 1) { + // first thread in group saves final result to memory { - skip_threads_46677 = 1; - while (slt32(skip_threads_46677, 32)) { - if (sle32(skip_threads_46677, local_tid_46661 - - squot32(local_tid_46661, 32) * 32) && - ltid_in_bounds_46676) { - // read operands - { - x_46670 = ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661) - - sext_i32_i64(skip_threads_46677)]; - } - // perform operation - { - bool inactive_46678 = - slt64(srem64(sext_i32_i64(local_tid_46661) + - chunk_offset_46668, - iota32_arg_28909), - sext_i32_i64(local_tid_46661) + - chunk_offset_46668 - - (sext_i32_i64(local_tid_46661 - - skip_threads_46677) + - chunk_offset_46668)); - - if (inactive_46678) { - x_46670 = x_46671; - } - if (!inactive_46678) { - float defunc_1_op_res_46672 = x_46670 + x_46671; - - x_46670 = defunc_1_op_res_46672; - } - } - } - if (sle32(wave_sizze_46663, skip_threads_46677)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46677, local_tid_46661 - - squot32(local_tid_46661, 32) * 32) && - ltid_in_bounds_46676) { - // write result - { - ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)] = - x_46670; - x_46671 = x_46670; - } - } - if (sle32(wave_sizze_46663, skip_threads_46677)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46677 *= 2; + if (local_tid_129207 == 0) { + ((__global double *) mem_124877)[gtid_113881 * N_75135 + + gtid_113882] = + x_acc_129220; } } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' + } else { + int32_t old_counter_129228; + + // first thread in group saves 
group result to global memory { - if ((local_tid_46661 - squot32(local_tid_46661, 32) * 32) == - 31 && ltid_in_bounds_46676) { - ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(squot32(local_tid_46661, - 32))] = - x_46670; + if (local_tid_129207 == 0) { + ((__global + double *) group_res_arr_mem_129202)[sext_i32_i64(virt_group_id_129217) * + segred_group_sizze_113938] = + x_acc_129220; + mem_fence_global(); + old_counter_129228 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129204)[sext_i32_i64(srem32(flat_segment_id_129218, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129213)[(int64_t) 0] = + old_counter_129228 == groups_per_segment_129197 - + (int64_t) 1; } } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46679; - - // read input for in-block scan + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129229; + + is_last_group_129229 = ((__local + bool *) sync_arr_mem_129213)[(int64_t) 0]; + if (is_last_group_129229) { + if (local_tid_129207 == 0) { + old_counter_129228 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129204)[sext_i32_i64(srem32(flat_segment_id_129218, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129197)); + } + // read in the per-group-results { - if (squot32(local_tid_46661, 32) == 0 && - ltid_in_bounds_46676) { - x_46674 = ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)]; - if ((local_tid_46661 - squot32(local_tid_46661, 32) * - 32) == 0) { - x_46673 = x_46674; + int64_t read_per_thread_129230 = + sdiv_up64(groups_per_segment_129197, + segred_group_sizze_113938); + + x_113942 = 0.0; + for (int64_t i_129231 = 0; i_129231 < + read_per_thread_129230; i_129231++) { + int64_t group_res_id_129232 = + sext_i32_i64(local_tid_129207) * + read_per_thread_129230 + i_129231; + int64_t index_of_group_res_129233 = + 
sext_i32_i64(flat_segment_id_129218) * + groups_per_segment_129197 + group_res_id_129232; + + if (slt64(group_res_id_129232, + groups_per_segment_129197)) { + x_113943 = ((__global + double *) group_res_arr_mem_129202)[index_of_group_res_129233 * + segred_group_sizze_113938]; + + double defunc_1_op_res_113944; + + defunc_1_op_res_113944 = x_113942 + x_113943; + x_113942 = defunc_1_op_res_113944; } } } - // in-block scan (hopefully no barriers needed) + ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_113942; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results { - skip_threads_46679 = 1; - while (slt32(skip_threads_46679, 32)) { - if (sle32(skip_threads_46679, local_tid_46661 - - squot32(local_tid_46661, 32) * 32) && - (squot32(local_tid_46661, 32) == 0 && - ltid_in_bounds_46676)) { - // read operands + int32_t offset_129234; + int32_t skip_waves_129235; + + skip_waves_129235 = 1; + + double x_129222; + double x_129223; + + offset_129234 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129207, + sext_i64_i32(segred_group_sizze_113938))) { + x_129222 = ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129234)]; + } + } + offset_129234 = 1; + while (slt32(offset_129234, wave_sizze_129209)) { + if (slt32(local_tid_129207 + offset_129234, + sext_i64_i32(segred_group_sizze_113938)) && + ((local_tid_129207 - squot32(local_tid_129207, + wave_sizze_129209) * + wave_sizze_129209) & (2 * offset_129234 - 1)) == + 0) { + // read array element { - x_46673 = ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661) - - sext_i32_i64(skip_threads_46679)]; + x_129223 = ((volatile __local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129234)]; } - // perform operation + // apply reduction operation { - bool inactive_46680 = - slt64(srem64(sext_i32_i64(local_tid_46661 * - 32 + 32 - 1) + - chunk_offset_46668, - 
iota32_arg_28909), - sext_i32_i64(local_tid_46661 * 32 + - 32 - 1) + chunk_offset_46668 - - (sext_i32_i64((local_tid_46661 - - skip_threads_46679) * - 32 + 32 - 1) + chunk_offset_46668)); + double defunc_1_op_res_129224 = x_129222 + + x_129223; - if (inactive_46680) { - x_46673 = x_46674; - } - if (!inactive_46680) { - float defunc_1_op_res_46675 = x_46673 + - x_46674; - - x_46673 = defunc_1_op_res_46675; - } + x_129222 = defunc_1_op_res_129224; } - } - if (sle32(wave_sizze_46663, skip_threads_46679)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46679, local_tid_46661 - - squot32(local_tid_46661, 32) * 32) && - (squot32(local_tid_46661, 32) == 0 && - ltid_in_bounds_46676)) { - // write result + // write result of operation { ((volatile __local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)] = - x_46673; - x_46674 = x_46673; + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_129222; } } - if (sle32(wave_sizze_46663, skip_threads_46679)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46679 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46661, 32) == 0 || - !ltid_in_bounds_46676)) { - // read operands - { - x_46671 = x_46670; - x_46670 = ((__local - float *) scan_arr_mem_46665)[sext_i32_i64(squot32(local_tid_46661, - 32)) - - (int64_t) 1]; + offset_129234 *= 2; } - // perform operation - { - bool inactive_46681 = - slt64(srem64(sext_i32_i64(local_tid_46661) + - chunk_offset_46668, iota32_arg_28909), - sext_i32_i64(local_tid_46661) + - chunk_offset_46668 - - (sext_i32_i64(squot32(local_tid_46661, 32) * - 32 - 1) + chunk_offset_46668)); - - if (inactive_46681) { - x_46670 = x_46671; - } - if (!inactive_46681) { - float defunc_1_op_res_46672 = x_46670 + x_46671; - - x_46670 = defunc_1_op_res_46672; + while (slt32(skip_waves_129235, + squot32(sext_i64_i32(segred_group_sizze_113938) + + wave_sizze_129209 - 1, + wave_sizze_129209))) { + 
barrier(CLK_LOCAL_MEM_FENCE); + offset_129234 = skip_waves_129235 * wave_sizze_129209; + if (slt32(local_tid_129207 + offset_129234, + sext_i64_i32(segred_group_sizze_113938)) && + ((local_tid_129207 - squot32(local_tid_129207, + wave_sizze_129209) * + wave_sizze_129209) == 0 && + (squot32(local_tid_129207, wave_sizze_129209) & + (2 * skip_waves_129235 - 1)) == 0)) { + // read array element + { + x_129223 = ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207 + + offset_129234)]; + } + // apply reduction operation + { + double defunc_1_op_res_129224 = x_129222 + + x_129223; + + x_129222 = defunc_1_op_res_129224; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129211)[sext_i32_i64(local_tid_129207)] = + x_129222; + } } + skip_waves_129235 *= 2; } - // write final result + // and back to memory with the final result { - ((__local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)] = - x_46670; + if (local_tid_129207 == 0) { + ((__global double *) mem_124877)[gtid_113881 * + N_75135 + + gtid_113882] = + x_129222; + } } } } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46661, 32) == 0) { - ((__local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)] = - x_46671; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write partial scan result - { - if (slt64(gtid_38345, m_28478) && slt64(gtid_38353, - iota32_arg_28909)) { - ((__global float *) mem_45343)[gtid_38345 * - iota32_arg_28909 + - gtid_38353] = ((__local - float *) scan_arr_mem_46665)[sext_i32_i64(local_tid_46661)]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread reads last element as carry-in for next iteration - { - bool crosses_segment_46682 = slt64(srem64(chunk_offset_46668 + - segscan_group_sizze_38440, - iota32_arg_28909), - chunk_offset_46668 + - segscan_group_sizze_38440 - - (chunk_offset_46668 + - segscan_group_sizze_38440 - - (int64_t) 1)); - bool 
should_load_carry_46683 = local_tid_46661 == 0 && - !crosses_segment_46682; - - if (should_load_carry_46683) { - x_38444 = ((__local - float *) scan_arr_mem_46665)[segscan_group_sizze_38440 - - (int64_t) 1]; - } - if (!should_load_carry_46683) { - x_38444 = 0.0F; - } - } - barrier(CLK_LOCAL_MEM_FENCE); } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segscan_group_sizze_38440 + #undef segred_group_sizze_113938 } -__kernel void mainMagnitudeziscan_stage2_36724(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46286_backing_aligned_0, - int64_t N_28477, int64_t m_28478, - int64_t stage1_num_groups_46255, - int32_t num_threads_46256, - __global - unsigned char *mem_45163) +__kernel void mainzisegred_large_114313(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129443_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129441_backing_aligned_1, + int64_t N_75135, int64_t n_75139, + int64_t num_groups_114363, + int64_t groups_per_segment_129427, + int64_t elements_per_thread_129428, + int64_t virt_num_groups_129429, + int64_t threads_per_segment_129431, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global unsigned char *mem_124946, + __global unsigned char *mem_124949, + __global + unsigned char *group_res_arr_mem_129432, + __global + unsigned char *mainzicounter_mem_129434) { - #define segscan_group_sizze_36741 (mainMagnitudezisegscan_group_sizze_36718) + #define segred_group_sizze_114362 (mainzisegred_group_sizze_114307) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46286_backing_0 = + __local volatile char *restrict sync_arr_mem_129443_backing_1 = (__local volatile - char *) scan_arr_mem_46286_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46281; - int32_t local_tid_46282; - int64_t 
group_sizze_46285; - int32_t wave_sizze_46284; - int32_t group_tid_46283; - - global_tid_46281 = get_global_id(0); - local_tid_46282 = get_local_id(0); - group_sizze_46285 = get_local_size(0); - wave_sizze_46284 = LOCKSTEP_WIDTH; - group_tid_46283 = get_group_id(0); - - int32_t phys_tid_36724; - - phys_tid_36724 = global_tid_46281; - - __local char *scan_arr_mem_46286; + char *) sync_arr_mem_129443_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129441_backing_0 = + (__local volatile + char *) red_arr_mem_129441_backing_aligned_1; + volatile __local bool local_failure; - scan_arr_mem_46286 = (__local char *) scan_arr_mem_46286_backing_0; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - int64_t flat_idx_46288; + int32_t global_tid_129436; + int32_t local_tid_129437; + int64_t group_sizze_129440; + int32_t wave_sizze_129439; + int32_t group_tid_129438; - flat_idx_46288 = (sext_i32_i64(local_tid_46282) + (int64_t) 1) * - (segscan_group_sizze_36741 * sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1; + global_tid_129436 = get_global_id(0); + local_tid_129437 = get_local_id(0); + group_sizze_129440 = get_local_size(0); + wave_sizze_129439 = LOCKSTEP_WIDTH; + group_tid_129438 = get_group_id(0); - int64_t gtid_36715; + int32_t phys_tid_114313; - gtid_36715 = squot64(flat_idx_46288, N_28477); + phys_tid_114313 = global_tid_129436; - int64_t gtid_36723; + __local char *red_arr_mem_129441; - gtid_36723 = flat_idx_46288 - squot64(flat_idx_46288, N_28477) * N_28477; - // threads in bound read carries; others get neutral element - { - if (slt64(gtid_36715, m_28478) && slt64(gtid_36723, N_28477)) { - ((__local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - ((__global int64_t *) mem_45163)[gtid_36715 * N_28477 + - gtid_36723]; - } else { - ((__local - int64_t *) 
scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - (int64_t) 0; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + red_arr_mem_129441 = (__local char *) red_arr_mem_129441_backing_0; - int64_t x_36746; - int64_t x_36747; - int64_t x_46289; - int64_t x_46290; - bool ltid_in_bounds_46292; + __local char *sync_arr_mem_129443; - ltid_in_bounds_46292 = slt64(sext_i32_i64(local_tid_46282), - stage1_num_groups_46255); + sync_arr_mem_129443 = (__local char *) sync_arr_mem_129443_backing_1; - int32_t skip_threads_46293; + int32_t phys_group_id_129445; - // read input for in-block scan - { - if (ltid_in_bounds_46292) { - x_36747 = ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)]; - if ((local_tid_46282 - squot32(local_tid_46282, 32) * 32) == 0) { - x_36746 = x_36747; - } + phys_group_id_129445 = get_group_id(0); + for (int32_t i_129446 = 0; i_129446 < + sdiv_up32(sext_i64_i32(virt_num_groups_129429) - phys_group_id_129445, + sext_i64_i32(num_groups_114363)); i_129446++) { + int32_t virt_group_id_129447 = phys_group_id_129445 + i_129446 * + sext_i64_i32(num_groups_114363); + int32_t flat_segment_id_129448 = squot32(virt_group_id_129447, + sext_i64_i32(groups_per_segment_129427)); + int64_t global_tid_129449 = srem64(sext_i32_i64(virt_group_id_129447) * + segred_group_sizze_114362 + + sext_i32_i64(local_tid_129437), + segred_group_sizze_114362 * + groups_per_segment_129427); + int64_t gtid_114304 = sext_i32_i64(flat_segment_id_129448); + int64_t gtid_114312; + double x_acc_129450; + int64_t chunk_sizze_129451; + + chunk_sizze_129451 = smin64(elements_per_thread_129428, + sdiv_up64(n_75139 - global_tid_129449, + threads_per_segment_129431)); + + double x_114366; + double x_114367; + + // neutral-initialise the accumulators + { + x_acc_129450 = 0.0; } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46293 = 1; - while (slt32(skip_threads_46293, 32)) { - if (sle32(skip_threads_46293, local_tid_46282 - - squot32(local_tid_46282, 
32) * 32) && - ltid_in_bounds_46292) { - // read operands + for (int64_t i_129455 = 0; i_129455 < chunk_sizze_129451; i_129455++) { + gtid_114312 = global_tid_129449 + threads_per_segment_129431 * + i_129455; + // apply map function + { + int64_t defunc_0_f_res_114370 = ((__global + int64_t *) mem_124946)[gtid_114304]; + bool cond_114372 = slt64(gtid_114312, defunc_0_f_res_114370); + double defunc_0_f_res_114373; + + if (cond_114372) { + bool y_114375 = slt64(gtid_114312, N_75135); + bool index_certs_114377; + + if (!y_114375) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 198) == -1) { + global_failure_args[0] = gtid_114312; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_114378 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114304 * + N_75135 + + gtid_114312]; + + defunc_0_f_res_114373 = defunc_0_f_res_t_res_114378; + } else { + defunc_0_f_res_114373 = 0.0; + } + + double defunc_0_f_res_114379 = defunc_0_f_res_114373 * + defunc_0_f_res_114373; + + // save map-out results + { } + // load accumulator { - x_36746 = ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282) - - sext_i32_i64(skip_threads_46293)]; + x_114366 = x_acc_129450; } - // perform operation + // load new values { - bool inactive_46294 = - slt64(srem64((sext_i32_i64(local_tid_46282) + - (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1, N_28477), - (sext_i32_i64(local_tid_46282) + (int64_t) 1) * - (segscan_group_sizze_36741 * sdiv_up64(m_28478 * - N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46282 - - skip_threads_46293) + - (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1)); - - if (inactive_46294) { - x_36746 = x_36747; - } - if (!inactive_46294) { - int64_t 
defunc_1_op_res_36748 = add64(x_36746, x_36747); - - x_36746 = defunc_1_op_res_36748; - } + x_114367 = defunc_0_f_res_114379; } - } - if (sle32(wave_sizze_46284, skip_threads_46293)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46293, local_tid_46282 - - squot32(local_tid_46282, 32) * 32) && - ltid_in_bounds_46292) { - // write result + // apply reduction operator { - ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - x_36746; - x_36747 = x_36746; + double defunc_1_op_res_114368 = x_114366 + x_114367; + + // store in accumulator + { + x_acc_129450 = defunc_1_op_res_114368; + } } } - if (sle32(wave_sizze_46284, skip_threads_46293)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46293 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46282 - squot32(local_tid_46282, 32) * 32) == 31 && - ltid_in_bounds_46292) { - ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(squot32(local_tid_46282, - 32))] = - x_36746; + // to reduce current chunk, first store our result in memory + { + x_114366 = x_acc_129450; + ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_114366; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46295; - // read input for in-block scan + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129456; + int32_t skip_waves_129457; + + skip_waves_129457 = 1; + + double x_129452; + double x_129453; + + offset_129456 = 0; + // participating threads read initial accumulator { - if (squot32(local_tid_46282, 32) == 0 && ltid_in_bounds_46292) { - x_46290 = ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)]; - if ((local_tid_46282 - squot32(local_tid_46282, 32) * 32) == - 0) { - x_46289 = 
x_46290; + if (slt32(local_tid_129437, + sext_i64_i32(segred_group_sizze_114362))) { + x_129452 = ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129456)]; + } + } + offset_129456 = 1; + while (slt32(offset_129456, wave_sizze_129439)) { + if (slt32(local_tid_129437 + offset_129456, + sext_i64_i32(segred_group_sizze_114362)) && + ((local_tid_129437 - squot32(local_tid_129437, + wave_sizze_129439) * + wave_sizze_129439) & (2 * offset_129456 - 1)) == 0) { + // read array element + { + x_129453 = ((volatile __local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129456)]; + } + // apply reduction operation + { + double defunc_1_op_res_129454 = x_129452 + x_129453; + + x_129452 = defunc_1_op_res_129454; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_129452; } } + offset_129456 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46295 = 1; - while (slt32(skip_threads_46295, 32)) { - if (sle32(skip_threads_46295, local_tid_46282 - - squot32(local_tid_46282, 32) * 32) && - (squot32(local_tid_46282, 32) == 0 && - ltid_in_bounds_46292)) { - // read operands - { - x_46289 = ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282) - - sext_i32_i64(skip_threads_46295)]; - } - // perform operation - { - bool inactive_46296 = - slt64(srem64((sext_i32_i64(local_tid_46282 * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1, N_28477), - (sext_i32_i64(local_tid_46282 * 32 + 32 - - 1) + (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1 - - ((sext_i32_i64((local_tid_46282 - - skip_threads_46295) * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) 
- - (int64_t) 1)); - - if (inactive_46296) { - x_46289 = x_46290; - } - if (!inactive_46296) { - int64_t defunc_1_op_res_46291 = add64(x_46289, - x_46290); - - x_46289 = defunc_1_op_res_46291; - } - } - } - if (sle32(wave_sizze_46284, skip_threads_46295)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_129457, + squot32(sext_i64_i32(segred_group_sizze_114362) + + wave_sizze_129439 - 1, wave_sizze_129439))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129456 = skip_waves_129457 * wave_sizze_129439; + if (slt32(local_tid_129437 + offset_129456, + sext_i64_i32(segred_group_sizze_114362)) && + ((local_tid_129437 - squot32(local_tid_129437, + wave_sizze_129439) * + wave_sizze_129439) == 0 && (squot32(local_tid_129437, + wave_sizze_129439) & (2 * + skip_waves_129457 - + 1)) == + 0)) { + // read array element + { + x_129453 = ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129456)]; } - if (sle32(skip_threads_46295, local_tid_46282 - - squot32(local_tid_46282, 32) * 32) && - (squot32(local_tid_46282, 32) == 0 && - ltid_in_bounds_46292)) { - // write result - { - ((volatile __local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - x_46289; - x_46290 = x_46289; - } + // apply reduction operation + { + double defunc_1_op_res_129454 = x_129452 + x_129453; + + x_129452 = defunc_1_op_res_129454; } - if (sle32(wave_sizze_46284, skip_threads_46295)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_129452; } - skip_threads_46295 *= 2; } + skip_waves_129457 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46282, 32) == 0 || !ltid_in_bounds_46292)) { - // read operands - { - x_36747 = x_36746; - x_36746 = ((__local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(squot32(local_tid_46282, - 32)) - - (int64_t) 1]; + barrier(CLK_LOCAL_MEM_FENCE); + // 
first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129437) == (int64_t) 0) { + x_acc_129450 = x_129452; } - // perform operation + } + if (groups_per_segment_129427 == (int64_t) 1) { + // first thread in group saves final result to memory { - bool inactive_46297 = - slt64(srem64((sext_i32_i64(local_tid_46282) + - (int64_t) 1) * (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1, N_28477), - (sext_i32_i64(local_tid_46282) + (int64_t) 1) * - (segscan_group_sizze_36741 * sdiv_up64(m_28478 * - N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46282, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1)); - - if (inactive_46297) { - x_36746 = x_36747; - } - if (!inactive_46297) { - int64_t defunc_1_op_res_36748 = add64(x_36746, x_36747); - - x_36746 = defunc_1_op_res_36748; + if (local_tid_129437 == 0) { + ((__global double *) mem_124949)[gtid_114304] = + x_acc_129450; } } - // write final result + } else { + int32_t old_counter_129458; + + // first thread in group saves group result to global memory { + if (local_tid_129437 == 0) { + ((__global + double *) group_res_arr_mem_129432)[sext_i32_i64(virt_group_id_129447) * + segred_group_sizze_114362] = + x_acc_129450; + mem_fence_global(); + old_counter_129458 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129434)[sext_i32_i64(srem32(flat_segment_id_129448, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129443)[(int64_t) 0] = + old_counter_129458 == groups_per_segment_129427 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129459; + + is_last_group_129459 = ((__local + bool *) sync_arr_mem_129443)[(int64_t) 0]; + if (is_last_group_129459) { + if (local_tid_129437 == 0) { + old_counter_129458 = + 
atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129434)[sext_i32_i64(srem32(flat_segment_id_129448, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129427)); + } + // read in the per-group-results + { + int64_t read_per_thread_129460 = + sdiv_up64(groups_per_segment_129427, + segred_group_sizze_114362); + + x_114366 = 0.0; + for (int64_t i_129461 = 0; i_129461 < + read_per_thread_129460; i_129461++) { + int64_t group_res_id_129462 = + sext_i32_i64(local_tid_129437) * + read_per_thread_129460 + i_129461; + int64_t index_of_group_res_129463 = + sext_i32_i64(flat_segment_id_129448) * + groups_per_segment_129427 + group_res_id_129462; + + if (slt64(group_res_id_129462, + groups_per_segment_129427)) { + x_114367 = ((__global + double *) group_res_arr_mem_129432)[index_of_group_res_129463 * + segred_group_sizze_114362]; + + double defunc_1_op_res_114368; + + defunc_1_op_res_114368 = x_114366 + x_114367; + x_114366 = defunc_1_op_res_114368; + } + } + } ((__local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - x_36746; + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_114366; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129464; + int32_t skip_waves_129465; + + skip_waves_129465 = 1; + + double x_129452; + double x_129453; + + offset_129464 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129437, + sext_i64_i32(segred_group_sizze_114362))) { + x_129452 = ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129464)]; + } + } + offset_129464 = 1; + while (slt32(offset_129464, wave_sizze_129439)) { + if (slt32(local_tid_129437 + offset_129464, + sext_i64_i32(segred_group_sizze_114362)) && + ((local_tid_129437 - squot32(local_tid_129437, + wave_sizze_129439) * + wave_sizze_129439) & (2 * offset_129464 - 1)) == + 0) { + // read array element + { + x_129453 = ((volatile __local + double *) 
red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129464)]; + } + // apply reduction operation + { + double defunc_1_op_res_129454 = x_129452 + + x_129453; + + x_129452 = defunc_1_op_res_129454; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_129452; + } + } + offset_129464 *= 2; + } + while (slt32(skip_waves_129465, + squot32(sext_i64_i32(segred_group_sizze_114362) + + wave_sizze_129439 - 1, + wave_sizze_129439))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129464 = skip_waves_129465 * wave_sizze_129439; + if (slt32(local_tid_129437 + offset_129464, + sext_i64_i32(segred_group_sizze_114362)) && + ((local_tid_129437 - squot32(local_tid_129437, + wave_sizze_129439) * + wave_sizze_129439) == 0 && + (squot32(local_tid_129437, wave_sizze_129439) & + (2 * skip_waves_129465 - 1)) == 0)) { + // read array element + { + x_129453 = ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437 + + offset_129464)]; + } + // apply reduction operation + { + double defunc_1_op_res_129454 = x_129452 + + x_129453; + + x_129452 = defunc_1_op_res_129454; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129441)[sext_i32_i64(local_tid_129437)] = + x_129452; + } + } + skip_waves_129465 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129437 == 0) { + ((__global double *) mem_124949)[gtid_114304] = + x_129452; + } + } + } } } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46282, 32) == 0) { - ((__local - int64_t *) scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)] = - x_36747; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write scanned carries - { - if (slt64(gtid_36715, m_28478) && slt64(gtid_36723, N_28477)) { - ((__global int64_t *) mem_45163)[gtid_36715 * N_28477 + - gtid_36723] = ((__local - int64_t *) 
scan_arr_mem_46286)[sext_i32_i64(local_tid_46282)]; - } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_0: + error_1: return; - #undef segscan_group_sizze_36741 + #undef segred_group_sizze_114362 } -__kernel void mainMagnitudeziscan_stage2_38354(__global int *global_failure, - __local volatile - int64_t *scan_arr_mem_46689_backing_aligned_0, - int64_t m_28478, - int64_t iota32_arg_28909, - int64_t stage1_num_groups_46658, - int32_t num_threads_46659, - __global - unsigned char *mem_45343) +__kernel void mainzisegred_large_114337(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129383_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129381_backing_aligned_1, + int64_t N_75135, int64_t n_75139, + int64_t num_groups_114349, + int64_t groups_per_segment_129367, + int64_t elements_per_thread_129368, + int64_t virt_num_groups_129369, + int64_t threads_per_segment_129371, + __global unsigned char *mem_124142, + __global unsigned char *mem_124946, + __global + unsigned char *group_res_arr_mem_129372, + __global + unsigned char *mainzicounter_mem_129374) { - #define segscan_group_sizze_38440 (mainMagnitudezisegscan_group_sizze_38348) + #define segred_group_sizze_114348 (mainzisegred_group_sizze_114331) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict scan_arr_mem_46689_backing_0 = + __local volatile char *restrict sync_arr_mem_129383_backing_1 = (__local volatile - char *) scan_arr_mem_46689_backing_aligned_0; + char *) sync_arr_mem_129383_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129381_backing_0 = + (__local volatile + char *) red_arr_mem_129381_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46684; - int32_t local_tid_46685; - int64_t group_sizze_46688; - int32_t wave_sizze_46687; - int32_t group_tid_46686; - - global_tid_46684 = get_global_id(0); - local_tid_46685 = get_local_id(0); - group_sizze_46688 = 
get_local_size(0); - wave_sizze_46687 = LOCKSTEP_WIDTH; - group_tid_46686 = get_group_id(0); + int32_t global_tid_129376; + int32_t local_tid_129377; + int64_t group_sizze_129380; + int32_t wave_sizze_129379; + int32_t group_tid_129378; - int32_t phys_tid_38354; + global_tid_129376 = get_global_id(0); + local_tid_129377 = get_local_id(0); + group_sizze_129380 = get_local_size(0); + wave_sizze_129379 = LOCKSTEP_WIDTH; + group_tid_129378 = get_group_id(0); - phys_tid_38354 = global_tid_46684; + int32_t phys_tid_114337; - __local char *scan_arr_mem_46689; + phys_tid_114337 = global_tid_129376; - scan_arr_mem_46689 = (__local char *) scan_arr_mem_46689_backing_0; + __local char *red_arr_mem_129381; - int64_t flat_idx_46691; - - flat_idx_46691 = (sext_i32_i64(local_tid_46685) + (int64_t) 1) * - (segscan_group_sizze_38440 * sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1; - - int64_t gtid_38345; - - gtid_38345 = squot64(flat_idx_46691, iota32_arg_28909); - - int64_t gtid_38353; - - gtid_38353 = flat_idx_46691 - squot64(flat_idx_46691, iota32_arg_28909) * - iota32_arg_28909; - // threads in bound read carries; others get neutral element - { - if (slt64(gtid_38345, m_28478) && slt64(gtid_38353, iota32_arg_28909)) { - ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - ((__global float *) mem_45343)[gtid_38345 * iota32_arg_28909 + - gtid_38353]; - } else { - ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - 0.0F; - } - } - barrier(CLK_LOCAL_MEM_FENCE); + red_arr_mem_129381 = (__local char *) red_arr_mem_129381_backing_0; - float x_38444; - float x_38445; - float x_46692; - float x_46693; - bool ltid_in_bounds_46695; + __local char *sync_arr_mem_129383; - ltid_in_bounds_46695 = slt64(sext_i32_i64(local_tid_46685), - stage1_num_groups_46658); + sync_arr_mem_129383 = (__local char *) sync_arr_mem_129383_backing_1; - int32_t skip_threads_46696; + int32_t phys_group_id_129385; - // read 
input for in-block scan - { - if (ltid_in_bounds_46695) { - x_38445 = ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)]; - if ((local_tid_46685 - squot32(local_tid_46685, 32) * 32) == 0) { - x_38444 = x_38445; - } + phys_group_id_129385 = get_group_id(0); + for (int32_t i_129386 = 0; i_129386 < + sdiv_up32(sext_i64_i32(virt_num_groups_129369) - phys_group_id_129385, + sext_i64_i32(num_groups_114349)); i_129386++) { + int32_t virt_group_id_129387 = phys_group_id_129385 + i_129386 * + sext_i64_i32(num_groups_114349); + int32_t flat_segment_id_129388 = squot32(virt_group_id_129387, + sext_i64_i32(groups_per_segment_129367)); + int64_t global_tid_129389 = srem64(sext_i32_i64(virt_group_id_129387) * + segred_group_sizze_114348 + + sext_i32_i64(local_tid_129377), + segred_group_sizze_114348 * + groups_per_segment_129367); + int64_t gtid_114328 = sext_i32_i64(flat_segment_id_129388); + int64_t gtid_114336; + int64_t x_acc_129390; + int64_t chunk_sizze_129391; + + chunk_sizze_129391 = smin64(elements_per_thread_129368, + sdiv_up64(n_75139 - global_tid_129389, + threads_per_segment_129371)); + + int64_t x_114352; + int64_t x_114353; + + // neutral-initialise the accumulators + { + x_acc_129390 = (int64_t) 0; } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46696 = 1; - while (slt32(skip_threads_46696, 32)) { - if (sle32(skip_threads_46696, local_tid_46685 - - squot32(local_tid_46685, 32) * 32) && - ltid_in_bounds_46695) { - // read operands + for (int64_t i_129395 = 0; i_129395 < chunk_sizze_129391; i_129395++) { + gtid_114336 = global_tid_129389 + threads_per_segment_129371 * + i_129395; + // apply map function + { + double x_114356 = ((__global double *) mem_124142)[gtid_114328 * + N_75135 + + gtid_114336]; + bool isnan_res_114357; + + isnan_res_114357 = futrts_isnan64(x_114356); + + bool cond_114358 = !isnan_res_114357; + int64_t defunc_0_f_res_114359 = btoi_bool_i64(cond_114358); + + // save map-out results + { } 
+ // load accumulator { - x_38444 = ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685) - - sext_i32_i64(skip_threads_46696)]; + x_114352 = x_acc_129390; } - // perform operation + // load new values { - bool inactive_46697 = - slt64(srem64((sext_i32_i64(local_tid_46685) + - (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1, iota32_arg_28909), - (sext_i32_i64(local_tid_46685) + (int64_t) 1) * - (segscan_group_sizze_38440 * sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1 - ((sext_i32_i64(local_tid_46685 - - skip_threads_46696) + - (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1)); - - if (inactive_46697) { - x_38444 = x_38445; - } - if (!inactive_46697) { - float defunc_1_op_res_38446 = x_38444 + x_38445; - - x_38444 = defunc_1_op_res_38446; - } + x_114353 = defunc_0_f_res_114359; } - } - if (sle32(wave_sizze_46687, skip_threads_46696)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46696, local_tid_46685 - - squot32(local_tid_46685, 32) * 32) && - ltid_in_bounds_46695) { - // write result + // apply reduction operator { - ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - x_38444; - x_38445 = x_38444; + int64_t defunc_1_op_res_114354 = add64(x_114352, x_114353); + + // store in accumulator + { + x_acc_129390 = defunc_1_op_res_114354; + } } } - if (sle32(wave_sizze_46687, skip_threads_46696)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46696 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46685 - squot32(local_tid_46685, 32) * 32) == 31 && - ltid_in_bounds_46695) { - ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(squot32(local_tid_46685, - 32))] = x_38444; + // to 
reduce current chunk, first store our result in memory + { + x_114352 = x_acc_129390; + ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_114352; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46698; + barrier(CLK_LOCAL_MEM_FENCE); - // read input for in-block scan + int32_t offset_129396; + int32_t skip_waves_129397; + + skip_waves_129397 = 1; + + int64_t x_129392; + int64_t x_129393; + + offset_129396 = 0; + // participating threads read initial accumulator { - if (squot32(local_tid_46685, 32) == 0 && ltid_in_bounds_46695) { - x_46693 = ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)]; - if ((local_tid_46685 - squot32(local_tid_46685, 32) * 32) == - 0) { - x_46692 = x_46693; + if (slt32(local_tid_129377, + sext_i64_i32(segred_group_sizze_114348))) { + x_129392 = ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129396)]; + } + } + offset_129396 = 1; + while (slt32(offset_129396, wave_sizze_129379)) { + if (slt32(local_tid_129377 + offset_129396, + sext_i64_i32(segred_group_sizze_114348)) && + ((local_tid_129377 - squot32(local_tid_129377, + wave_sizze_129379) * + wave_sizze_129379) & (2 * offset_129396 - 1)) == 0) { + // read array element + { + x_129393 = ((volatile __local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129396)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129394 = add64(x_129392, x_129393); + + x_129392 = defunc_1_op_res_129394; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_129392; } } + offset_129396 *= 2; } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46698 = 1; - while (slt32(skip_threads_46698, 32)) { - if (sle32(skip_threads_46698, local_tid_46685 - - squot32(local_tid_46685, 32) * 32) && - 
(squot32(local_tid_46685, 32) == 0 && - ltid_in_bounds_46695)) { - // read operands - { - x_46692 = ((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685) - - sext_i32_i64(skip_threads_46698)]; - } - // perform operation - { - bool inactive_46699 = - slt64(srem64((sext_i32_i64(local_tid_46685 * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1, iota32_arg_28909), - (sext_i32_i64(local_tid_46685 * 32 + 32 - - 1) + (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1 - - ((sext_i32_i64((local_tid_46685 - - skip_threads_46698) * 32 + - 32 - 1) + (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1)); - - if (inactive_46699) { - x_46692 = x_46693; - } - if (!inactive_46699) { - float defunc_1_op_res_46694 = x_46692 + x_46693; - - x_46692 = defunc_1_op_res_46694; - } - } - } - if (sle32(wave_sizze_46687, skip_threads_46698)) { - barrier(CLK_LOCAL_MEM_FENCE); + while (slt32(skip_waves_129397, + squot32(sext_i64_i32(segred_group_sizze_114348) + + wave_sizze_129379 - 1, wave_sizze_129379))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129396 = skip_waves_129397 * wave_sizze_129379; + if (slt32(local_tid_129377 + offset_129396, + sext_i64_i32(segred_group_sizze_114348)) && + ((local_tid_129377 - squot32(local_tid_129377, + wave_sizze_129379) * + wave_sizze_129379) == 0 && (squot32(local_tid_129377, + wave_sizze_129379) & (2 * + skip_waves_129397 - + 1)) == + 0)) { + // read array element + { + x_129393 = ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129396)]; } - if (sle32(skip_threads_46698, local_tid_46685 - - squot32(local_tid_46685, 32) * 32) && - (squot32(local_tid_46685, 32) == 0 && - ltid_in_bounds_46695)) { - // write result - { - 
((volatile __local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - x_46692; - x_46693 = x_46692; - } + // apply reduction operation + { + int64_t defunc_1_op_res_129394 = add64(x_129392, x_129393); + + x_129392 = defunc_1_op_res_129394; } - if (sle32(wave_sizze_46687, skip_threads_46698)) { - barrier(CLK_LOCAL_MEM_FENCE); + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_129392; } - skip_threads_46698 *= 2; } + skip_waves_129397 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46685, 32) == 0 || !ltid_in_bounds_46695)) { - // read operands - { - x_38445 = x_38444; - x_38444 = ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(squot32(local_tid_46685, - 32)) - - (int64_t) 1]; + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129377) == (int64_t) 0) { + x_acc_129390 = x_129392; } - // perform operation + } + if (groups_per_segment_129367 == (int64_t) 1) { + // first thread in group saves final result to memory { - bool inactive_46700 = - slt64(srem64((sext_i32_i64(local_tid_46685) + - (int64_t) 1) * (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1, iota32_arg_28909), - (sext_i32_i64(local_tid_46685) + (int64_t) 1) * - (segscan_group_sizze_38440 * sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1 - ((sext_i32_i64(squot32(local_tid_46685, - 32) * 32 - 1) + - (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1)); - - if (inactive_46700) { - x_38444 = x_38445; - } - if (!inactive_46700) { - float defunc_1_op_res_38446 = x_38444 + x_38445; - - x_38444 = defunc_1_op_res_38446; + if (local_tid_129377 == 0) { + ((__global int64_t *) 
mem_124946)[gtid_114328] = + x_acc_129390; } } - // write final result + } else { + int32_t old_counter_129398; + + // first thread in group saves group result to global memory { - ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - x_38444; + if (local_tid_129377 == 0) { + ((__global + int64_t *) group_res_arr_mem_129372)[sext_i32_i64(virt_group_id_129387) * + segred_group_sizze_114348] = + x_acc_129390; + mem_fence_global(); + old_counter_129398 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129374)[sext_i32_i64(srem32(flat_segment_id_129388, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129383)[(int64_t) 0] = + old_counter_129398 == groups_per_segment_129367 - + (int64_t) 1; + } } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46685, 32) == 0) { - ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)] = - x_38445; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // threads in bounds write scanned carries - { - if (slt64(gtid_38345, m_28478) && slt64(gtid_38353, iota32_arg_28909)) { - ((__global float *) mem_45343)[gtid_38345 * iota32_arg_28909 + - gtid_38353] = ((__local - float *) scan_arr_mem_46689)[sext_i32_i64(local_tid_46685)]; - } - } - - error_0: - return; - #undef segscan_group_sizze_38440 -} -__kernel void mainMagnitudeziscan_stage3_36724(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int64_t num_groups_36742, - int32_t num_threads_46256, - int32_t required_groups_46298, - __global - unsigned char *mem_45163) -{ - #define segscan_group_sizze_36741 (mainMagnitudezisegscan_group_sizze_36718) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46299; - int32_t local_tid_46300; - int64_t group_sizze_46303; - int32_t wave_sizze_46302; - int32_t group_tid_46301; - - global_tid_46299 = get_global_id(0); - 
local_tid_46300 = get_local_id(0); - group_sizze_46303 = get_local_size(0); - wave_sizze_46302 = LOCKSTEP_WIDTH; - group_tid_46301 = get_group_id(0); - - int32_t phys_tid_36724; - - phys_tid_36724 = global_tid_46299; - - int32_t phys_group_id_46304; - - phys_group_id_46304 = get_group_id(0); - for (int32_t i_46305 = 0; i_46305 < sdiv_up32(required_groups_46298 - - phys_group_id_46304, - sext_i64_i32(num_groups_36742)); - i_46305++) { - int32_t virt_group_id_46306 = phys_group_id_46304 + i_46305 * - sext_i64_i32(num_groups_36742); - int64_t flat_idx_46307 = sext_i32_i64(virt_group_id_46306) * - segscan_group_sizze_36741 + sext_i32_i64(local_tid_46300); - int64_t gtid_36715 = squot64(flat_idx_46307, N_28477); - int64_t gtid_36723 = flat_idx_46307 - squot64(flat_idx_46307, N_28477) * - N_28477; - int64_t orig_group_46308 = squot64(flat_idx_46307, - segscan_group_sizze_36741 * - sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))); - int64_t carry_in_flat_idx_46309 = orig_group_46308 * - (segscan_group_sizze_36741 * sdiv_up64(m_28478 * N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1; - - if (slt64(gtid_36715, m_28478) && slt64(gtid_36723, N_28477)) { - if (!(orig_group_46308 == (int64_t) 0 || (flat_idx_46307 == - (orig_group_46308 + - (int64_t) 1) * - (segscan_group_sizze_36741 * - sdiv_up64(m_28478 * - N_28477, - sext_i32_i64(num_threads_46256))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46307, - N_28477), - flat_idx_46307 - - carry_in_flat_idx_46309)))) { - int64_t x_36746; - int64_t x_36747; - - x_36746 = ((__global - int64_t *) mem_45163)[squot64(carry_in_flat_idx_46309, - N_28477) * N_28477 + - (carry_in_flat_idx_46309 - - squot64(carry_in_flat_idx_46309, - N_28477) * N_28477)]; - x_36747 = ((__global int64_t *) mem_45163)[gtid_36715 * - N_28477 + - gtid_36723]; - - int64_t defunc_1_op_res_36748; - - defunc_1_op_res_36748 = add64(x_36746, x_36747); - x_36746 = defunc_1_op_res_36748; - ((__global int64_t *) mem_45163)[gtid_36715 * N_28477 
+ - gtid_36723] = x_36746; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129399; + + is_last_group_129399 = ((__local + bool *) sync_arr_mem_129383)[(int64_t) 0]; + if (is_last_group_129399) { + if (local_tid_129377 == 0) { + old_counter_129398 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129374)[sext_i32_i64(srem32(flat_segment_id_129388, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129367)); + } + // read in the per-group-results + { + int64_t read_per_thread_129400 = + sdiv_up64(groups_per_segment_129367, + segred_group_sizze_114348); + + x_114352 = (int64_t) 0; + for (int64_t i_129401 = 0; i_129401 < + read_per_thread_129400; i_129401++) { + int64_t group_res_id_129402 = + sext_i32_i64(local_tid_129377) * + read_per_thread_129400 + i_129401; + int64_t index_of_group_res_129403 = + sext_i32_i64(flat_segment_id_129388) * + groups_per_segment_129367 + group_res_id_129402; + + if (slt64(group_res_id_129402, + groups_per_segment_129367)) { + x_114353 = ((__global + int64_t *) group_res_arr_mem_129372)[index_of_group_res_129403 * + segred_group_sizze_114348]; + + int64_t defunc_1_op_res_114354; + + defunc_1_op_res_114354 = add64(x_114352, x_114353); + x_114352 = defunc_1_op_res_114354; + } + } + } + ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_114352; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129404; + int32_t skip_waves_129405; + + skip_waves_129405 = 1; + + int64_t x_129392; + int64_t x_129393; + + offset_129404 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129377, + sext_i64_i32(segred_group_sizze_114348))) { + x_129392 = ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129404)]; + } + } + offset_129404 = 1; + while (slt32(offset_129404, wave_sizze_129379)) { + if (slt32(local_tid_129377 + offset_129404, + 
sext_i64_i32(segred_group_sizze_114348)) && + ((local_tid_129377 - squot32(local_tid_129377, + wave_sizze_129379) * + wave_sizze_129379) & (2 * offset_129404 - 1)) == + 0) { + // read array element + { + x_129393 = ((volatile __local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129404)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129394 = add64(x_129392, + x_129393); + + x_129392 = defunc_1_op_res_129394; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_129392; + } + } + offset_129404 *= 2; + } + while (slt32(skip_waves_129405, + squot32(sext_i64_i32(segred_group_sizze_114348) + + wave_sizze_129379 - 1, + wave_sizze_129379))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129404 = skip_waves_129405 * wave_sizze_129379; + if (slt32(local_tid_129377 + offset_129404, + sext_i64_i32(segred_group_sizze_114348)) && + ((local_tid_129377 - squot32(local_tid_129377, + wave_sizze_129379) * + wave_sizze_129379) == 0 && + (squot32(local_tid_129377, wave_sizze_129379) & + (2 * skip_waves_129405 - 1)) == 0)) { + // read array element + { + x_129393 = ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377 + + offset_129404)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129394 = add64(x_129392, + x_129393); + + x_129392 = defunc_1_op_res_129394; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129381)[sext_i32_i64(local_tid_129377)] = + x_129392; + } + } + skip_waves_129405 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129377 == 0) { + ((__global int64_t *) mem_124946)[gtid_114328] = + x_129392; + } + } + } } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_0: + error_1: return; - #undef segscan_group_sizze_36741 + #undef segred_group_sizze_114348 } -__kernel void mainMagnitudeziscan_stage3_38354(__global int *global_failure, - int64_t m_28478, 
- int64_t iota32_arg_28909, - int64_t num_groups_38441, - int32_t num_threads_46659, - int32_t required_groups_46701, - __global - unsigned char *mem_45343) +__kernel void mainzisegred_large_114467(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129548_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129546_backing_aligned_1, + int64_t N_75135, + int64_t defunc_2_reduce_comm_res_76995, + int64_t num_groups_114488, + int64_t groups_per_segment_129532, + int64_t elements_per_thread_129533, + int64_t virt_num_groups_129534, + int64_t threads_per_segment_129536, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124969, + __global + unsigned char *group_res_arr_mem_129537, + __global + unsigned char *mainzicounter_mem_129539) { - #define segscan_group_sizze_38440 (mainMagnitudezisegscan_group_sizze_38348) + #define segred_group_sizze_114487 (mainzisegred_group_sizze_114461) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129548_backing_1 = + (__local volatile + char *) sync_arr_mem_129548_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129546_backing_0 = + (__local volatile + char *) red_arr_mem_129546_backing_aligned_1; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; - - int32_t global_tid_46702; - int32_t local_tid_46703; - int64_t group_sizze_46706; - int32_t wave_sizze_46705; - int32_t group_tid_46704; - - global_tid_46702 = get_global_id(0); - local_tid_46703 = get_local_id(0); - group_sizze_46706 = get_local_size(0); - wave_sizze_46705 = LOCKSTEP_WIDTH; - group_tid_46704 = get_group_id(0); - - int32_t phys_tid_38354; - - phys_tid_38354 = global_tid_46702; - - int32_t phys_group_id_46707; - - 
phys_group_id_46707 = get_group_id(0); - for (int32_t i_46708 = 0; i_46708 < sdiv_up32(required_groups_46701 - - phys_group_id_46707, - sext_i64_i32(num_groups_38441)); - i_46708++) { - int32_t virt_group_id_46709 = phys_group_id_46707 + i_46708 * - sext_i64_i32(num_groups_38441); - int64_t flat_idx_46710 = sext_i32_i64(virt_group_id_46709) * - segscan_group_sizze_38440 + sext_i32_i64(local_tid_46703); - int64_t gtid_38345 = squot64(flat_idx_46710, iota32_arg_28909); - int64_t gtid_38353 = flat_idx_46710 - squot64(flat_idx_46710, - iota32_arg_28909) * - iota32_arg_28909; - int64_t orig_group_46711 = squot64(flat_idx_46710, - segscan_group_sizze_38440 * - sdiv_up64(m_28478 * iota32_arg_28909, - sext_i32_i64(num_threads_46659))); - int64_t carry_in_flat_idx_46712 = orig_group_46711 * - (segscan_group_sizze_38440 * sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1; + if (failure_is_an_option) { + int failed = *global_failure >= 0; - if (slt64(gtid_38345, m_28478) && slt64(gtid_38353, iota32_arg_28909)) { - if (!(orig_group_46711 == (int64_t) 0 || (flat_idx_46710 == - (orig_group_46711 + - (int64_t) 1) * - (segscan_group_sizze_38440 * - sdiv_up64(m_28478 * - iota32_arg_28909, - sext_i32_i64(num_threads_46659))) - - (int64_t) 1 || - slt64(srem64(flat_idx_46710, - iota32_arg_28909), - flat_idx_46710 - - carry_in_flat_idx_46712)))) { - float x_38444; - float x_38445; - - x_38444 = ((__global - float *) mem_45343)[squot64(carry_in_flat_idx_46712, - iota32_arg_28909) * - iota32_arg_28909 + - (carry_in_flat_idx_46712 - - squot64(carry_in_flat_idx_46712, - iota32_arg_28909) * - iota32_arg_28909)]; - x_38445 = ((__global float *) mem_45343)[gtid_38345 * - iota32_arg_28909 + - gtid_38353]; - - float defunc_1_op_res_38446; - - defunc_1_op_res_38446 = x_38444 + x_38445; - x_38444 = defunc_1_op_res_38446; - ((__global float *) mem_45343)[gtid_38345 * iota32_arg_28909 + - gtid_38353] = x_38444; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | 
CLK_LOCAL_MEM_FENCE); + if (failed) + return; } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segscan_group_sizze_38440 -} -__kernel void mainMagnitudezisegmap_34343(__global int *global_failure, - int64_t N_28477, float freq_28482, - int64_t i32_res_28493, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44385) -{ - #define segmap_group_sizze_34416 (mainMagnitudezisegmap_group_sizze_34346) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t global_tid_129541; + int32_t local_tid_129542; + int64_t group_sizze_129545; + int32_t wave_sizze_129544; + int32_t group_tid_129543; - if (*global_failure >= 0) - return; + global_tid_129541 = get_global_id(0); + local_tid_129542 = get_local_id(0); + group_sizze_129545 = get_local_size(0); + wave_sizze_129544 = LOCKSTEP_WIDTH; + group_tid_129543 = get_group_id(0); - int32_t global_tid_45680; - int32_t local_tid_45681; - int64_t group_sizze_45684; - int32_t wave_sizze_45683; - int32_t group_tid_45682; + int32_t phys_tid_114467; - global_tid_45680 = get_global_id(0); - local_tid_45681 = get_local_id(0); - group_sizze_45684 = get_local_size(0); - wave_sizze_45683 = LOCKSTEP_WIDTH; - group_tid_45682 = get_group_id(0); + phys_tid_114467 = global_tid_129541; - int32_t phys_tid_34343; + __local char *red_arr_mem_129546; - phys_tid_34343 = global_tid_45680; + red_arr_mem_129546 = (__local char *) red_arr_mem_129546_backing_0; - int64_t gtid_34341; + __local char *sync_arr_mem_129548; - gtid_34341 = squot64(sext_i32_i64(group_tid_45682) * - segmap_group_sizze_34416 + - sext_i32_i64(local_tid_45681), N_28477); + sync_arr_mem_129548 = (__local char *) sync_arr_mem_129548_backing_1; - int64_t gtid_34342; + int32_t phys_group_id_129550; - gtid_34342 = sext_i32_i64(group_tid_45682) * segmap_group_sizze_34416 + - sext_i32_i64(local_tid_45681) - squot64(sext_i32_i64(group_tid_45682) * - segmap_group_sizze_34416 + - 
sext_i32_i64(local_tid_45681), - N_28477) * N_28477; - if (slt64(gtid_34341, i32_res_28493) && slt64(gtid_34342, N_28477)) { - int32_t index_primexp_42340 = sext_i64_i32(gtid_34341); - bool index_primexp_42337 = index_primexp_42340 == 0; - float defunc_0_f_res_34422; + phys_group_id_129550 = get_group_id(0); + for (int32_t i_129551 = 0; i_129551 < + sdiv_up32(sext_i64_i32(virt_num_groups_129534) - phys_group_id_129550, + sext_i64_i32(num_groups_114488)); i_129551++) { + int32_t virt_group_id_129552 = phys_group_id_129550 + i_129551 * + sext_i64_i32(num_groups_114488); + int32_t flat_segment_id_129553 = squot32(virt_group_id_129552, + sext_i64_i32(groups_per_segment_129532)); + int64_t global_tid_129554 = srem64(sext_i32_i64(virt_group_id_129552) * + segred_group_sizze_114487 + + sext_i32_i64(local_tid_129542), + segred_group_sizze_114487 * + groups_per_segment_129532); + int64_t gtid_114458 = sext_i32_i64(flat_segment_id_129553); + int64_t gtid_114466; + double x_acc_129555; + int64_t chunk_sizze_129556; - if (index_primexp_42337) { - defunc_0_f_res_34422 = 1.0F; - } else { - int32_t x_34421 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_34342]; - bool cond_34423 = index_primexp_42340 == 1; - float defunc_0_f_res_f_res_34424; - - if (cond_34423) { - float i32_res_34425 = sitofp_i32_f32(x_34421); - - defunc_0_f_res_f_res_34424 = i32_res_34425; - } else { - int32_t r32_arg_34426 = sdiv32(index_primexp_42340, 2); - float i32_res_34427 = sitofp_i32_f32(r32_arg_34426); - float i32_res_34428 = sitofp_i32_f32(x_34421); - float x_34429 = 6.2831855F * i32_res_34427; - float x_34430 = i32_res_34428 * x_34429; - float angle_34431 = x_34430 / freq_28482; - int32_t x_34432 = smod32(index_primexp_42340, 2); - bool cond_34433 = x_34432 == 0; - float defunc_0_f_res_f_res_f_res_34434; - - if (cond_34433) { - float sin_res_34435; - - sin_res_34435 = futrts_sin32(angle_34431); - defunc_0_f_res_f_res_f_res_34434 = sin_res_34435; + chunk_sizze_129556 = 
smin64(elements_per_thread_129533, + sdiv_up64(defunc_2_reduce_comm_res_76995 - + global_tid_129554, + threads_per_segment_129536)); + + double x_114491; + double x_114492; + + // neutral-initialise the accumulators + { + x_acc_129555 = 0.0; + } + for (int64_t i_129560 = 0; i_129560 < chunk_sizze_129556; i_129560++) { + gtid_114466 = global_tid_129554 + threads_per_segment_129536 * + i_129560; + // apply map function + { + int64_t x_114496 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114458]; + bool cond_114498 = slt64(gtid_114466, x_114496); + double defunc_0_f_res_114499; + + if (cond_114498) { + int64_t x_114495 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114458]; + int64_t x_114500 = add64(gtid_114466, x_114495); + int64_t x_114501 = sub64(x_114500, x_114496); + int64_t i_114502 = add64((int64_t) 1, x_114501); + bool x_114503 = sle64((int64_t) 0, i_114502); + bool y_114504 = slt64(i_114502, N_75135); + bool bounds_check_114505 = x_114503 && y_114504; + bool index_certs_114506; + + if (!bounds_check_114505) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 201) == -1) { + global_failure_args[0] = i_114502; + global_failure_args[1] = N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_114507 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114458 * + N_75135 + + i_114502]; + + defunc_0_f_res_114499 = defunc_0_f_res_t_res_114507; } else { - float cos_res_34436; + defunc_0_f_res_114499 = 0.0; + } + // save map-out results + { } + // load accumulator + { + x_114491 = x_acc_129555; + } + // load new values + { + x_114492 = defunc_0_f_res_114499; + } + // apply reduction operator + { + double defunc_1_op_res_114493 = x_114491 + x_114492; - cos_res_34436 = futrts_cos32(angle_34431); - defunc_0_f_res_f_res_f_res_34434 = cos_res_34436; + // store in accumulator + { + x_acc_129555 = defunc_1_op_res_114493; + } } - defunc_0_f_res_f_res_34424 = defunc_0_f_res_f_res_f_res_34434; } 
- defunc_0_f_res_34422 = defunc_0_f_res_f_res_34424; } - ((__global float *) mem_44385)[gtid_34341 * N_28477 + gtid_34342] = - defunc_0_f_res_34422; - } - - error_0: - return; - #undef segmap_group_sizze_34416 -} -__kernel void mainMagnitudezisegmap_34521(__global int *global_failure, - int64_t N_28477, float freq_28482, - int64_t i32_res_28493, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_44389) -{ - #define segmap_group_sizze_34590 (mainMagnitudezisegmap_group_sizze_34524) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45685; - int32_t local_tid_45686; - int64_t group_sizze_45689; - int32_t wave_sizze_45688; - int32_t group_tid_45687; - - global_tid_45685 = get_global_id(0); - local_tid_45686 = get_local_id(0); - group_sizze_45689 = get_local_size(0); - wave_sizze_45688 = LOCKSTEP_WIDTH; - group_tid_45687 = get_group_id(0); - - int32_t phys_tid_34521; - - phys_tid_34521 = global_tid_45685; - - int64_t gtid_34519; - - gtid_34519 = squot64(sext_i32_i64(group_tid_45687) * - segmap_group_sizze_34590 + - sext_i32_i64(local_tid_45686), N_28477); - - int64_t gtid_34520; - - gtid_34520 = sext_i32_i64(group_tid_45687) * segmap_group_sizze_34590 + - sext_i32_i64(local_tid_45686) - squot64(sext_i32_i64(group_tid_45687) * - segmap_group_sizze_34590 + - sext_i32_i64(local_tid_45686), - N_28477) * N_28477; - if (slt64(gtid_34519, i32_res_28493) && slt64(gtid_34520, N_28477)) { - int32_t index_primexp_42349 = sext_i64_i32(gtid_34519); - bool index_primexp_42346 = index_primexp_42349 == 0; - float defunc_0_f_res_34596; - - if (index_primexp_42346) { - defunc_0_f_res_34596 = 1.0F; - } else { - int32_t x_34595 = ((__global - int32_t *) mappingindices_mem_44380)[gtid_34520]; - int32_t i_34597 = add32(1, index_primexp_42349); - int32_t r32_arg_34598 = sdiv32(i_34597, 2); - float i32_res_34599 = sitofp_i32_f32(r32_arg_34598); - float i32_res_34600 
= sitofp_i32_f32(x_34595); - float x_34601 = 6.2831855F * i32_res_34599; - float x_34602 = i32_res_34600 * x_34601; - float angle_34603 = x_34602 / freq_28482; - int32_t x_34604 = smod32(i_34597, 2); - bool cond_34605 = x_34604 == 0; - float defunc_0_f_res_f_res_34606; - - if (cond_34605) { - float sin_res_34607; - - sin_res_34607 = futrts_sin32(angle_34603); - defunc_0_f_res_f_res_34606 = sin_res_34607; - } else { - float cos_res_34608; - - cos_res_34608 = futrts_cos32(angle_34603); - defunc_0_f_res_f_res_34606 = cos_res_34608; - } - defunc_0_f_res_34596 = defunc_0_f_res_f_res_34606; + // to reduce current chunk, first store our result in memory + { + x_114491 = x_acc_129555; + ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_114491; } - ((__global float *) mem_44389)[gtid_34519 * N_28477 + gtid_34520] = - defunc_0_f_res_34596; - } - - error_0: - return; - #undef segmap_group_sizze_34590 -} -__kernel void mainMagnitudezisegmap_34649(__global int *global_failure, - int64_t N_28477, - int64_t i32_res_28493, - float i32_res_28558, __global - unsigned char *mem_44393, __global - unsigned char *mem_44397) -{ - #define segmap_group_sizze_34673 (mainMagnitudezisegmap_group_sizze_34652) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45690; - int32_t local_tid_45691; - int64_t group_sizze_45694; - int32_t wave_sizze_45693; - int32_t group_tid_45692; - - global_tid_45690 = get_global_id(0); - local_tid_45691 = get_local_id(0); - group_sizze_45694 = get_local_size(0); - wave_sizze_45693 = LOCKSTEP_WIDTH; - group_tid_45692 = get_group_id(0); - - int32_t phys_tid_34649; - - phys_tid_34649 = global_tid_45690; - - int64_t gtid_34647; - - gtid_34647 = squot64(sext_i32_i64(group_tid_45692) * - segmap_group_sizze_34673 + - sext_i32_i64(local_tid_45691), i32_res_28493); - - int64_t gtid_34648; - - gtid_34648 = sext_i32_i64(group_tid_45692) * 
segmap_group_sizze_34673 + - sext_i32_i64(local_tid_45691) - squot64(sext_i32_i64(group_tid_45692) * - segmap_group_sizze_34673 + - sext_i32_i64(local_tid_45691), - i32_res_28493) * i32_res_28493; - if (slt64(gtid_34647, N_28477) && slt64(gtid_34648, i32_res_28493)) { - float x_34676 = ((__global float *) mem_44393)[gtid_34647 * - i32_res_28493 + - gtid_34648]; - float defunc_0_f_res_34677 = i32_res_28558 + x_34676; - ((__global float *) mem_44397)[gtid_34647 * i32_res_28493 + - gtid_34648] = defunc_0_f_res_34677; - } - - error_0: - return; - #undef segmap_group_sizze_34673 -} -__kernel void mainMagnitudezisegmap_34682(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int32_t n_28481, int32_t k2p2zq_28491, - int64_t i32_res_28493, - int64_t num_groups_34707, __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44397, - __global unsigned char *mem_44400, - __global unsigned char *mem_44404, - __global unsigned char *mem_44446) -{ - #define segmap_group_sizze_34706 (mainMagnitudezisegmap_group_sizze_34684) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45698; - int32_t local_tid_45699; - int64_t group_sizze_45702; - int32_t wave_sizze_45701; - int32_t group_tid_45700; - - global_tid_45698 = get_global_id(0); - local_tid_45699 = get_local_id(0); - group_sizze_45702 = get_local_size(0); - wave_sizze_45701 = LOCKSTEP_WIDTH; - group_tid_45700 = get_group_id(0); - - int32_t phys_tid_34682; - - phys_tid_34682 = global_tid_45698; - - int32_t phys_group_id_45703; - - phys_group_id_45703 = get_group_id(0); - for (int32_t i_45704 = 0; i_45704 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, segmap_group_sizze_34706)) - - phys_group_id_45703, sext_i64_i32(num_groups_34707)); - i_45704++) { - int32_t virt_group_id_45705 = phys_group_id_45703 + i_45704 * - sext_i64_i32(num_groups_34707); - int64_t gtid_34681 = sext_i32_i64(virt_group_id_45705) 
* - segmap_group_sizze_34706 + sext_i32_i64(local_tid_45699); + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); - if (slt64(gtid_34681, m_28478)) { - for (int32_t i_44360 = 0; i_44360 < k2p2zq_28491; i_44360++) { - int64_t i_44289 = sext_i32_i64(i_44360); - - for (int32_t i_44359 = 0; i_44359 < k2p2zq_28491; i_44359++) { - int64_t i_44293 = sext_i32_i64(i_44359); - float defunc_2_reduce_res_34715; - float redout_44295 = 0.0F; + int32_t offset_129561; + int32_t skip_waves_129562; + + skip_waves_129562 = 1; + + double x_129557; + double x_129558; + + offset_129561 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129542, + sext_i64_i32(segred_group_sizze_114487))) { + x_129557 = ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129561)]; + } + } + offset_129561 = 1; + while (slt32(offset_129561, wave_sizze_129544)) { + if (slt32(local_tid_129542 + offset_129561, + sext_i64_i32(segred_group_sizze_114487)) && + ((local_tid_129542 - squot32(local_tid_129542, + wave_sizze_129544) * + wave_sizze_129544) & (2 * offset_129561 - 1)) == 0) { + // read array element + { + x_129558 = ((volatile __local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129561)]; + } + // apply reduction operation + { + double defunc_1_op_res_129559 = x_129557 + x_129558; - for (int32_t i_44358 = 0; i_44358 < n_28481; i_44358++) { - int64_t i_44296 = sext_i32_i64(i_44358); - float x_34719 = ((__global float *) mem_44400)[i_44296 * - m_28478 + - gtid_34681]; - float x_34720 = ((__global - float *) binop_p_mem_44390)[i_44289 * - N_28477 + - i_44296]; - float x_34721 = ((__global float *) mem_44397)[i_44296 * - i32_res_28493 + - i_44293]; - float x_34722 = x_34720 * x_34721; - bool isnan_res_34723; - - isnan_res_34723 = futrts_isnan32(x_34719); - - float y_34724; - - if (isnan_res_34723) { - y_34724 = 0.0F; - } else { - y_34724 = 1.0F; - } - - float 
defunc_2_f_res_34725 = x_34722 * y_34724; - float defunc_1_op_res_34718 = defunc_2_f_res_34725 + - redout_44295; - float redout_tmp_45708 = defunc_1_op_res_34718; - - redout_44295 = redout_tmp_45708; - } - defunc_2_reduce_res_34715 = redout_44295; - ((__global float *) mem_44404)[phys_tid_34682 + (i_44289 * - (num_groups_34707 * - segmap_group_sizze_34706 * - i32_res_28493) + - i_44293 * - (num_groups_34707 * - segmap_group_sizze_34706))] = - defunc_2_reduce_res_34715; + x_129557 = defunc_1_op_res_129559; } - } - for (int64_t i_45709 = 0; i_45709 < i32_res_28493; i_45709++) { - for (int64_t i_45710 = 0; i_45710 < i32_res_28493; i_45710++) { - ((__global float *) mem_44446)[i_45709 * (m_28478 * - i32_res_28493) + - i_45710 * m_28478 + - gtid_34681] = ((__global - float *) mem_44404)[phys_tid_34682 + - (i_45709 * - (num_groups_34707 * - segmap_group_sizze_34706 * - i32_res_28493) + - i_45710 * - (num_groups_34707 * - segmap_group_sizze_34706))]; + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_129557; } } + offset_129561 *= 2; } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_0: - return; - #undef segmap_group_sizze_34706 -} -__kernel void mainMagnitudezisegmap_34728(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int32_t n_28481, int32_t k2p2zq_28491, - int64_t i32_res_28493, - int64_t num_groups_34884, __global - unsigned char *images_mem_44381, - __global unsigned char *mem_44393, - __global unsigned char *mem_44397, - __global unsigned char *mem_44449, - __global unsigned char *mem_44465) -{ - #define segmap_group_sizze_34883 (mainMagnitudezisegmap_group_sizze_34731) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45711; - int32_t local_tid_45712; - int64_t group_sizze_45715; - int32_t wave_sizze_45714; - int32_t group_tid_45713; - - 
global_tid_45711 = get_global_id(0); - local_tid_45712 = get_local_id(0); - group_sizze_45715 = get_local_size(0); - wave_sizze_45714 = LOCKSTEP_WIDTH; - group_tid_45713 = get_group_id(0); - - int32_t phys_tid_34728; - - phys_tid_34728 = global_tid_45711; - - int32_t phys_group_id_45716; - - phys_group_id_45716 = get_group_id(0); - for (int32_t i_45717 = 0; i_45717 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478 * i32_res_28493, - segmap_group_sizze_34883)) - - phys_group_id_45716, sext_i64_i32(num_groups_34884)); - i_45717++) { - int32_t virt_group_id_45718 = phys_group_id_45716 + i_45717 * - sext_i64_i32(num_groups_34884); - int64_t gtid_34726 = squot64(sext_i32_i64(virt_group_id_45718) * - segmap_group_sizze_34883 + - sext_i32_i64(local_tid_45712), - i32_res_28493); - int64_t gtid_34727 = sext_i32_i64(virt_group_id_45718) * - segmap_group_sizze_34883 + sext_i32_i64(local_tid_45712) - - squot64(sext_i32_i64(virt_group_id_45718) * - segmap_group_sizze_34883 + - sext_i32_i64(local_tid_45712), i32_res_28493) * - i32_res_28493; - - if (slt64(gtid_34726, m_28478) && slt64(gtid_34727, i32_res_28493)) { - for (int32_t i_44362 = 0; i_44362 < k2p2zq_28491; i_44362++) { - int64_t i_44299 = sext_i32_i64(i_44362); - float defunc_2_reduce_res_34895; - float redout_44301 = 0.0F; - - for (int32_t i_44361 = 0; i_44361 < n_28481; i_44361++) { - int64_t i_44302 = sext_i32_i64(i_44361); - float x_34899 = ((__global - float *) images_mem_44381)[gtid_34726 * - N_28477 + - i_44302]; - float x_34900 = ((__global float *) mem_44393)[i_44302 * - i32_res_28493 + - gtid_34727]; - float x_34901 = ((__global float *) mem_44397)[i_44302 * - i32_res_28493 + - i_44299]; - float x_34902 = x_34900 * x_34901; - bool isnan_res_34903; - - isnan_res_34903 = futrts_isnan32(x_34899); + while (slt32(skip_waves_129562, + squot32(sext_i64_i32(segred_group_sizze_114487) + + wave_sizze_129544 - 1, wave_sizze_129544))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129561 = skip_waves_129562 * wave_sizze_129544; + 
if (slt32(local_tid_129542 + offset_129561, + sext_i64_i32(segred_group_sizze_114487)) && + ((local_tid_129542 - squot32(local_tid_129542, + wave_sizze_129544) * + wave_sizze_129544) == 0 && (squot32(local_tid_129542, + wave_sizze_129544) & (2 * + skip_waves_129562 - + 1)) == + 0)) { + // read array element + { + x_129558 = ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129561)]; + } + // apply reduction operation + { + double defunc_1_op_res_129559 = x_129557 + x_129558; - float y_34904; + x_129557 = defunc_1_op_res_129559; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_129557; + } + } + skip_waves_129562 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129542) == (int64_t) 0) { + x_acc_129555 = x_129557; + } + } + if (groups_per_segment_129532 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129542 == 0) { + ((__global double *) mem_124969)[gtid_114458] = + x_acc_129555; + } + } + } else { + int32_t old_counter_129563; + + // first thread in group saves group result to global memory + { + if (local_tid_129542 == 0) { + ((__global + double *) group_res_arr_mem_129537)[sext_i32_i64(virt_group_id_129552) * + segred_group_sizze_114487] = + x_acc_129555; + mem_fence_global(); + old_counter_129563 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129539)[sext_i32_i64(srem32(flat_segment_id_129553, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129548)[(int64_t) 0] = + old_counter_129563 == groups_per_segment_129532 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129564; + + is_last_group_129564 = ((__local + bool *) sync_arr_mem_129548)[(int64_t) 0]; + if (is_last_group_129564) { + if (local_tid_129542 == 0) { + old_counter_129563 = + 
atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129539)[sext_i32_i64(srem32(flat_segment_id_129553, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129532)); + } + // read in the per-group-results + { + int64_t read_per_thread_129565 = + sdiv_up64(groups_per_segment_129532, + segred_group_sizze_114487); - if (isnan_res_34903) { - y_34904 = 0.0F; - } else { - y_34904 = 1.0F; + x_114491 = 0.0; + for (int64_t i_129566 = 0; i_129566 < + read_per_thread_129565; i_129566++) { + int64_t group_res_id_129567 = + sext_i32_i64(local_tid_129542) * + read_per_thread_129565 + i_129566; + int64_t index_of_group_res_129568 = + sext_i32_i64(flat_segment_id_129553) * + groups_per_segment_129532 + group_res_id_129567; + + if (slt64(group_res_id_129567, + groups_per_segment_129532)) { + x_114492 = ((__global + double *) group_res_arr_mem_129537)[index_of_group_res_129568 * + segred_group_sizze_114487]; + + double defunc_1_op_res_114493; + + defunc_1_op_res_114493 = x_114491 + x_114492; + x_114491 = defunc_1_op_res_114493; + } } + } + ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_114491; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129569; + int32_t skip_waves_129570; + + skip_waves_129570 = 1; - float defunc_2_f_res_34905 = x_34902 * y_34904; - float defunc_1_op_res_34898 = defunc_2_f_res_34905 + - redout_44301; - float redout_tmp_45720 = defunc_1_op_res_34898; + double x_129557; + double x_129558; - redout_44301 = redout_tmp_45720; + offset_129569 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129542, + sext_i64_i32(segred_group_sizze_114487))) { + x_129557 = ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129569)]; + } + } + offset_129569 = 1; + while (slt32(offset_129569, wave_sizze_129544)) { + if (slt32(local_tid_129542 + offset_129569, + sext_i64_i32(segred_group_sizze_114487)) && + ((local_tid_129542 - 
squot32(local_tid_129542, + wave_sizze_129544) * + wave_sizze_129544) & (2 * offset_129569 - 1)) == + 0) { + // read array element + { + x_129558 = ((volatile __local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129569)]; + } + // apply reduction operation + { + double defunc_1_op_res_129559 = x_129557 + + x_129558; + + x_129557 = defunc_1_op_res_129559; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_129557; + } + } + offset_129569 *= 2; + } + while (slt32(skip_waves_129570, + squot32(sext_i64_i32(segred_group_sizze_114487) + + wave_sizze_129544 - 1, + wave_sizze_129544))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129569 = skip_waves_129570 * wave_sizze_129544; + if (slt32(local_tid_129542 + offset_129569, + sext_i64_i32(segred_group_sizze_114487)) && + ((local_tid_129542 - squot32(local_tid_129542, + wave_sizze_129544) * + wave_sizze_129544) == 0 && + (squot32(local_tid_129542, wave_sizze_129544) & + (2 * skip_waves_129570 - 1)) == 0)) { + // read array element + { + x_129558 = ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542 + + offset_129569)]; + } + // apply reduction operation + { + double defunc_1_op_res_129559 = x_129557 + + x_129558; + + x_129557 = defunc_1_op_res_129559; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129546)[sext_i32_i64(local_tid_129542)] = + x_129557; + } + } + skip_waves_129570 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129542 == 0) { + ((__global double *) mem_124969)[gtid_114458] = + x_129557; + } + } } - defunc_2_reduce_res_34895 = redout_44301; - ((__global float *) mem_44449)[phys_tid_34728 + i_44299 * - (num_groups_34884 * - segmap_group_sizze_34883)] = - defunc_2_reduce_res_34895; - } - for (int64_t i_45721 = 0; i_45721 < i32_res_28493; i_45721++) { - ((__global float *) mem_44465)[i_45721 * (i32_res_28493 * - m_28478) + - gtid_34726 * 
i32_res_28493 + - gtid_34727] = ((__global - float *) mem_44449)[phys_tid_34728 + - i_45721 * - (num_groups_34884 * - segmap_group_sizze_34883)]; } } barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_0: + error_1: return; - #undef segmap_group_sizze_34883 + #undef segred_group_sizze_114487 } -__kernel void mainMagnitudezisegmap_35320(__global int *global_failure, - int64_t m_28478, - int64_t i32_res_28493, - int64_t nm_28626, - int64_t i32_res_28641, - int64_t x_28642, int64_t j_m_i_28645, - int64_t gauss_jordan_res_r_ixfn_44617, - int64_t gauss_jordan_res_r_ixfn_44618, - int64_t gauss_jordan_res_r_ixfn_44620, - __global - unsigned char *gauss_jordan_res_r_mem_44622, - __global unsigned char *mem_44627) +__kernel void mainzisegred_large_114739(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129729_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129727_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129725_backing_aligned_2, + __local volatile + int64_t *red_arr_mem_129723_backing_aligned_3, + int64_t iota_arg_77024, + int64_t num_groups_114922, + int64_t groups_per_segment_129705, + int64_t elements_per_thread_129706, + int64_t virt_num_groups_129707, __global + unsigned char *mem_124973, __global + unsigned char *mem_124985, __global + unsigned char *mem_124987, __global + unsigned char *mem_124991, __global + unsigned char *mem_124994, __global + unsigned char *mem_124996, __global + unsigned char *mem_124998, __global + unsigned char *group_res_arr_mem_129710, + __global + unsigned char *group_res_arr_mem_129712, + __global + unsigned char *group_res_arr_mem_129714, + __global + unsigned char *mainzicounter_mem_129716) { - #define segmap_group_sizze_36067 (mainMagnitudezisegmap_group_sizze_35324) + #define segred_group_sizze_114921 (mainzisegred_group_sizze_114733) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict 
sync_arr_mem_129729_backing_3 = + (__local volatile + char *) sync_arr_mem_129729_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129727_backing_2 = + (__local volatile + char *) red_arr_mem_129727_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129725_backing_1 = + (__local volatile + char *) red_arr_mem_129725_backing_aligned_2; + __local volatile char *restrict red_arr_mem_129723_backing_0 = + (__local volatile + char *) red_arr_mem_129723_backing_aligned_3; if (*global_failure >= 0) return; - int32_t global_tid_45878; - int32_t local_tid_45879; - int64_t group_sizze_45882; - int32_t wave_sizze_45881; - int32_t group_tid_45880; - - global_tid_45878 = get_global_id(0); - local_tid_45879 = get_local_id(0); - group_sizze_45882 = get_local_size(0); - wave_sizze_45881 = LOCKSTEP_WIDTH; - group_tid_45880 = get_group_id(0); + int32_t global_tid_129718; + int32_t local_tid_129719; + int64_t group_sizze_129722; + int32_t wave_sizze_129721; + int32_t group_tid_129720; - int32_t phys_tid_35320; + global_tid_129718 = get_global_id(0); + local_tid_129719 = get_local_id(0); + group_sizze_129722 = get_local_size(0); + wave_sizze_129721 = LOCKSTEP_WIDTH; + group_tid_129720 = get_group_id(0); - phys_tid_35320 = global_tid_45878; + int32_t phys_tid_114739; - int64_t gtid_35317; + phys_tid_114739 = global_tid_129718; - gtid_35317 = squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879), i32_res_28493 * - j_m_i_28645); + __local char *red_arr_mem_129723; - int64_t gtid_slice_35315; + red_arr_mem_129723 = (__local char *) red_arr_mem_129723_backing_0; - gtid_slice_35315 = squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879) - - squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879), - i32_res_28493 * j_m_i_28645) * - (i32_res_28493 * j_m_i_28645), j_m_i_28645); - - int64_t gtid_slice_35316; - - gtid_slice_35316 
= sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + sext_i32_i64(local_tid_45879) - - squot64(sext_i32_i64(group_tid_45880) * segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879), i32_res_28493 * j_m_i_28645) * - (i32_res_28493 * j_m_i_28645) - squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879) - - squot64(sext_i32_i64(group_tid_45880) * - segmap_group_sizze_36067 + - sext_i32_i64(local_tid_45879), - i32_res_28493 * - j_m_i_28645) * - (i32_res_28493 * j_m_i_28645), - j_m_i_28645) * j_m_i_28645; - if ((slt64(gtid_35317, m_28478) && slt64(gtid_slice_35315, - i32_res_28493)) && - slt64(gtid_slice_35316, j_m_i_28645)) { - int64_t slice_36071 = i32_res_28493 + gtid_slice_35316; - int64_t binop_x_42412 = x_28642 * gtid_35317; - int64_t binop_y_42413 = i32_res_28641 * gtid_slice_35315; - int64_t binop_x_42414 = binop_x_42412 + binop_y_42413; - int64_t binop_x_42415 = slice_36071 + binop_x_42414; - int64_t new_index_42416 = squot64(binop_x_42415, nm_28626); - int64_t binop_y_42428 = nm_28626 * new_index_42416; - int64_t new_index_42429 = binop_x_42415 - binop_y_42428; - float v_36072 = ((__global - float *) gauss_jordan_res_r_mem_44622)[gauss_jordan_res_r_ixfn_44617 + - (new_index_42416 * - gauss_jordan_res_r_ixfn_44618 + - new_index_42429 * - gauss_jordan_res_r_ixfn_44620)]; - - ((__global float *) mem_44627)[gtid_35317 * (j_m_i_28645 * - i32_res_28493) + - gtid_slice_35315 * j_m_i_28645 + - gtid_slice_35316] = v_36072; - } - - error_0: - return; - #undef segmap_group_sizze_36067 -} -__kernel void mainMagnitudezisegmap_35550(__global int *global_failure, - int64_t m_28478, int64_t nm_28626, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44605) -{ - #define segmap_group_sizze_36055 (mainMagnitudezisegmap_group_sizze_35553) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const 
int block_dim2 = 2; - - if (*global_failure >= 0) - return; + __local char *red_arr_mem_129725; - int32_t global_tid_45872; - int32_t local_tid_45873; - int64_t group_sizze_45876; - int32_t wave_sizze_45875; - int32_t group_tid_45874; + red_arr_mem_129725 = (__local char *) red_arr_mem_129725_backing_1; - global_tid_45872 = get_global_id(0); - local_tid_45873 = get_local_id(0); - group_sizze_45876 = get_local_size(0); - wave_sizze_45875 = LOCKSTEP_WIDTH; - group_tid_45874 = get_group_id(0); + __local char *red_arr_mem_129727; - int32_t phys_tid_35550; + red_arr_mem_129727 = (__local char *) red_arr_mem_129727_backing_2; - phys_tid_35550 = global_tid_45872; + __local char *sync_arr_mem_129729; - int64_t gtid_35548; + sync_arr_mem_129729 = (__local char *) sync_arr_mem_129729_backing_3; - gtid_35548 = squot64(sext_i32_i64(group_tid_45874) * - segmap_group_sizze_36055 + - sext_i32_i64(local_tid_45873), nm_28626); + int32_t phys_group_id_129731; - int64_t gtid_35549; - - gtid_35549 = sext_i32_i64(group_tid_45874) * segmap_group_sizze_36055 + - sext_i32_i64(local_tid_45873) - squot64(sext_i32_i64(group_tid_45874) * - segmap_group_sizze_36055 + - sext_i32_i64(local_tid_45873), - nm_28626) * nm_28626; - if (slt64(gtid_35548, m_28478) && slt64(gtid_35549, nm_28626)) { - float write_value_36061 = ((__global float *) mem_44605)[gtid_35548 * - nm_28626 + - gtid_35549]; + phys_group_id_129731 = get_group_id(0); + for (int32_t i_129732 = 0; i_129732 < + sdiv_up32(sext_i64_i32(virt_num_groups_129707) - phys_group_id_129731, + sext_i64_i32(num_groups_114922)); i_129732++) { + int32_t virt_group_id_129733 = phys_group_id_129731 + i_129732 * + sext_i64_i32(num_groups_114922); + int32_t flat_segment_id_129734 = squot32(virt_group_id_129733, + sext_i64_i32(groups_per_segment_129705)); + int64_t global_tid_129735 = srem64(sext_i32_i64(virt_group_id_129733) * + segred_group_sizze_114921 + + sext_i32_i64(local_tid_129719), + segred_group_sizze_114921 * + groups_per_segment_129705); + 
int64_t gtid_114730 = sext_i32_i64(flat_segment_id_129734); + int64_t gtid_114738; + bool x_acc_129736; + int64_t x_acc_129737; + double x_acc_129738; + int64_t chunk_sizze_129739; + int64_t starting_point_129740; + + starting_point_129740 = global_tid_129735 * elements_per_thread_129706; - if ((sle64((int64_t) 0, gtid_35548) && slt64(gtid_35548, m_28478)) && - (sle64((int64_t) 0, gtid_35549) && slt64(gtid_35549, nm_28626))) { - ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35548 * - ctx_param_ext_44581 + - gtid_35549 * - ctx_param_ext_44583)] = - write_value_36061; + int64_t remaining_elements_129741; + + remaining_elements_129741 = iota_arg_77024 - starting_point_129740; + if (sle64(remaining_elements_129741, (int64_t) 0) || + sle64(iota_arg_77024, starting_point_129740)) { + chunk_sizze_129739 = (int64_t) 0; + } else { + if (slt64(iota_arg_77024, (global_tid_129735 + (int64_t) 1) * + elements_per_thread_129706)) { + chunk_sizze_129739 = iota_arg_77024 - global_tid_129735 * + elements_per_thread_129706; + } else { + chunk_sizze_129739 = elements_per_thread_129706; + } } - } - - error_0: - return; - #undef segmap_group_sizze_36055 -} -__kernel void mainMagnitudezisegmap_35620(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_28478, int32_t k2p2zq_28491, - int32_t m_28624, int64_t nm_28626, - int32_t i_35922, - int64_t i32_res_35924, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44601, - __global unsigned char *mem_44605) -{ - #define segmap_group_sizze_36005 (mainMagnitudezisegmap_group_sizze_35623) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45867; - int32_t local_tid_45868; - int64_t group_sizze_45871; - int32_t wave_sizze_45870; - int32_t group_tid_45869; - - 
global_tid_45867 = get_global_id(0); - local_tid_45868 = get_local_id(0); - group_sizze_45871 = get_local_size(0); - wave_sizze_45870 = LOCKSTEP_WIDTH; - group_tid_45869 = get_group_id(0); - - int32_t phys_tid_35620; - - phys_tid_35620 = global_tid_45867; - - int64_t gtid_35618; - - gtid_35618 = squot64(sext_i32_i64(group_tid_45869) * - segmap_group_sizze_36005 + - sext_i32_i64(local_tid_45868), nm_28626); - - int64_t gtid_35619; - - gtid_35619 = sext_i32_i64(group_tid_45869) * segmap_group_sizze_36005 + - sext_i32_i64(local_tid_45868) - squot64(sext_i32_i64(group_tid_45869) * - segmap_group_sizze_36005 + - sext_i32_i64(local_tid_45868), - nm_28626) * nm_28626; - if (slt64(gtid_35618, m_28478) && slt64(gtid_35619, nm_28626)) { - bool cond_36010 = ((__global bool *) mem_44601)[gtid_35618]; - int32_t defunc_0_f_res_36012 = sext_i64_i32(gtid_35619); - int32_t defunc_0_f_res_36013 = sdiv32(defunc_0_f_res_36012, m_28624); - int32_t defunc_0_f_res_36014 = smod32(defunc_0_f_res_36012, m_28624); - float defunc_0_f_res_36015; - if (cond_36010) { - int32_t x_36016 = mul32(m_28624, defunc_0_f_res_36013); - int32_t i32_arg_36017 = add32(defunc_0_f_res_36014, x_36016); - int64_t i32_res_36018 = sext_i32_i64(i32_arg_36017); - bool x_36019 = sle64((int64_t) 0, i32_res_36018); - bool y_36020 = slt64(i32_res_36018, nm_28626); - bool bounds_check_36021 = x_36019 && y_36020; - bool index_certs_36022; - - if (!bounds_check_36021) { + bool x_114927; + int64_t x_114928; + double x_114929; + bool x_114930; + int64_t x_114931; + double x_114932; + + // neutral-initialise the accumulators + { + x_acc_129736 = 0; + x_acc_129737 = (int64_t) -1; + x_acc_129738 = 0.0; + } + for (int64_t i_129756 = 0; i_129756 < elements_per_thread_129706; + i_129756++) { + gtid_114738 = sext_i32_i64(local_tid_129719) + + (squot64(global_tid_129735, segred_group_sizze_114921) * + elements_per_thread_129706 + i_129756) * + segred_group_sizze_114921; + if (slt64(gtid_114738, iota_arg_77024)) { + // apply map 
function { - if (atomic_cmpxchg_i32_global(global_failure, -1, 90) == - -1) { - global_failure_args[0] = i32_res_36018; - global_failure_args[1] = nm_28626; - ; + int64_t y_114941 = ((__global + int64_t *) mem_124987)[gtid_114730]; + double y_114942 = ((__global + double *) mem_124985)[gtid_114730]; + double x_114946 = ((__global + double *) mem_124991)[gtid_114730 * + iota_arg_77024 + + gtid_114738]; + double x_114947 = ((__global + double *) mem_124973)[gtid_114738]; + double defunc_0_f_res_114950 = x_114946 / y_114942; + bool cond_114951 = slt64(gtid_114738, y_114941); + bool isnan_res_114952; + + isnan_res_114952 = futrts_isnan64(defunc_0_f_res_114950); + + bool cond_t_res_114953 = !isnan_res_114952; + bool x_114954 = cond_114951 && cond_t_res_114953; + double abs_res_114955 = fabs(defunc_0_f_res_114950); + bool defunc_2_f_res_t_res_114956 = x_114947 < + abs_res_114955; + bool x_114957 = x_114954 && defunc_2_f_res_t_res_114956; + double defunc_1_f_res_114958; + + if (cond_114951) { + defunc_1_f_res_114958 = defunc_0_f_res_114950; + } else { + defunc_1_f_res_114958 = 0.0; + } + // save map-out results + { } + // load accumulator + { + x_114927 = x_acc_129736; + x_114928 = x_acc_129737; + x_114929 = x_acc_129738; + } + // load new values + { + x_114930 = x_114957; + x_114931 = gtid_114738; + x_114932 = defunc_1_f_res_114958; + } + // apply reduction operator + { + bool defunc_1_op_res_114933; + int64_t defunc_1_op_res_114934; + + if (x_114927) { + defunc_1_op_res_114933 = x_114927; + defunc_1_op_res_114934 = x_114928; + } else { + bool x_114935 = x_114930 && x_114930; + bool x_114936 = !x_114930; + bool y_114937 = x_114927 && x_114936; + bool defunc_1_op_res_f_res_114938 = x_114935 || + y_114937; + int64_t defunc_1_op_res_f_res_114939; + + if (x_114930) { + defunc_1_op_res_f_res_114939 = x_114931; + } else { + defunc_1_op_res_f_res_114939 = x_114928; + } + defunc_1_op_res_114933 = + defunc_1_op_res_f_res_114938; + defunc_1_op_res_114934 = + 
defunc_1_op_res_f_res_114939; + } + + double defunc_1_op_res_114940 = x_114929 + x_114932; + + // store in accumulator + { + x_acc_129736 = defunc_1_op_res_114933; + x_acc_129737 = defunc_1_op_res_114934; + x_acc_129738 = defunc_1_op_res_114940; + } } - return; } } + // to reduce current chunk, first store our result in memory + { + x_114927 = x_acc_129736; + x_114928 = x_acc_129737; + x_114929 = x_acc_129738; + ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_114927; + ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + x_114928; + ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_114929; + } + barrier(CLK_LOCAL_MEM_FENCE); - float defunc_0_f_res_t_res_36023 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35618 * - ctx_param_ext_44581 + - i32_res_36018 * - ctx_param_ext_44583)]; + int32_t offset_129757; + int32_t skip_waves_129758; - defunc_0_f_res_36015 = defunc_0_f_res_t_res_36023; - } else { - float v1_36009 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35618 * - ctx_param_ext_44581 + - i32_res_35924 * - ctx_param_ext_44583)]; - int64_t i32_res_36024 = sext_i32_i64(defunc_0_f_res_36014); - bool x_36025 = sle64((int64_t) 0, i32_res_36024); - bool y_36026 = slt64(i32_res_36024, nm_28626); - bool bounds_check_36027 = x_36025 && y_36026; - bool index_certs_36028; + skip_waves_129758 = 1; - if (!bounds_check_36027) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 91) == - -1) { - global_failure_args[0] = i32_res_36024; - global_failure_args[1] = nm_28626; - ; + bool x_129742; + int64_t x_129743; + double x_129744; + bool x_129745; + int64_t x_129746; + double x_129747; + + offset_129757 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129719, + sext_i64_i32(segred_group_sizze_114921))) { + x_129742 = ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + 
x_129743 = ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + x_129744 = ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + } + } + offset_129757 = 1; + while (slt32(offset_129757, wave_sizze_129721)) { + if (slt32(local_tid_129719 + offset_129757, + sext_i64_i32(segred_group_sizze_114921)) && + ((local_tid_129719 - squot32(local_tid_129719, + wave_sizze_129721) * + wave_sizze_129721) & (2 * offset_129757 - 1)) == 0) { + // read array element + { + x_129745 = ((volatile __local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + x_129746 = ((volatile __local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + x_129747 = ((volatile __local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129757)]; } - return; + // apply reduction operation + { + bool defunc_1_op_res_129748; + int64_t defunc_1_op_res_129749; + + if (x_129742) { + defunc_1_op_res_129748 = x_129742; + defunc_1_op_res_129749 = x_129743; + } else { + bool x_129750 = x_129745 && x_129745; + bool x_129751 = !x_129745; + bool y_129752 = x_129742 && x_129751; + bool defunc_1_op_res_f_res_129753 = x_129750 || + y_129752; + int64_t defunc_1_op_res_f_res_129754; + + if (x_129745) { + defunc_1_op_res_f_res_129754 = x_129746; + } else { + defunc_1_op_res_f_res_129754 = x_129743; + } + defunc_1_op_res_129748 = + defunc_1_op_res_f_res_129753; + defunc_1_op_res_129749 = + defunc_1_op_res_f_res_129754; + } + + double defunc_1_op_res_129755 = x_129744 + x_129747; + + x_129742 = defunc_1_op_res_129748; + x_129743 = defunc_1_op_res_129749; + x_129744 = defunc_1_op_res_129755; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_129742; + ((volatile __local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + x_129743; + ((volatile __local + double *) 
red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_129744; + } + } + offset_129757 *= 2; + } + while (slt32(skip_waves_129758, + squot32(sext_i64_i32(segred_group_sizze_114921) + + wave_sizze_129721 - 1, wave_sizze_129721))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129757 = skip_waves_129758 * wave_sizze_129721; + if (slt32(local_tid_129719 + offset_129757, + sext_i64_i32(segred_group_sizze_114921)) && + ((local_tid_129719 - squot32(local_tid_129719, + wave_sizze_129721) * + wave_sizze_129721) == 0 && (squot32(local_tid_129719, + wave_sizze_129721) & + (2 * skip_waves_129758 - + 1)) == 0)) { + // read array element + { + x_129745 = ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + x_129746 = ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + x_129747 = ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129757)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129748; + int64_t defunc_1_op_res_129749; + + if (x_129742) { + defunc_1_op_res_129748 = x_129742; + defunc_1_op_res_129749 = x_129743; + } else { + bool x_129750 = x_129745 && x_129745; + bool x_129751 = !x_129745; + bool y_129752 = x_129742 && x_129751; + bool defunc_1_op_res_f_res_129753 = x_129750 || + y_129752; + int64_t defunc_1_op_res_f_res_129754; + + if (x_129745) { + defunc_1_op_res_f_res_129754 = x_129746; + } else { + defunc_1_op_res_f_res_129754 = x_129743; + } + defunc_1_op_res_129748 = + defunc_1_op_res_f_res_129753; + defunc_1_op_res_129749 = + defunc_1_op_res_f_res_129754; + } + + double defunc_1_op_res_129755 = x_129744 + x_129747; + + x_129742 = defunc_1_op_res_129748; + x_129743 = defunc_1_op_res_129749; + x_129744 = defunc_1_op_res_129755; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_129742; + ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + 
x_129743; + ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_129744; + } + } + skip_waves_129758 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129719) == (int64_t) 0) { + x_acc_129736 = x_129742; + x_acc_129737 = x_129743; + x_acc_129738 = x_129744; + } + } + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_129719) == (int64_t) 0)) { + x_acc_129736 = 0; + x_acc_129737 = (int64_t) -1; + x_acc_129738 = 0.0; + } + } + } + x_114927 = x_acc_129736; + x_114928 = x_acc_129737; + x_114929 = x_acc_129738; + if (groups_per_segment_129705 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129719 == 0) { + ((__global bool *) mem_124994)[gtid_114730] = x_acc_129736; + ((__global int64_t *) mem_124996)[gtid_114730] = + x_acc_129737; + ((__global double *) mem_124998)[gtid_114730] = + x_acc_129738; + } + } + } else { + int32_t old_counter_129759; + + // first thread in group saves group result to global memory + { + if (local_tid_129719 == 0) { + ((__global + bool *) group_res_arr_mem_129710)[sext_i32_i64(virt_group_id_129733) * + segred_group_sizze_114921] = + x_acc_129736; + ((__global + int64_t *) group_res_arr_mem_129712)[sext_i32_i64(virt_group_id_129733) * + segred_group_sizze_114921] = + x_acc_129737; + ((__global + double *) group_res_arr_mem_129714)[sext_i32_i64(virt_group_id_129733) * + segred_group_sizze_114921] = + x_acc_129738; + mem_fence_global(); + old_counter_129759 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129716)[sext_i32_i64(srem32(flat_segment_id_129734, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129729)[(int64_t) 0] = + old_counter_129759 == groups_per_segment_129705 - + (int64_t) 1; } } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - float x_36029 = ((__global - float *) 
mem_param_44585)[ctx_param_ext_44580 + - (gtid_35618 * - ctx_param_ext_44581 + - i32_res_36024 * - ctx_param_ext_44583)]; - float x_36030 = x_36029 / v1_36009; - int32_t y_36031 = sub32(k2p2zq_28491, 1); - bool cond_36032 = slt32(defunc_0_f_res_36013, y_36031); - float defunc_0_f_res_f_res_36033; + bool is_last_group_129760; - if (cond_36032) { - int32_t x_36034 = add32(1, defunc_0_f_res_36013); - int32_t x_36035 = mul32(m_28624, x_36034); - int32_t i32_arg_36036 = add32(defunc_0_f_res_36014, x_36035); - int64_t i32_res_36037 = sext_i32_i64(i32_arg_36036); - bool x_36038 = sle64((int64_t) 0, i32_res_36037); - bool y_36039 = slt64(i32_res_36037, nm_28626); - bool bounds_check_36040 = x_36038 && y_36039; - bool index_certs_36041; - - if (!bounds_check_36040) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 92) == - -1) { - global_failure_args[0] = i32_res_36037; - global_failure_args[1] = nm_28626; - ; + is_last_group_129760 = ((__local + bool *) sync_arr_mem_129729)[(int64_t) 0]; + if (is_last_group_129760) { + if (local_tid_129719 == 0) { + old_counter_129759 = + atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129716)[sext_i32_i64(srem32(flat_segment_id_129734, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129705)); + } + // read in the per-group-results + { + int64_t read_per_thread_129761 = + sdiv_up64(groups_per_segment_129705, + segred_group_sizze_114921); + + x_114927 = 0; + x_114928 = (int64_t) -1; + x_114929 = 0.0; + for (int64_t i_129762 = 0; i_129762 < + read_per_thread_129761; i_129762++) { + int64_t group_res_id_129763 = + sext_i32_i64(local_tid_129719) * + read_per_thread_129761 + i_129762; + int64_t index_of_group_res_129764 = + sext_i32_i64(flat_segment_id_129734) * + groups_per_segment_129705 + group_res_id_129763; + + if (slt64(group_res_id_129763, + groups_per_segment_129705)) { + x_114930 = ((__global + bool *) group_res_arr_mem_129710)[index_of_group_res_129764 * + segred_group_sizze_114921]; + x_114931 = 
((__global + int64_t *) group_res_arr_mem_129712)[index_of_group_res_129764 * + segred_group_sizze_114921]; + x_114932 = ((__global + double *) group_res_arr_mem_129714)[index_of_group_res_129764 * + segred_group_sizze_114921]; + + bool defunc_1_op_res_114933; + int64_t defunc_1_op_res_114934; + + if (x_114927) { + defunc_1_op_res_114933 = x_114927; + defunc_1_op_res_114934 = x_114928; + } else { + bool x_114935 = x_114930 && x_114930; + bool x_114936 = !x_114930; + bool y_114937 = x_114927 && x_114936; + bool defunc_1_op_res_f_res_114938 = x_114935 || + y_114937; + int64_t defunc_1_op_res_f_res_114939; + + if (x_114930) { + defunc_1_op_res_f_res_114939 = x_114931; + } else { + defunc_1_op_res_f_res_114939 = x_114928; + } + defunc_1_op_res_114933 = + defunc_1_op_res_f_res_114938; + defunc_1_op_res_114934 = + defunc_1_op_res_f_res_114939; + } + + double defunc_1_op_res_114940 = x_114929 + x_114932; + + x_114927 = defunc_1_op_res_114933; + x_114928 = defunc_1_op_res_114934; + x_114929 = defunc_1_op_res_114940; } - return; } } - - float x_36042 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35618 * - ctx_param_ext_44581 + - i32_res_36037 * - ctx_param_ext_44583)]; - int32_t i32_arg_36043 = add32(i_35922, x_36035); - int64_t i32_res_36044 = sext_i32_i64(i32_arg_36043); - bool x_36045 = sle64((int64_t) 0, i32_res_36044); - bool y_36046 = slt64(i32_res_36044, nm_28626); - bool bounds_check_36047 = x_36045 && y_36046; - bool index_certs_36048; - - if (!bounds_check_36047) { + ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_114927; + ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + x_114928; + ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_114929; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129765; + int32_t skip_waves_129766; + + skip_waves_129766 = 1; + + bool x_129742; + int64_t x_129743; + double x_129744; + 
bool x_129745; + int64_t x_129746; + double x_129747; + + offset_129765 = 0; + // participating threads read initial accumulator { - if (atomic_cmpxchg_i32_global(global_failure, -1, 93) == - -1) { - global_failure_args[0] = i32_res_36044; - global_failure_args[1] = nm_28626; - ; + if (slt32(local_tid_129719, + sext_i64_i32(segred_group_sizze_114921))) { + x_129742 = ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129743 = ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129744 = ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + } + } + offset_129765 = 1; + while (slt32(offset_129765, wave_sizze_129721)) { + if (slt32(local_tid_129719 + offset_129765, + sext_i64_i32(segred_group_sizze_114921)) && + ((local_tid_129719 - squot32(local_tid_129719, + wave_sizze_129721) * + wave_sizze_129721) & (2 * offset_129765 - 1)) == + 0) { + // read array element + { + x_129745 = ((volatile __local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129746 = ((volatile __local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129747 = ((volatile __local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129748; + int64_t defunc_1_op_res_129749; + + if (x_129742) { + defunc_1_op_res_129748 = x_129742; + defunc_1_op_res_129749 = x_129743; + } else { + bool x_129750 = x_129745 && x_129745; + bool x_129751 = !x_129745; + bool y_129752 = x_129742 && x_129751; + bool defunc_1_op_res_f_res_129753 = + x_129750 || y_129752; + int64_t defunc_1_op_res_f_res_129754; + + if (x_129745) { + defunc_1_op_res_f_res_129754 = x_129746; + } else { + defunc_1_op_res_f_res_129754 = x_129743; + } + defunc_1_op_res_129748 = + defunc_1_op_res_f_res_129753; + defunc_1_op_res_129749 = + 
defunc_1_op_res_f_res_129754; + } + + double defunc_1_op_res_129755 = x_129744 + + x_129747; + + x_129742 = defunc_1_op_res_129748; + x_129743 = defunc_1_op_res_129749; + x_129744 = defunc_1_op_res_129755; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_129742; + ((volatile __local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + x_129743; + ((volatile __local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_129744; + } + } + offset_129765 *= 2; + } + while (slt32(skip_waves_129766, + squot32(sext_i64_i32(segred_group_sizze_114921) + + wave_sizze_129721 - 1, + wave_sizze_129721))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129765 = skip_waves_129766 * wave_sizze_129721; + if (slt32(local_tid_129719 + offset_129765, + sext_i64_i32(segred_group_sizze_114921)) && + ((local_tid_129719 - squot32(local_tid_129719, + wave_sizze_129721) * + wave_sizze_129721) == 0 && + (squot32(local_tid_129719, wave_sizze_129721) & + (2 * skip_waves_129766 - 1)) == 0)) { + // read array element + { + x_129745 = ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129746 = ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + x_129747 = ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719 + + offset_129765)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129748; + int64_t defunc_1_op_res_129749; + + if (x_129742) { + defunc_1_op_res_129748 = x_129742; + defunc_1_op_res_129749 = x_129743; + } else { + bool x_129750 = x_129745 && x_129745; + bool x_129751 = !x_129745; + bool y_129752 = x_129742 && x_129751; + bool defunc_1_op_res_f_res_129753 = + x_129750 || y_129752; + int64_t defunc_1_op_res_f_res_129754; + + if (x_129745) { + defunc_1_op_res_f_res_129754 = x_129746; + } else { + defunc_1_op_res_f_res_129754 = x_129743; + } + defunc_1_op_res_129748 = + 
defunc_1_op_res_f_res_129753; + defunc_1_op_res_129749 = + defunc_1_op_res_f_res_129754; + } + + double defunc_1_op_res_129755 = x_129744 + + x_129747; + + x_129742 = defunc_1_op_res_129748; + x_129743 = defunc_1_op_res_129749; + x_129744 = defunc_1_op_res_129755; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129723)[sext_i32_i64(local_tid_129719)] = + x_129742; + ((__local + int64_t *) red_arr_mem_129725)[sext_i32_i64(local_tid_129719)] = + x_129743; + ((__local + double *) red_arr_mem_129727)[sext_i32_i64(local_tid_129719)] = + x_129744; + } + } + skip_waves_129766 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129719 == 0) { + ((__global bool *) mem_124994)[gtid_114730] = + x_129742; + ((__global int64_t *) mem_124996)[gtid_114730] = + x_129743; + ((__global double *) mem_124998)[gtid_114730] = + x_129744; } - return; } } - - float x_36049 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35618 * - ctx_param_ext_44581 + - i32_res_36044 * - ctx_param_ext_44583)]; - float y_36050 = x_36030 * x_36049; - float defunc_0_f_res_f_res_t_res_36051 = x_36042 - y_36050; - - defunc_0_f_res_f_res_36033 = defunc_0_f_res_f_res_t_res_36051; - } else { - defunc_0_f_res_f_res_36033 = x_36030; } - defunc_0_f_res_36015 = defunc_0_f_res_f_res_36033; } - ((__global float *) mem_44605)[gtid_35618 * nm_28626 + gtid_35619] = - defunc_0_f_res_36015; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_0: + error_1: return; - #undef segmap_group_sizze_36005 + #undef segred_group_sizze_114921 } -__kernel void mainMagnitudezisegmap_35720(__global int *global_failure, - int64_t m_28478, - int64_t i32_res_35924, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, __global - unsigned char *mem_param_44585, - __global unsigned char *mem_44601) +__kernel void mainzisegred_nonseg_102922(__global int *global_failure, + __local volatile + int64_t 
*red_arr_mem_126356_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126354_backing_aligned_1, + int64_t m_75136, int64_t n_75139, + int64_t m_75231, + int64_t num_groups_102925, + int64_t num_threads_126097, + int64_t num_threads_126348, __global + unsigned char *mem_120127, __global + unsigned char *mem_120130, __global + unsigned char *mem_120144, __global + unsigned char *mem_120146, __global + unsigned char *mem_120172, __global + unsigned char *mem_120174, __global + unsigned char *mem_120177, __global + unsigned char *mem_120180, __global + unsigned char *mainzicounter_mem_126344, + __global + unsigned char *group_res_arr_mem_126346) { - #define segmap_group_sizze_35989 (mainMagnitudezisegmap_group_sizze_35722) + #define segred_group_sizze_102924 (mainzisegred_group_sizze_102911) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126356_backing_1 = + (__local volatile + char *) red_arr_mem_126356_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126354_backing_0 = + (__local volatile + char *) sync_arr_mem_126354_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_45862; - int32_t local_tid_45863; - int64_t group_sizze_45866; - int32_t wave_sizze_45865; - int32_t group_tid_45864; - - global_tid_45862 = get_global_id(0); - local_tid_45863 = get_local_id(0); - group_sizze_45866 = get_local_size(0); - wave_sizze_45865 = LOCKSTEP_WIDTH; - group_tid_45864 = get_group_id(0); + int32_t global_tid_126349; + int32_t local_tid_126350; + int64_t group_sizze_126353; + int32_t wave_sizze_126352; + int32_t group_tid_126351; - int32_t phys_tid_35720; + global_tid_126349 = get_global_id(0); + local_tid_126350 = get_local_id(0); + group_sizze_126353 = get_local_size(0); + wave_sizze_126352 = LOCKSTEP_WIDTH; + group_tid_126351 = get_group_id(0); - phys_tid_35720 = global_tid_45862; + int32_t phys_tid_102922; - int64_t gtid_35719; + phys_tid_102922 = 
global_tid_126349; - gtid_35719 = sext_i32_i64(group_tid_45864) * segmap_group_sizze_35989 + - sext_i32_i64(local_tid_45863); - if (slt64(gtid_35719, m_28478)) { - float v1_35994 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35719 * - ctx_param_ext_44581 + - i32_res_35924 * - ctx_param_ext_44583)]; - bool cond_35995 = v1_35994 == 0.0F; - - ((__global bool *) mem_44601)[gtid_35719] = cond_35995; - } + __local char *sync_arr_mem_126354; - error_0: - return; - #undef segmap_group_sizze_35989 -} -__kernel void mainMagnitudezisegmap_35837(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_28478, int32_t k2p2zq_28491, - int64_t i32_res_28493, - int32_t m_28624, int64_t nm_28626, - __global - unsigned char *defunc_3_map_res_mem_44549, - __global unsigned char *mem_44577) -{ - #define segmap_group_sizze_35897 (mainMagnitudezisegmap_group_sizze_35840) + sync_arr_mem_126354 = (__local char *) sync_arr_mem_126354_backing_0; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + __local char *red_arr_mem_126356; - if (*global_failure >= 0) - return; + red_arr_mem_126356 = (__local char *) red_arr_mem_126356_backing_1; - int32_t global_tid_45829; - int32_t local_tid_45830; - int64_t group_sizze_45833; - int32_t wave_sizze_45832; - int32_t group_tid_45831; + int64_t dummy_102920; - global_tid_45829 = get_global_id(0); - local_tid_45830 = get_local_id(0); - group_sizze_45833 = get_local_size(0); - wave_sizze_45832 = LOCKSTEP_WIDTH; - group_tid_45831 = get_group_id(0); + dummy_102920 = (int64_t) 0; - int32_t phys_tid_35837; + int64_t gtid_102921; - phys_tid_35837 = global_tid_45829; + gtid_102921 = (int64_t) 0; - int64_t gtid_35835; + int64_t x_acc_126358; + int64_t chunk_sizze_126359; - gtid_35835 = squot64(sext_i32_i64(group_tid_45831) * - segmap_group_sizze_35897 + - sext_i32_i64(local_tid_45830), nm_28626); + chunk_sizze_126359 = smin64(sdiv_up64(m_75136, + 
sext_i32_i64(sext_i64_i32(segred_group_sizze_102924 * + num_groups_102925))), + sdiv_up64(m_75136 - phys_tid_102922, + num_threads_126348)); - int64_t gtid_35836; + int64_t x_102931; + int64_t x_102932; - gtid_35836 = sext_i32_i64(group_tid_45831) * segmap_group_sizze_35897 + - sext_i32_i64(local_tid_45830) - squot64(sext_i32_i64(group_tid_45831) * - segmap_group_sizze_35897 + - sext_i32_i64(local_tid_45830), - nm_28626) * nm_28626; - if (slt64(gtid_35835, m_28478) && slt64(gtid_35836, nm_28626)) { - int32_t index_primexp_42361 = sext_i64_i32(gtid_35836); - int32_t defunc_0_f_res_35902 = sdiv32(index_primexp_42361, m_28624); - int32_t defunc_0_f_res_35903 = smod32(index_primexp_42361, m_28624); - bool cond_35904 = slt32(defunc_0_f_res_35903, k2p2zq_28491); - float defunc_0_f_res_35905; - - if (cond_35904) { - int64_t i_35906 = sext_i32_i64(defunc_0_f_res_35902); - bool x_35907 = sle64((int64_t) 0, i_35906); - bool y_35908 = slt64(i_35906, i32_res_28493); - bool bounds_check_35909 = x_35907 && y_35908; - int64_t j_35910 = sext_i32_i64(defunc_0_f_res_35903); - bool x_35911 = sle64((int64_t) 0, j_35910); - bool y_35912 = slt64(j_35910, i32_res_28493); - bool bounds_check_35913 = x_35911 && y_35912; - bool index_ok_35914 = bounds_check_35909 && bounds_check_35913; - bool index_certs_35915; + // neutral-initialise the accumulators + { + x_acc_126358 = (int64_t) -9223372036854775808; + } + for (int64_t i_126363 = 0; i_126363 < chunk_sizze_126359; i_126363++) { + gtid_102921 = phys_tid_102922 + num_threads_126348 * i_126363; + // apply map function + { + int64_t discard_119622; + int64_t scanacc_119618 = (int64_t) 0; - if (!index_ok_35914) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 85) == - -1) { - global_failure_args[0] = i_35906; - global_failure_args[1] = j_35910; - global_failure_args[2] = i32_res_28493; - global_failure_args[3] = i32_res_28493; - ; - } - return; - } + for (int64_t i_119620 = 0; i_119620 < n_75139; i_119620++) { + int64_t binop_y_119975 
= (int64_t) -1 * i_119620; + int64_t slice_119976 = m_75231 + binop_y_119975; + double x_102939 = ((__global + double *) mem_120127)[slice_119976 * + m_75136 + + gtid_102921]; + bool defunc_0_f_res_102940; + + defunc_0_f_res_102940 = futrts_isnan64(x_102939); + + bool defunc_0_g_res_102941 = !defunc_0_f_res_102940; + int64_t defunc_0_f_res_102942 = + btoi_bool_i64(defunc_0_g_res_102941); + int64_t defunc_1_op_res_102938 = add64(defunc_0_f_res_102942, + scanacc_119618); + + ((__global int64_t *) mem_120130)[phys_tid_102922 + i_119620 * + num_threads_126097] = + defunc_1_op_res_102938; + + int64_t scanacc_tmp_126364 = defunc_1_op_res_102938; + + scanacc_119618 = scanacc_tmp_126364; } + discard_119622 = scanacc_119618; - float defunc_0_f_res_t_res_35916 = ((__global - float *) defunc_3_map_res_mem_44549)[gtid_35835 * - (i32_res_28493 * - i32_res_28493) + - i_35906 * - i32_res_28493 + - j_35910]; - - defunc_0_f_res_35905 = defunc_0_f_res_t_res_35916; - } else { - int32_t y_35917 = add32(k2p2zq_28491, defunc_0_f_res_35902); - bool cond_35918 = defunc_0_f_res_35903 == y_35917; - float defunc_0_f_res_f_res_35919; + int64_t last_res_102943 = ((__global + int64_t *) mem_120130)[phys_tid_102922 + + m_75231 * + num_threads_126097]; - if (cond_35918) { - defunc_0_f_res_f_res_35919 = 1.0F; - } else { - defunc_0_f_res_f_res_35919 = 0.0F; + for (int64_t i_126366 = 0; i_126366 < n_75139; i_126366++) { + ((__global double *) mem_120144)[phys_tid_102922 + i_126366 * + num_threads_126097] = NAN; + } + for (int64_t i_126367 = 0; i_126367 < n_75139; i_126367++) { + ((__global int64_t *) mem_120146)[phys_tid_102922 + i_126367 * + num_threads_126097] = + (int64_t) 0; + } + for (int64_t write_iter_119623 = 0; write_iter_119623 < n_75139; + write_iter_119623++) { + int64_t binop_y_119983 = (int64_t) -1 * write_iter_119623; + int64_t slice_119984 = m_75231 + binop_y_119983; + double write_iv_119626 = ((__global + double *) mem_120127)[slice_119984 * + m_75136 + + gtid_102921]; + bool 
defunc_0_f_res_102951; + + defunc_0_f_res_102951 = futrts_isnan64(write_iv_119626); + + bool defunc_0_g_res_102952 = !defunc_0_f_res_102951; + int64_t defunc_1_f_res_102953; + + if (defunc_0_g_res_102952) { + int64_t write_iv_119627 = ((__global + int64_t *) mem_120130)[phys_tid_102922 + + write_iter_119623 * + num_threads_126097]; + int64_t defunc_1_f_res_t_res_102954 = sub64(write_iv_119627, + (int64_t) 1); + + defunc_1_f_res_102953 = defunc_1_f_res_t_res_102954; + } else { + defunc_1_f_res_102953 = (int64_t) -1; + } + + bool less_than_zzero_119629 = slt64(defunc_1_f_res_102953, + (int64_t) 0); + bool greater_than_sizze_119630 = sle64(n_75139, + defunc_1_f_res_102953); + bool outside_bounds_dim_119631 = less_than_zzero_119629 || + greater_than_sizze_119630; + + if (!outside_bounds_dim_119631) { + ((__global int64_t *) mem_120146)[phys_tid_102922 + + defunc_1_f_res_102953 * + num_threads_126097] = + write_iter_119623; + } + if (!outside_bounds_dim_119631) { + for (int64_t i_126370 = 0; i_126370 < (int64_t) 1; + i_126370++) { + ((__global double *) mem_120144)[phys_tid_102922 + + (defunc_1_f_res_102953 + + i_126370) * + num_threads_126097] = + ((__global double *) mem_120127)[m_75136 * + slice_119984 + + gtid_102921 + + i_126370 * + ((int64_t) -1 * + m_75136)]; + } + } + } + // save map-out results + { + ((__global int64_t *) mem_120174)[dummy_102920 * m_75136 + + gtid_102921] = + last_res_102943; + for (int64_t i_126371 = 0; i_126371 < n_75139; i_126371++) { + ((__global double *) mem_120177)[i_126371 * m_75136 + + dummy_102920 * m_75136 + + gtid_102921] = ((__global + double *) mem_120144)[phys_tid_102922 + + i_126371 * + num_threads_126097]; + } + for (int64_t i_126372 = 0; i_126372 < n_75139; i_126372++) { + ((__global int64_t *) mem_120180)[i_126372 * m_75136 + + dummy_102920 * m_75136 + + gtid_102921] = ((__global + int64_t *) mem_120146)[phys_tid_102922 + + i_126372 * + num_threads_126097]; + } + } + // load accumulator + { + x_102931 = x_acc_126358; + } + 
// load new values + { + x_102932 = last_res_102943; + } + // apply reduction operator + { + int64_t defunc_1_op_res_102933 = smax64(x_102931, x_102932); + + // store in accumulator + { + x_acc_126358 = defunc_1_op_res_102933; + } } - defunc_0_f_res_35905 = defunc_0_f_res_f_res_35919; } - ((__global float *) mem_44577)[gtid_35835 * nm_28626 + gtid_35836] = - defunc_0_f_res_35905; } + // to reduce current chunk, first store our result in memory + { + x_102931 = x_acc_126358; + ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_102931; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_35897 -} -__kernel void mainMagnitudezisegmap_36078(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int32_t n_28481, int32_t k2p2zq_28491, - int64_t i32_res_28493, - int64_t num_groups_36099, __global - unsigned char *binop_p_mem_44390, - __global unsigned char *mem_44632, - __global unsigned char *mem_44635, - __global unsigned char *mem_44650) -{ - #define segmap_group_sizze_36098 (mainMagnitudezisegmap_group_sizze_36080) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t offset_126373; + int32_t skip_waves_126374; - if (*global_failure >= 0) - return; + skip_waves_126374 = 1; - int32_t global_tid_45886; - int32_t local_tid_45887; - int64_t group_sizze_45890; - int32_t wave_sizze_45889; - int32_t group_tid_45888; + int64_t x_126360; + int64_t x_126361; - global_tid_45886 = get_global_id(0); - local_tid_45887 = get_local_id(0); - group_sizze_45890 = get_local_size(0); - wave_sizze_45889 = LOCKSTEP_WIDTH; - group_tid_45888 = get_group_id(0); + offset_126373 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126350, sext_i64_i32(segred_group_sizze_102924))) { + x_126360 = ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126373)]; + } + } + offset_126373 = 1; + while (slt32(offset_126373, 
wave_sizze_126352)) { + if (slt32(local_tid_126350 + offset_126373, + sext_i64_i32(segred_group_sizze_102924)) && + ((local_tid_126350 - squot32(local_tid_126350, wave_sizze_126352) * + wave_sizze_126352) & (2 * offset_126373 - 1)) == 0) { + // read array element + { + x_126361 = ((volatile __local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126373)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126362 = smax64(x_126360, x_126361); + + x_126360 = defunc_1_op_res_126362; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_126360; + } + } + offset_126373 *= 2; + } + while (slt32(skip_waves_126374, + squot32(sext_i64_i32(segred_group_sizze_102924) + + wave_sizze_126352 - 1, wave_sizze_126352))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126373 = skip_waves_126374 * wave_sizze_126352; + if (slt32(local_tid_126350 + offset_126373, + sext_i64_i32(segred_group_sizze_102924)) && + ((local_tid_126350 - squot32(local_tid_126350, wave_sizze_126352) * + wave_sizze_126352) == 0 && (squot32(local_tid_126350, + wave_sizze_126352) & (2 * + skip_waves_126374 - + 1)) == + 0)) { + // read array element + { + x_126361 = ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126373)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126362 = smax64(x_126360, x_126361); + + x_126360 = defunc_1_op_res_126362; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_126360; + } + } + skip_waves_126374 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126350) == (int64_t) 0) { + x_acc_126358 = x_126360; + } + } - int32_t phys_tid_36078; + int32_t old_counter_126375; - phys_tid_36078 = global_tid_45886; + // first thread in group saves group result to global memory + { + if 
(local_tid_126350 == 0) { + ((__global + int64_t *) group_res_arr_mem_126346)[sext_i32_i64(group_tid_126351) * + segred_group_sizze_102924] = + x_acc_126358; + mem_fence_global(); + old_counter_126375 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_126344)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_126354)[(int64_t) 0] = + old_counter_126375 == num_groups_102925 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - int32_t phys_group_id_45891; + bool is_last_group_126376; - phys_group_id_45891 = get_group_id(0); - for (int32_t i_45892 = 0; i_45892 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, segmap_group_sizze_36098)) - - phys_group_id_45891, sext_i64_i32(num_groups_36099)); - i_45892++) { - int32_t virt_group_id_45893 = phys_group_id_45891 + i_45892 * - sext_i64_i32(num_groups_36099); - int64_t gtid_36077 = sext_i32_i64(virt_group_id_45893) * - segmap_group_sizze_36098 + sext_i32_i64(local_tid_45887); - - if (slt64(gtid_36077, m_28478)) { - for (int32_t i_44364 = 0; i_44364 < k2p2zq_28491; i_44364++) { - int64_t i_44305 = sext_i32_i64(i_44364); - float defunc_2_reduce_res_36105; - float redout_44307 = 0.0F; - - for (int32_t i_44363 = 0; i_44363 < n_28481; i_44363++) { - int64_t i_44308 = sext_i32_i64(i_44363); - float x_36110 = ((__global float *) mem_44632)[i_44308 * - m_28478 + - gtid_36077]; - bool isnan_res_36111; - - isnan_res_36111 = futrts_isnan32(x_36110); + is_last_group_126376 = ((__local bool *) sync_arr_mem_126354)[(int64_t) 0]; + if (is_last_group_126376) { + if (local_tid_126350 == 0) { + old_counter_126375 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_126344)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_102925)); + } + // read in the per-group-results + { + int64_t read_per_thread_126377 = sdiv_up64(num_groups_102925, + segred_group_sizze_102924); + + x_102931 = (int64_t) -9223372036854775808; + for (int64_t i_126378 = 0; i_126378 < 
read_per_thread_126377; + i_126378++) { + int64_t group_res_id_126379 = sext_i32_i64(local_tid_126350) * + read_per_thread_126377 + i_126378; + int64_t index_of_group_res_126380 = group_res_id_126379; + + if (slt64(group_res_id_126379, num_groups_102925)) { + x_102932 = ((__global + int64_t *) group_res_arr_mem_126346)[index_of_group_res_126380 * + segred_group_sizze_102924]; - float defunc_1_f_res_36112; + int64_t defunc_1_op_res_102933; - if (isnan_res_36111) { - defunc_1_f_res_36112 = 0.0F; - } else { - float x_36109 = ((__global - float *) binop_p_mem_44390)[i_44305 * - N_28477 + - i_44308]; - float defunc_1_f_res_f_res_36113 = x_36109 * x_36110; + defunc_1_op_res_102933 = smax64(x_102931, x_102932); + x_102931 = defunc_1_op_res_102933; + } + } + } + ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_102931; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126381; + int32_t skip_waves_126382; + + skip_waves_126382 = 1; + + int64_t x_126360; + int64_t x_126361; + + offset_126381 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126350, + sext_i64_i32(segred_group_sizze_102924))) { + x_126360 = ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126381)]; + } + } + offset_126381 = 1; + while (slt32(offset_126381, wave_sizze_126352)) { + if (slt32(local_tid_126350 + offset_126381, + sext_i64_i32(segred_group_sizze_102924)) && + ((local_tid_126350 - squot32(local_tid_126350, + wave_sizze_126352) * + wave_sizze_126352) & (2 * offset_126381 - 1)) == 0) { + // read array element + { + x_126361 = ((volatile __local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126381)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126362 = smax64(x_126360, + x_126361); - defunc_1_f_res_36112 = defunc_1_f_res_f_res_36113; + x_126360 = defunc_1_op_res_126362; + } + // write result of operation + { + ((volatile 
__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_126360; + } + } + offset_126381 *= 2; + } + while (slt32(skip_waves_126382, + squot32(sext_i64_i32(segred_group_sizze_102924) + + wave_sizze_126352 - 1, wave_sizze_126352))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126381 = skip_waves_126382 * wave_sizze_126352; + if (slt32(local_tid_126350 + offset_126381, + sext_i64_i32(segred_group_sizze_102924)) && + ((local_tid_126350 - squot32(local_tid_126350, + wave_sizze_126352) * + wave_sizze_126352) == 0 && (squot32(local_tid_126350, + wave_sizze_126352) & + (2 * skip_waves_126382 - + 1)) == 0)) { + // read array element + { + x_126361 = ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350 + + offset_126381)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126362 = smax64(x_126360, + x_126361); + + x_126360 = defunc_1_op_res_126362; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126356)[sext_i32_i64(local_tid_126350)] = + x_126360; } - - float defunc_1_op_res_36108 = defunc_1_f_res_36112 + - redout_44307; - float redout_tmp_45895 = defunc_1_op_res_36108; - - redout_44307 = redout_tmp_45895; } - defunc_2_reduce_res_36105 = redout_44307; - ((__global float *) mem_44635)[phys_tid_36078 + i_44305 * - (num_groups_36099 * - segmap_group_sizze_36098)] = - defunc_2_reduce_res_36105; + skip_waves_126382 *= 2; } - for (int64_t i_45896 = 0; i_45896 < i32_res_28493; i_45896++) { - ((__global float *) mem_44650)[i_45896 * m_28478 + gtid_36077] = - ((__global float *) mem_44635)[phys_tid_36078 + i_45896 * - (num_groups_36099 * - segmap_group_sizze_36098)]; + // and back to memory with the final result + { + if (local_tid_126350 == 0) { + ((__global int64_t *) mem_120172)[(int64_t) 0] = x_126360; + } } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_0: + error_1: return; - #undef segmap_group_sizze_36098 + #undef segred_group_sizze_102924 } -__kernel void 
mainMagnitudezisegmap_36219(__global int *global_failure, - int64_t m_28478, int32_t k2p2zq_28491, - int64_t i32_res_28493, - int64_t num_groups_36239, __global - unsigned char *mem_44854, __global - unsigned char *mem_44857, __global - unsigned char *mem_44860, __global - unsigned char *mem_44875) +__kernel void mainzisegred_nonseg_103159(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126507_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126505_backing_aligned_1, + int64_t m_75136, + int64_t num_groups_103256, + int64_t num_threads_126499, __global + unsigned char *defunc_2_reduce_res_map_acc_mem_120211, + __global unsigned char *mem_120218, + __global + unsigned char *mainzicounter_mem_126495, + __global + unsigned char *group_res_arr_mem_126497) { - #define segmap_group_sizze_36238 (mainMagnitudezisegmap_group_sizze_36221) + #define segred_group_sizze_103255 (mainzisegred_group_sizze_103151) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126507_backing_1 = + (__local volatile + char *) red_arr_mem_126507_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126505_backing_0 = + (__local volatile + char *) sync_arr_mem_126505_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46018; - int32_t local_tid_46019; - int64_t group_sizze_46022; - int32_t wave_sizze_46021; - int32_t group_tid_46020; + int32_t global_tid_126500; + int32_t local_tid_126501; + int64_t group_sizze_126504; + int32_t wave_sizze_126503; + int32_t group_tid_126502; - global_tid_46018 = get_global_id(0); - local_tid_46019 = get_local_id(0); - group_sizze_46022 = get_local_size(0); - wave_sizze_46021 = LOCKSTEP_WIDTH; - group_tid_46020 = get_group_id(0); + global_tid_126500 = get_global_id(0); + local_tid_126501 = get_local_id(0); + group_sizze_126504 = get_local_size(0); + wave_sizze_126503 = LOCKSTEP_WIDTH; + group_tid_126502 = get_group_id(0); - 
int32_t phys_tid_36219; + int32_t phys_tid_103159; - phys_tid_36219 = global_tid_46018; + phys_tid_103159 = global_tid_126500; - int32_t phys_group_id_46023; + __local char *sync_arr_mem_126505; - phys_group_id_46023 = get_group_id(0); - for (int32_t i_46024 = 0; i_46024 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, segmap_group_sizze_36238)) - - phys_group_id_46023, sext_i64_i32(num_groups_36239)); - i_46024++) { - int32_t virt_group_id_46025 = phys_group_id_46023 + i_46024 * - sext_i64_i32(num_groups_36239); - int64_t gtid_36218 = sext_i32_i64(virt_group_id_46025) * - segmap_group_sizze_36238 + sext_i32_i64(local_tid_46019); - - if (slt64(gtid_36218, m_28478)) { - for (int32_t i_44366 = 0; i_44366 < k2p2zq_28491; i_44366++) { - int64_t i_44311 = sext_i32_i64(i_44366); - float defunc_0_f_res_36246; - float redout_44313 = 0.0F; - - for (int32_t i_44365 = 0; i_44365 < k2p2zq_28491; i_44365++) { - int64_t i_44314 = sext_i32_i64(i_44365); - float x_36250 = ((__global float *) mem_44857)[i_44314 * - m_28478 + - gtid_36218]; - float x_36251 = ((__global float *) mem_44854)[i_44311 * - (m_28478 * - i32_res_28493) + - i_44314 * - m_28478 + - gtid_36218]; - float defunc_1_f_res_36252 = x_36250 * x_36251; - float defunc_1_op_res_36249 = defunc_1_f_res_36252 + - redout_44313; - float redout_tmp_46027 = defunc_1_op_res_36249; - - redout_44313 = redout_tmp_46027; - } - defunc_0_f_res_36246 = redout_44313; - ((__global float *) mem_44860)[phys_tid_36219 + i_44311 * - (num_groups_36239 * - segmap_group_sizze_36238)] = - defunc_0_f_res_36246; - } - for (int64_t i_46028 = 0; i_46028 < i32_res_28493; i_46028++) { - ((__global float *) mem_44875)[i_46028 * m_28478 + gtid_36218] = - ((__global float *) mem_44860)[phys_tid_36219 + i_46028 * - (num_groups_36239 * - segmap_group_sizze_36238)]; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } + sync_arr_mem_126505 = (__local char *) sync_arr_mem_126505_backing_0; - error_0: - return; - #undef segmap_group_sizze_36238 -} 
-__kernel void mainMagnitudezisegmap_36351(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int32_t k2p2zq_28491, - int64_t i32_res_28493, - int64_t num_groups_36370, __global - unsigned char *mem_44397, __global - unsigned char *mem_44919, __global - unsigned char *mem_44922, __global - unsigned char *mem_44937) -{ - #define segmap_group_sizze_36369 (mainMagnitudezisegmap_group_sizze_36353) + __local char *red_arr_mem_126507; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + red_arr_mem_126507 = (__local char *) red_arr_mem_126507_backing_1; - if (*global_failure >= 0) - return; + int64_t dummy_103157; - int32_t global_tid_46106; - int32_t local_tid_46107; - int64_t group_sizze_46110; - int32_t wave_sizze_46109; - int32_t group_tid_46108; + dummy_103157 = (int64_t) 0; - global_tid_46106 = get_global_id(0); - local_tid_46107 = get_local_id(0); - group_sizze_46110 = get_local_size(0); - wave_sizze_46109 = LOCKSTEP_WIDTH; - group_tid_46108 = get_group_id(0); + int64_t gtid_103158; - int32_t phys_tid_36351; + gtid_103158 = (int64_t) 0; - phys_tid_36351 = global_tid_46106; + int64_t x_acc_126509; + int64_t chunk_sizze_126510; - int32_t phys_group_id_46111; + chunk_sizze_126510 = smin64(sdiv_up64(m_75136, + sext_i32_i64(sext_i64_i32(segred_group_sizze_103255 * + num_groups_103256))), + sdiv_up64(m_75136 - phys_tid_103159, + num_threads_126499)); - phys_group_id_46111 = get_group_id(0); - for (int32_t i_46112 = 0; i_46112 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, segmap_group_sizze_36369)) - - phys_group_id_46111, sext_i64_i32(num_groups_36370)); - i_46112++) { - int32_t virt_group_id_46113 = phys_group_id_46111 + i_46112 * - sext_i64_i32(num_groups_36370); - int64_t gtid_36350 = sext_i32_i64(virt_group_id_46113) * - segmap_group_sizze_36369 + sext_i32_i64(local_tid_46107); - - if (slt64(gtid_36350, m_28478)) { - for (int64_t i_44321 = 0; i_44321 < N_28477; i_44321++) { - float defunc_0_f_res_36376; - float 
redout_44323 = 0.0F; + int64_t x_103259; + int64_t x_103260; + + // neutral-initialise the accumulators + { + x_acc_126509 = (int64_t) -9223372036854775808; + } + for (int64_t i_126514 = 0; i_126514 < chunk_sizze_126510; i_126514++) { + gtid_103158 = phys_tid_103159 + num_threads_126499 * i_126514; + // apply map function + { + int64_t x_103262 = ((__global + int64_t *) defunc_2_reduce_res_map_acc_mem_120211)[gtid_103158]; + + // save map-out results + { } + // load accumulator + { + x_103259 = x_acc_126509; + } + // load new values + { + x_103260 = x_103262; + } + // apply reduction operator + { + int64_t defunc_1_op_res_103261 = smax64(x_103259, x_103260); - for (int32_t i_44369 = 0; i_44369 < k2p2zq_28491; i_44369++) { - int64_t i_44324 = sext_i32_i64(i_44369); - float x_36380 = ((__global float *) mem_44919)[i_44324 * - m_28478 + - gtid_36350]; - float x_36381 = ((__global float *) mem_44397)[i_44321 * - i32_res_28493 + - i_44324]; - float defunc_1_f_res_36382 = x_36380 * x_36381; - float defunc_1_op_res_36379 = defunc_1_f_res_36382 + - redout_44323; - float redout_tmp_46115 = defunc_1_op_res_36379; - - redout_44323 = redout_tmp_46115; + // store in accumulator + { + x_acc_126509 = defunc_1_op_res_103261; } - defunc_0_f_res_36376 = redout_44323; - ((__global float *) mem_44922)[phys_tid_36351 + i_44321 * - (num_groups_36370 * - segmap_group_sizze_36369)] = - defunc_0_f_res_36376; - } - for (int64_t i_46116 = 0; i_46116 < N_28477; i_46116++) { - ((__global float *) mem_44937)[i_46116 * m_28478 + gtid_36350] = - ((__global float *) mem_44922)[phys_tid_36351 + i_46116 * - (num_groups_36370 * - segmap_group_sizze_36369)]; } } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } + // to reduce current chunk, first store our result in memory + { + x_103259 = x_acc_126509; + ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_103259; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_36369 -} -__kernel 
void mainMagnitudezisegmap_36631(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - __global unsigned char *mem_45163, - __global unsigned char *mem_45166, - __global unsigned char *mem_45172, - __global unsigned char *mem_45175) -{ - #define segmap_group_sizze_36797 (mainMagnitudezisegmap_group_sizze_36634) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46333; - int32_t local_tid_46334; - int64_t group_sizze_46337; - int32_t wave_sizze_46336; - int32_t group_tid_46335; - - global_tid_46333 = get_global_id(0); - local_tid_46334 = get_local_id(0); - group_sizze_46337 = get_local_size(0); - wave_sizze_46336 = LOCKSTEP_WIDTH; - group_tid_46335 = get_group_id(0); - - int32_t phys_tid_36631; - - phys_tid_36631 = global_tid_46333; - - int64_t gtid_36629; + int32_t offset_126515; + int32_t skip_waves_126516; - gtid_36629 = squot64(sext_i32_i64(group_tid_46335) * - segmap_group_sizze_36797 + - sext_i32_i64(local_tid_46334), N_28477); + skip_waves_126516 = 1; - int64_t gtid_36630; + int64_t x_126511; + int64_t x_126512; - gtid_36630 = sext_i32_i64(group_tid_46335) * segmap_group_sizze_36797 + - sext_i32_i64(local_tid_46334) - squot64(sext_i32_i64(group_tid_46335) * - segmap_group_sizze_36797 + - sext_i32_i64(local_tid_46334), - N_28477) * N_28477; - if (slt64(gtid_36629, m_28478) && slt64(gtid_36630, N_28477)) { - float x_36805 = ((__global float *) mem_45166)[gtid_36629 * N_28477 + - gtid_36630]; - int32_t index_primexp_42377 = sext_i64_i32(gtid_36630); - bool isnan_res_36808; - - isnan_res_36808 = futrts_isnan32(x_36805); - - bool defunc_0_p_res_36809 = !isnan_res_36808; - int64_t defunc_1_f_res_36810; - - if (defunc_0_p_res_36809) { - int64_t x_36806 = ((__global int64_t *) mem_45163)[gtid_36629 * - N_28477 + - gtid_36630]; - int64_t defunc_1_f_res_t_res_36811 = sub64(x_36806, (int64_t) 1); - - defunc_1_f_res_36810 = defunc_1_f_res_t_res_36811; - } else 
{ - defunc_1_f_res_36810 = (int64_t) -1; + offset_126515 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126501, sext_i64_i32(segred_group_sizze_103255))) { + x_126511 = ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126515)]; + } + } + offset_126515 = 1; + while (slt32(offset_126515, wave_sizze_126503)) { + if (slt32(local_tid_126501 + offset_126515, + sext_i64_i32(segred_group_sizze_103255)) && + ((local_tid_126501 - squot32(local_tid_126501, wave_sizze_126503) * + wave_sizze_126503) & (2 * offset_126515 - 1)) == 0) { + // read array element + { + x_126512 = ((volatile __local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126515)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126513 = smax64(x_126511, x_126512); + + x_126511 = defunc_1_op_res_126513; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_126511; + } } - if ((sle64((int64_t) 0, gtid_36629) && slt64(gtid_36629, m_28478)) && - (sle64((int64_t) 0, defunc_1_f_res_36810) && - slt64(defunc_1_f_res_36810, N_28477))) { - ((__global int32_t *) mem_45175)[gtid_36629 * N_28477 + - defunc_1_f_res_36810] = - index_primexp_42377; + offset_126515 *= 2; + } + while (slt32(skip_waves_126516, + squot32(sext_i64_i32(segred_group_sizze_103255) + + wave_sizze_126503 - 1, wave_sizze_126503))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126515 = skip_waves_126516 * wave_sizze_126503; + if (slt32(local_tid_126501 + offset_126515, + sext_i64_i32(segred_group_sizze_103255)) && + ((local_tid_126501 - squot32(local_tid_126501, wave_sizze_126503) * + wave_sizze_126503) == 0 && (squot32(local_tid_126501, + wave_sizze_126503) & (2 * + skip_waves_126516 - + 1)) == + 0)) { + // read array element + { + x_126512 = ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126515)]; + } + // apply reduction 
operation + { + int64_t defunc_1_op_res_126513 = smax64(x_126511, x_126512); + + x_126511 = defunc_1_op_res_126513; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_126511; + } } - if ((sle64((int64_t) 0, gtid_36629) && slt64(gtid_36629, m_28478)) && - (sle64((int64_t) 0, defunc_1_f_res_36810) && - slt64(defunc_1_f_res_36810, N_28477))) { - ((__global float *) mem_45172)[gtid_36629 * N_28477 + - defunc_1_f_res_36810] = x_36805; + skip_waves_126516 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126501) == (int64_t) 0) { + x_acc_126509 = x_126511; } } - error_0: - return; - #undef segmap_group_sizze_36797 -} -__kernel void mainMagnitudezisegmap_36707(__global int *global_failure, - int64_t N_28477, int64_t m_28478, - int64_t i_28781, __global - unsigned char *mem_45163, __global - unsigned char *mem_45169) -{ - #define segmap_group_sizze_36761 (mainMagnitudezisegmap_group_sizze_36709) + int32_t old_counter_126517; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46310; - int32_t local_tid_46311; - int64_t group_sizze_46314; - int32_t wave_sizze_46313; - int32_t group_tid_46312; - - global_tid_46310 = get_global_id(0); - local_tid_46311 = get_local_id(0); - group_sizze_46314 = get_local_size(0); - wave_sizze_46313 = LOCKSTEP_WIDTH; - group_tid_46312 = get_group_id(0); - - int32_t phys_tid_36707; - - phys_tid_36707 = global_tid_46310; + // first thread in group saves group result to global memory + { + if (local_tid_126501 == 0) { + ((__global + int64_t *) group_res_arr_mem_126497)[sext_i32_i64(group_tid_126502) * + segred_group_sizze_103255] = + x_acc_126509; + mem_fence_global(); + old_counter_126517 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_126495)[(int64_t) 0], + (int) 1); + ((__local bool 
*) sync_arr_mem_126505)[(int64_t) 0] = + old_counter_126517 == num_groups_103256 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - int64_t gtid_36706; + bool is_last_group_126518; - gtid_36706 = sext_i32_i64(group_tid_46312) * segmap_group_sizze_36761 + - sext_i32_i64(local_tid_46311); - if (slt64(gtid_36706, m_28478)) { - int64_t last_res_36765 = ((__global int64_t *) mem_45163)[gtid_36706 * - N_28477 + - i_28781]; - int32_t defunc_0_f_res_36766 = sext_i64_i32(last_res_36765); - - ((__global int32_t *) mem_45169)[gtid_36706] = defunc_0_f_res_36766; + is_last_group_126518 = ((__local bool *) sync_arr_mem_126505)[(int64_t) 0]; + if (is_last_group_126518) { + if (local_tid_126501 == 0) { + old_counter_126517 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_126495)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_103256)); + } + // read in the per-group-results + { + int64_t read_per_thread_126519 = sdiv_up64(num_groups_103256, + segred_group_sizze_103255); + + x_103259 = (int64_t) -9223372036854775808; + for (int64_t i_126520 = 0; i_126520 < read_per_thread_126519; + i_126520++) { + int64_t group_res_id_126521 = sext_i32_i64(local_tid_126501) * + read_per_thread_126519 + i_126520; + int64_t index_of_group_res_126522 = group_res_id_126521; + + if (slt64(group_res_id_126521, num_groups_103256)) { + x_103260 = ((__global + int64_t *) group_res_arr_mem_126497)[index_of_group_res_126522 * + segred_group_sizze_103255]; + + int64_t defunc_1_op_res_103261; + + defunc_1_op_res_103261 = smax64(x_103259, x_103260); + x_103259 = defunc_1_op_res_103261; + } + } + } + ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_103259; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126523; + int32_t skip_waves_126524; + + skip_waves_126524 = 1; + + int64_t x_126511; + int64_t x_126512; + + offset_126523 = 0; + // participating threads read initial accumulator + { + if 
(slt32(local_tid_126501, + sext_i64_i32(segred_group_sizze_103255))) { + x_126511 = ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126523)]; + } + } + offset_126523 = 1; + while (slt32(offset_126523, wave_sizze_126503)) { + if (slt32(local_tid_126501 + offset_126523, + sext_i64_i32(segred_group_sizze_103255)) && + ((local_tid_126501 - squot32(local_tid_126501, + wave_sizze_126503) * + wave_sizze_126503) & (2 * offset_126523 - 1)) == 0) { + // read array element + { + x_126512 = ((volatile __local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126523)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126513 = smax64(x_126511, + x_126512); + + x_126511 = defunc_1_op_res_126513; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_126511; + } + } + offset_126523 *= 2; + } + while (slt32(skip_waves_126524, + squot32(sext_i64_i32(segred_group_sizze_103255) + + wave_sizze_126503 - 1, wave_sizze_126503))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126523 = skip_waves_126524 * wave_sizze_126503; + if (slt32(local_tid_126501 + offset_126523, + sext_i64_i32(segred_group_sizze_103255)) && + ((local_tid_126501 - squot32(local_tid_126501, + wave_sizze_126503) * + wave_sizze_126503) == 0 && (squot32(local_tid_126501, + wave_sizze_126503) & + (2 * skip_waves_126524 - + 1)) == 0)) { + // read array element + { + x_126512 = ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501 + + offset_126523)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126513 = smax64(x_126511, + x_126512); + + x_126511 = defunc_1_op_res_126513; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126507)[sext_i32_i64(local_tid_126501)] = + x_126511; + } + } + skip_waves_126524 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126501 == 0) { + ((__global int64_t *) 
mem_120218)[(int64_t) 0] = x_126511; + } + } + } } - error_0: + error_1: return; - #undef segmap_group_sizze_36761 + #undef segred_group_sizze_103255 } -__kernel void mainMagnitudezisegmap_36970(__global int *global_failure, - int64_t m_28478, float hfrac_28483, - int32_t k2p2_28489, __global - unsigned char *mem_45232, __global - unsigned char *mem_45235, __global - unsigned char *mem_45238, __global - unsigned char *mem_45240) +__kernel void mainzisegred_nonseg_110867(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127986_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_127984_backing_aligned_1, + int64_t m_75136, + int64_t num_groups_110862, + int64_t num_threads_127978, __global + unsigned char *defunc_7_map_res_mem_123721, + __global unsigned char *mem_123728, + __global + unsigned char *mainzicounter_mem_127974, + __global + unsigned char *group_res_arr_mem_127976) { - #define segmap_group_sizze_37063 (mainMagnitudezisegmap_group_sizze_36972) + #define segred_group_sizze_110860 (mainzisegred_group_sizze_110859) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127986_backing_1 = + (__local volatile + char *) red_arr_mem_127986_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_127984_backing_0 = + (__local volatile + char *) sync_arr_mem_127984_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46486; - int32_t local_tid_46487; - int64_t group_sizze_46490; - int32_t wave_sizze_46489; - int32_t group_tid_46488; + int32_t global_tid_127979; + int32_t local_tid_127980; + int64_t group_sizze_127983; + int32_t wave_sizze_127982; + int32_t group_tid_127981; - global_tid_46486 = get_global_id(0); - local_tid_46487 = get_local_id(0); - group_sizze_46490 = get_local_size(0); - wave_sizze_46489 = LOCKSTEP_WIDTH; - group_tid_46488 = get_group_id(0); + global_tid_127979 = get_global_id(0); + local_tid_127980 = 
get_local_id(0); + group_sizze_127983 = get_local_size(0); + wave_sizze_127982 = LOCKSTEP_WIDTH; + group_tid_127981 = get_group_id(0); - int32_t phys_tid_36970; + int32_t phys_tid_110867; - phys_tid_36970 = global_tid_46486; + phys_tid_110867 = global_tid_127979; - int64_t gtid_36969; + __local char *sync_arr_mem_127984; - gtid_36969 = sext_i32_i64(group_tid_46488) * segmap_group_sizze_37063 + - sext_i32_i64(local_tid_46487); - if (slt64(gtid_36969, m_28478)) { - int32_t defunc_0_f_res_37067 = ((__global - int32_t *) mem_45232)[gtid_36969]; - float defunc_0_f_res_37068 = ((__global float *) mem_45235)[gtid_36969]; - int32_t r32_arg_37069 = sub32(defunc_0_f_res_37067, k2p2_28489); - float i32_res_37070 = sitofp_i32_f32(r32_arg_37069); - float sqrt_arg_37071 = defunc_0_f_res_37068 / i32_res_37070; - float sqrt_res_37072; - - sqrt_res_37072 = futrts_sqrt32(sqrt_arg_37071); - - float i32_res_37073 = sitofp_i32_f32(defunc_0_f_res_37067); - float t32_arg_37074 = hfrac_28483 * i32_res_37073; - int32_t f32_res_37075 = fptosi_f32_i32(t32_arg_37074); - - ((__global int32_t *) mem_45238)[gtid_36969] = f32_res_37075; - ((__global float *) mem_45240)[gtid_36969] = sqrt_res_37072; - } + sync_arr_mem_127984 = (__local char *) sync_arr_mem_127984_backing_0; - error_0: - return; - #undef segmap_group_sizze_37063 -} -__kernel void mainMagnitudezisegmap_37271(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_28477, int32_t n_28481, - float lam_28484, - int64_t iota32_arg_28909, - float i32_res_28921, __global - unsigned char *mappingindices_mem_44380, - __global unsigned char *mem_45282, - __global unsigned char *mem_45284) -{ - #define segmap_group_sizze_37293 (mainMagnitudezisegmap_group_sizze_37273) + __local char *red_arr_mem_127986; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + red_arr_mem_127986 = (__local char *) red_arr_mem_127986_backing_1; - if (*global_failure >= 0) - 
return; + int64_t dummy_110865; - int32_t global_tid_46596; - int32_t local_tid_46597; - int64_t group_sizze_46600; - int32_t wave_sizze_46599; - int32_t group_tid_46598; + dummy_110865 = (int64_t) 0; - global_tid_46596 = get_global_id(0); - local_tid_46597 = get_local_id(0); - group_sizze_46600 = get_local_size(0); - wave_sizze_46599 = LOCKSTEP_WIDTH; - group_tid_46598 = get_group_id(0); + int64_t gtid_110866; - int32_t phys_tid_37271; + gtid_110866 = (int64_t) 0; - phys_tid_37271 = global_tid_46596; + bool x_acc_127988; + int64_t chunk_sizze_127989; - int64_t gtid_37270; + chunk_sizze_127989 = smin64(sdiv_up64(m_75136, + sext_i32_i64(sext_i64_i32(segred_group_sizze_110860 * + num_groups_110862))), + sdiv_up64(m_75136 - phys_tid_110867, + num_threads_127978)); - gtid_37270 = sext_i32_i64(group_tid_46598) * segmap_group_sizze_37293 + - sext_i32_i64(local_tid_46597); - if (slt64(gtid_37270, iota32_arg_28909)) { - int32_t defunc_0_f_res_37298 = sext_i64_i32(gtid_37270); - int32_t i_37299 = add32(n_28481, defunc_0_f_res_37298); - int64_t i_37300 = sext_i32_i64(i_37299); - bool x_37301 = sle64((int64_t) 0, i_37300); - bool y_37302 = slt64(i_37300, N_28477); - bool bounds_check_37303 = x_37301 && y_37302; - bool index_certs_37304; - - if (!bounds_check_37303) { + bool x_76420; + bool x_76421; + + // neutral-initialise the accumulators + { + x_acc_127988 = 0; + } + for (int64_t i_127993 = 0; i_127993 < chunk_sizze_127989; i_127993++) { + gtid_110866 = phys_tid_110867 + num_threads_127978 * i_127993; + // apply map function + { + bool x_76423 = ((__global + bool *) defunc_7_map_res_mem_123721)[gtid_110866]; + + // save map-out results + { } + // load accumulator { - if (atomic_cmpxchg_i32_global(global_failure, -1, 103) == -1) { - global_failure_args[0] = i_37300; - global_failure_args[1] = N_28477; - ; + x_76420 = x_acc_127988; + } + // load new values + { + x_76421 = x_76423; + } + // apply reduction operator + { + bool defunc_1_op_res_76422 = x_76420 || x_76421; + + // 
store in accumulator + { + x_acc_127988 = defunc_1_op_res_76422; } - return; } } - - int32_t time_37305 = ((__global - int32_t *) mappingindices_mem_44380)[i_37300]; - float i32_res_37306 = sitofp_i32_f32(time_37305); - float logplus_arg_37307 = i32_res_37306 / i32_res_28921; - bool cond_37308 = 2.7182817F < logplus_arg_37307; - float logplus_res_37309; - - if (cond_37308) { - float log_res_37310; - - log_res_37310 = futrts_log32(logplus_arg_37307); - logplus_res_37309 = log_res_37310; - } else { - logplus_res_37309 = 1.0F; - } - - float sqrt_res_37311; - - sqrt_res_37311 = futrts_sqrt32(logplus_res_37309); - - float defunc_0_f_res_37312 = lam_28484 * sqrt_res_37311; - - ((__global int32_t *) mem_45282)[gtid_37270] = defunc_0_f_res_37298; - ((__global float *) mem_45284)[gtid_37270] = defunc_0_f_res_37312; } + // to reduce current chunk, first store our result in memory + { + x_76420 = x_acc_127988; + ((__local bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + x_76420; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_37293 -} -__kernel void mainMagnitudezisegmap_37528(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t m_28478, - int64_t iota32_arg_28909, - int64_t distance_28943, - int64_t segmap_usable_groups_37761, - __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45287, - __global unsigned char *mem_45294, - __global unsigned char *mem_45305, - __global unsigned char *mem_45325) -{ - #define segmap_group_sizze_37760 (mainMagnitudezisegmap_group_sizze_37530) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t offset_127994; + int32_t skip_waves_127995; - if (*global_failure >= 0) - return; + skip_waves_127995 = 1; - int32_t global_tid_46611; - int32_t local_tid_46612; - int64_t group_sizze_46615; - int32_t wave_sizze_46614; 
- int32_t group_tid_46613; + bool x_127990; + bool x_127991; - global_tid_46611 = get_global_id(0); - local_tid_46612 = get_local_id(0); - group_sizze_46615 = get_local_size(0); - wave_sizze_46614 = LOCKSTEP_WIDTH; - group_tid_46613 = get_group_id(0); + offset_127994 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127980, sext_i64_i32(segred_group_sizze_110860))) { + x_127990 = ((__local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_127994)]; + } + } + offset_127994 = 1; + while (slt32(offset_127994, wave_sizze_127982)) { + if (slt32(local_tid_127980 + offset_127994, + sext_i64_i32(segred_group_sizze_110860)) && + ((local_tid_127980 - squot32(local_tid_127980, wave_sizze_127982) * + wave_sizze_127982) & (2 * offset_127994 - 1)) == 0) { + // read array element + { + x_127991 = ((volatile __local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_127994)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127992 = x_127990 || x_127991; + + x_127990 = defunc_1_op_res_127992; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + x_127990; + } + } + offset_127994 *= 2; + } + while (slt32(skip_waves_127995, + squot32(sext_i64_i32(segred_group_sizze_110860) + + wave_sizze_127982 - 1, wave_sizze_127982))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127994 = skip_waves_127995 * wave_sizze_127982; + if (slt32(local_tid_127980 + offset_127994, + sext_i64_i32(segred_group_sizze_110860)) && + ((local_tid_127980 - squot32(local_tid_127980, wave_sizze_127982) * + wave_sizze_127982) == 0 && (squot32(local_tid_127980, + wave_sizze_127982) & (2 * + skip_waves_127995 - + 1)) == + 0)) { + // read array element + { + x_127991 = ((__local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_127994)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127992 = x_127990 || x_127991; + + x_127990 = 
defunc_1_op_res_127992; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + x_127990; + } + } + skip_waves_127995 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127980) == (int64_t) 0) { + x_acc_127988 = x_127990; + } + } - int32_t phys_tid_37528; + int32_t old_counter_127996; - phys_tid_37528 = global_tid_46611; + // first thread in group saves group result to global memory + { + if (local_tid_127980 == 0) { + ((__global + bool *) group_res_arr_mem_127976)[sext_i32_i64(group_tid_127981) * + segred_group_sizze_110860] = + x_acc_127988; + mem_fence_global(); + old_counter_127996 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_127974)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_127984)[(int64_t) 0] = + old_counter_127996 == num_groups_110862 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - int64_t gtid_37527; + bool is_last_group_127997; - gtid_37527 = sext_i32_i64(group_tid_46613) * segmap_group_sizze_37760 + - sext_i32_i64(local_tid_46612); - if (slt64(gtid_37527, m_28478)) { - int32_t x_37763 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_37527]; - int32_t x_37764 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37527]; - int32_t y_37765 = ((__global int32_t *) mem_45287)[gtid_37527]; - - for (int64_t i_46616 = 0; i_46616 < iota32_arg_28909; i_46616++) { - ((__global float *) mem_45305)[phys_tid_37528 + i_46616 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)] = - ((__global float *) mem_45294)[gtid_37527 + i_46616 * m_28478]; + is_last_group_127997 = ((__local bool *) sync_arr_mem_127984)[(int64_t) 0]; + if (is_last_group_127997) { + if (local_tid_127980 == 0) { + old_counter_127996 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_127974)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_110862)); 
} - for (int64_t i_37768 = 0; i_37768 < distance_28943; i_37768++) { - int64_t index_primexp_37770 = add64((int64_t) 1, i_37768); - bool cond_37771 = slt64((int64_t) 0, index_primexp_37770); - bool loop_cond_37772; + // read in the per-group-results + { + int64_t read_per_thread_127998 = sdiv_up64(num_groups_110862, + segred_group_sizze_110860); - if (cond_37771) { - bool x_37773 = sle64((int64_t) 0, index_primexp_37770); - bool y_37774 = slt64(index_primexp_37770, iota32_arg_28909); - bool bounds_check_37775 = x_37773 && y_37774; - bool index_certs_37776; - - if (!bounds_check_37775) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 106) == -1) { - global_failure_args[0] = index_primexp_37770; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; - } - } - - float defunc_2_lifted_gt_arg_37777 = ((__global - float *) mem_45305)[phys_tid_37528 + - index_primexp_37770 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - bool y_37778 = slt64(i_37768, iota32_arg_28909); - bool index_certs_37779; - - if (!y_37778) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 107) == -1) { - global_failure_args[0] = i_37768; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; - } + x_76420 = 0; + for (int64_t i_127999 = 0; i_127999 < read_per_thread_127998; + i_127999++) { + int64_t group_res_id_128000 = sext_i32_i64(local_tid_127980) * + read_per_thread_127998 + i_127999; + int64_t index_of_group_res_128001 = group_res_id_128000; + + if (slt64(group_res_id_128000, num_groups_110862)) { + x_76421 = ((__global + bool *) group_res_arr_mem_127976)[index_of_group_res_128001 * + segred_group_sizze_110860]; + + bool defunc_1_op_res_76422; + + defunc_1_op_res_76422 = x_76420 || x_76421; + x_76420 = defunc_1_op_res_76422; } - - float defunc_1_lifted_gt_arg_37780 = ((__global - float *) mem_45305)[phys_tid_37528 + - i_37768 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - bool defunc_1_zlze_res_37781 = 
defunc_1_lifted_gt_arg_37780 <= - defunc_2_lifted_gt_arg_37777; - bool defunc_2_lifted_gt_res_37782 = !defunc_1_zlze_res_37781; - - loop_cond_37772 = defunc_2_lifted_gt_res_37782; - } else { - loop_cond_37772 = 0; } + } + ((__local bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + x_76420; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128002; + int32_t skip_waves_128003; - bool xszq_37783; - int64_t xszq_37784; - bool loop_while_37786; - int64_t j_37787; + skip_waves_128003 = 1; - loop_while_37786 = loop_cond_37772; - j_37787 = index_primexp_37770; - while (loop_while_37786) { - int64_t loopres_37789 = sub64(j_37787, (int64_t) 1); - bool x_37790 = sle64((int64_t) 0, j_37787); - bool y_37791 = slt64(j_37787, iota32_arg_28909); - bool bounds_check_37792 = x_37790 && y_37791; - bool index_certs_37793; - - if (!bounds_check_37792) { + bool x_127990; + bool x_127991; + + offset_128002 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127980, + sext_i64_i32(segred_group_sizze_110860))) { + x_127990 = ((__local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_128002)]; + } + } + offset_128002 = 1; + while (slt32(offset_128002, wave_sizze_127982)) { + if (slt32(local_tid_127980 + offset_128002, + sext_i64_i32(segred_group_sizze_110860)) && + ((local_tid_127980 - squot32(local_tid_127980, + wave_sizze_127982) * + wave_sizze_127982) & (2 * offset_128002 - 1)) == 0) { + // read array element { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 108) == -1) { - global_failure_args[0] = j_37787; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; + x_127991 = ((volatile __local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_128002)]; } - } - - float copy_arg_37794 = ((__global - float *) mem_45305)[phys_tid_37528 + - j_37787 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - bool x_37795 = sle64((int64_t) 0, 
loopres_37789); - bool y_37796 = slt64(loopres_37789, iota32_arg_28909); - bool bounds_check_37797 = x_37795 && y_37796; - bool index_certs_37798; - - if (!bounds_check_37797) { + // apply reduction operation { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 109) == -1) { - global_failure_args[0] = loopres_37789; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; - } - } - - float copy_arg_37799 = ((__global - float *) mem_45305)[phys_tid_37528 + - loopres_37789 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - - ((__global float *) mem_45305)[phys_tid_37528 + j_37787 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)] = - copy_arg_37799; - ((__global float *) mem_45305)[phys_tid_37528 + loopres_37789 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)] = - copy_arg_37794; - - bool cond_37802 = slt64((int64_t) 0, loopres_37789); - bool loop_cond_37803; - - if (cond_37802) { - bool index_certs_37804; - - if (!bounds_check_37797) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 110) == -1) { - global_failure_args[0] = loopres_37789; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; - } + bool defunc_1_op_res_127992 = x_127990 || x_127991; + + x_127990 = defunc_1_op_res_127992; } - - float defunc_2_lifted_gt_arg_37805 = ((__global - float *) mem_45305)[phys_tid_37528 + - loopres_37789 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - int64_t i_37806 = sub64(loopres_37789, (int64_t) 1); - bool x_37807 = sle64((int64_t) 0, i_37806); - bool y_37808 = slt64(i_37806, iota32_arg_28909); - bool bounds_check_37809 = x_37807 && y_37808; - bool index_certs_37810; - - if (!bounds_check_37809) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 111) == -1) { - global_failure_args[0] = i_37806; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; - } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + 
x_127990; } - - float defunc_1_lifted_gt_arg_37811 = ((__global - float *) mem_45305)[phys_tid_37528 + - i_37806 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - bool defunc_1_zlze_res_37812 = - defunc_1_lifted_gt_arg_37811 <= - defunc_2_lifted_gt_arg_37805; - bool defunc_2_lifted_gt_res_37813 = - !defunc_1_zlze_res_37812; - - loop_cond_37803 = defunc_2_lifted_gt_res_37813; - } else { - loop_cond_37803 = 0; } - - bool loop_while_tmp_46618 = loop_cond_37803; - int64_t j_tmp_46619 = loopres_37789; - - loop_while_37786 = loop_while_tmp_46618; - j_37787 = j_tmp_46619; + offset_128002 *= 2; } - xszq_37783 = loop_while_37786; - xszq_37784 = j_37787; - } - - int32_t i_37814 = sdiv32(y_37765, 2); - int32_t j_37815 = sub32(i_37814, 1); - bool cond_37816 = x_37763 == x_37764; - float defunc_0_f_res_37817; - - if (cond_37816) { - defunc_0_f_res_37817 = 0.0F; - } else { - int32_t x_37818 = smod32(y_37765, 2); - bool cond_37819 = x_37818 == 0; - float defunc_0_f_res_f_res_37820; - - if (cond_37819) { - int64_t j_37821 = sext_i32_i64(j_37815); - bool x_37822 = sle64((int64_t) 0, j_37821); - bool y_37823 = slt64(j_37821, iota32_arg_28909); - bool bounds_check_37824 = x_37822 && y_37823; - bool index_certs_37825; - - if (!bounds_check_37824) { + while (slt32(skip_waves_128003, + squot32(sext_i64_i32(segred_group_sizze_110860) + + wave_sizze_127982 - 1, wave_sizze_127982))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128002 = skip_waves_128003 * wave_sizze_127982; + if (slt32(local_tid_127980 + offset_128002, + sext_i64_i32(segred_group_sizze_110860)) && + ((local_tid_127980 - squot32(local_tid_127980, + wave_sizze_127982) * + wave_sizze_127982) == 0 && (squot32(local_tid_127980, + wave_sizze_127982) & + (2 * skip_waves_128003 - + 1)) == 0)) { + // read array element { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 112) == -1) { - global_failure_args[0] = j_37821; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; + x_127991 = ((__local + bool *) 
red_arr_mem_127986)[sext_i32_i64(local_tid_127980 + + offset_128002)]; } - } - - float x_37826 = ((__global float *) mem_45305)[phys_tid_37528 + - j_37821 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - int64_t i_37827 = sext_i32_i64(i_37814); - bool x_37828 = sle64((int64_t) 0, i_37827); - bool y_37829 = slt64(i_37827, iota32_arg_28909); - bool bounds_check_37830 = x_37828 && y_37829; - bool index_certs_37831; - - if (!bounds_check_37830) { + // apply reduction operation { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 113) == -1) { - global_failure_args[0] = i_37827; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; + bool defunc_1_op_res_127992 = x_127990 || x_127991; + + x_127990 = defunc_1_op_res_127992; } - } - - float y_37832 = ((__global float *) mem_45305)[phys_tid_37528 + - i_37827 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - float x_37833 = x_37826 + y_37832; - float defunc_0_f_res_f_res_t_res_37834 = x_37833 / 2.0F; - - defunc_0_f_res_f_res_37820 = defunc_0_f_res_f_res_t_res_37834; - } else { - int64_t i_37835 = sext_i32_i64(i_37814); - bool x_37836 = sle64((int64_t) 0, i_37835); - bool y_37837 = slt64(i_37835, iota32_arg_28909); - bool bounds_check_37838 = x_37836 && y_37837; - bool index_certs_37839; - - if (!bounds_check_37838) { + // write result of operation { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 114) == -1) { - global_failure_args[0] = i_37835; - global_failure_args[1] = iota32_arg_28909; - ; - } - return; + ((__local + bool *) red_arr_mem_127986)[sext_i32_i64(local_tid_127980)] = + x_127990; } } - - float defunc_0_f_res_f_res_f_res_37840 = ((__global - float *) mem_45305)[phys_tid_37528 + - i_37835 * - (segmap_usable_groups_37761 * - segmap_group_sizze_37760)]; - - defunc_0_f_res_f_res_37820 = defunc_0_f_res_f_res_f_res_37840; + skip_waves_128003 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127980 == 0) { + ((__global bool *) 
mem_123728)[(int64_t) 0] = x_127990; + } } - defunc_0_f_res_37817 = defunc_0_f_res_f_res_37820; } - ((__global float *) mem_45325)[gtid_37527] = defunc_0_f_res_37817; } - error_0: + error_1: return; - #undef segmap_group_sizze_37760 + #undef segred_group_sizze_110860 } -__kernel void mainMagnitudezisegmap_37649(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_28477, int64_t m_28478, - int64_t iota32_arg_28909, __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45287, - __global unsigned char *mem_45291) +__kernel void mainzisegred_nonseg_114409(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129486_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_129484_backing_aligned_1, + int64_t m_75136, + int64_t num_groups_114404, + int64_t num_threads_129478, __global + unsigned char *defunc_3_map_res_mem_124958, + __global unsigned char *mem_124963, + __global + unsigned char *mainzicounter_mem_129474, + __global + unsigned char *group_res_arr_mem_129476) { - #define segmap_group_sizze_37731 (mainMagnitudezisegmap_group_sizze_37652) + #define segred_group_sizze_114402 (mainzisegred_group_sizze_114401) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129486_backing_1 = + (__local volatile + char *) red_arr_mem_129486_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_129484_backing_0 = + (__local volatile + char *) sync_arr_mem_129484_backing_aligned_1; if (*global_failure >= 0) return; - int32_t global_tid_46606; - int32_t local_tid_46607; - int64_t group_sizze_46610; - int32_t wave_sizze_46609; - int32_t group_tid_46608; + int32_t global_tid_129479; + int32_t local_tid_129480; + int64_t group_sizze_129483; + int32_t wave_sizze_129482; + int32_t group_tid_129481; - global_tid_46606 = get_global_id(0); 
- local_tid_46607 = get_local_id(0); - group_sizze_46610 = get_local_size(0); - wave_sizze_46609 = LOCKSTEP_WIDTH; - group_tid_46608 = get_group_id(0); + global_tid_129479 = get_global_id(0); + local_tid_129480 = get_local_id(0); + group_sizze_129483 = get_local_size(0); + wave_sizze_129482 = LOCKSTEP_WIDTH; + group_tid_129481 = get_group_id(0); - int32_t phys_tid_37649; + int32_t phys_tid_114409; - phys_tid_37649 = global_tid_46606; + phys_tid_114409 = global_tid_129479; - int64_t gtid_37647; + __local char *sync_arr_mem_129484; - gtid_37647 = squot64(sext_i32_i64(group_tid_46608) * - segmap_group_sizze_37731 + - sext_i32_i64(local_tid_46607), iota32_arg_28909); + sync_arr_mem_129484 = (__local char *) sync_arr_mem_129484_backing_0; - int64_t gtid_37648; - - gtid_37648 = sext_i32_i64(group_tid_46608) * segmap_group_sizze_37731 + - sext_i32_i64(local_tid_46607) - squot64(sext_i32_i64(group_tid_46608) * - segmap_group_sizze_37731 + - sext_i32_i64(local_tid_46607), - iota32_arg_28909) * - iota32_arg_28909; - if (slt64(gtid_37647, m_28478) && slt64(gtid_37648, iota32_arg_28909)) { - int32_t x_37734 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37647]; - int32_t y_37736 = ((__global int32_t *) mem_45287)[gtid_37647]; - int32_t index_primexp_42395 = sext_i64_i32(gtid_37648); - bool cond_37738 = slt32(index_primexp_42395, y_37736); - bool cond_37739; - - if (cond_37738) { - int32_t i_37740 = add32(x_37734, index_primexp_42395); - int64_t i_37741 = sext_i32_i64(i_37740); - bool x_37742 = sle64((int64_t) 0, i_37741); - bool y_37743 = slt64(i_37741, N_28477); - bool bounds_check_37744 = x_37742 && y_37743; - bool index_certs_37745; - - if (!bounds_check_37744) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 104) == - -1) { - global_failure_args[0] = i_37741; - global_failure_args[1] = N_28477; - ; - } - return; - } - } - - float isnan_arg_37746 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37647 * - N_28477 + - i_37741]; - bool 
isnan_res_37747; - - isnan_res_37747 = futrts_isnan32(isnan_arg_37746); - - bool cond_t_res_37748 = !isnan_res_37747; - - cond_37739 = cond_t_res_37748; - } else { - cond_37739 = 0; - } - - float defunc_0_f_res_37749; - - if (cond_37739) { - int32_t i_37750 = add32(x_37734, index_primexp_42395); - int64_t i_37751 = sext_i32_i64(i_37750); - bool x_37752 = sle64((int64_t) 0, i_37751); - bool y_37753 = slt64(i_37751, N_28477); - bool bounds_check_37754 = x_37752 && y_37753; - bool index_certs_37755; - - if (!bounds_check_37754) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 105) == - -1) { - global_failure_args[0] = i_37751; - global_failure_args[1] = N_28477; - ; - } - return; - } - } - - float defunc_0_f_res_t_res_37756 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37647 * - N_28477 + - i_37751]; - - defunc_0_f_res_37749 = defunc_0_f_res_t_res_37756; - } else { - defunc_0_f_res_37749 = INFINITY; - } - ((__global float *) mem_45291)[gtid_37647 * iota32_arg_28909 + - gtid_37648] = defunc_0_f_res_37749; - } + __local char *red_arr_mem_129486; - error_0: - return; - #undef segmap_group_sizze_37731 -} -__kernel void mainMagnitudezisegmap_37707(__global int *global_failure, - int64_t m_28478, __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45287) -{ - #define segmap_group_sizze_37716 (mainMagnitudezisegmap_group_sizze_37709) + red_arr_mem_129486 = (__local char *) red_arr_mem_129486_backing_1; - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int64_t dummy_114407; - if (*global_failure >= 0) - return; + dummy_114407 = (int64_t) 0; - int32_t global_tid_46601; - int32_t local_tid_46602; - int64_t group_sizze_46605; - int32_t wave_sizze_46604; - int32_t group_tid_46603; + int64_t gtid_114408; - global_tid_46601 = get_global_id(0); - local_tid_46602 = get_local_id(0); - group_sizze_46605 = get_local_size(0); - wave_sizze_46604 = 
LOCKSTEP_WIDTH; - group_tid_46603 = get_group_id(0); + gtid_114408 = (int64_t) 0; - int32_t phys_tid_37707; + int64_t x_acc_129488; + int64_t chunk_sizze_129489; - phys_tid_37707 = global_tid_46601; + chunk_sizze_129489 = smin64(sdiv_up64(m_75136, + sext_i32_i64(sext_i64_i32(segred_group_sizze_114402 * + num_groups_114404))), + sdiv_up64(m_75136 - phys_tid_114409, + num_threads_129478)); - int64_t gtid_37706; + int64_t x_76996; + int64_t x_76997; - gtid_37706 = sext_i32_i64(group_tid_46603) * segmap_group_sizze_37716 + - sext_i32_i64(local_tid_46602); - if (slt64(gtid_37706, m_28478)) { - int32_t x_37719 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_37706]; - int32_t x_37720 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37706]; - int32_t y_37721 = sub32(x_37719, x_37720); - - ((__global int32_t *) mem_45287)[gtid_37706] = y_37721; + // neutral-initialise the accumulators + { + x_acc_129488 = (int64_t) 0; } + for (int64_t i_129493 = 0; i_129493 < chunk_sizze_129489; i_129493++) { + gtid_114408 = phys_tid_114409 + num_threads_129478 * i_129493; + // apply map function + { + int64_t x_76999 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114408]; + + // save map-out results + { } + // load accumulator + { + x_76996 = x_acc_129488; + } + // load new values + { + x_76997 = x_76999; + } + // apply reduction operator + { + int64_t defunc_1_op_res_76998 = smax64(x_76996, x_76997); + + // store in accumulator + { + x_acc_129488 = defunc_1_op_res_76998; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_76996 = x_acc_129488; + ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_76996; + } + barrier(CLK_LOCAL_MEM_FENCE); - error_0: - return; - #undef segmap_group_sizze_37716 -} -__kernel void mainMagnitudezisegmap_38241(__global int *global_failure, - int failure_is_an_option, __global - int64_t *global_failure_args, - int64_t N_28477, int64_t m_28478, - int32_t n_28481, 
__global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global unsigned char *mem_45339, - __global unsigned char *mem_45346, - __global unsigned char *mem_45348, - __global unsigned char *mem_45350, - __global unsigned char *mem_45353, - __global unsigned char *mem_45355) -{ - #define segmap_group_sizze_38525 (mainMagnitudezisegmap_group_sizze_38243) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; + int32_t offset_129494; + int32_t skip_waves_129495; - if (*global_failure >= 0) - return; + skip_waves_129495 = 1; - int32_t global_tid_46811; - int32_t local_tid_46812; - int64_t group_sizze_46815; - int32_t wave_sizze_46814; - int32_t group_tid_46813; + int64_t x_129490; + int64_t x_129491; - global_tid_46811 = get_global_id(0); - local_tid_46812 = get_local_id(0); - group_sizze_46815 = get_local_size(0); - wave_sizze_46814 = LOCKSTEP_WIDTH; - group_tid_46813 = get_group_id(0); + offset_129494 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129480, sext_i64_i32(segred_group_sizze_114402))) { + x_129490 = ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129494)]; + } + } + offset_129494 = 1; + while (slt32(offset_129494, wave_sizze_129482)) { + if (slt32(local_tid_129480 + offset_129494, + sext_i64_i32(segred_group_sizze_114402)) && + ((local_tid_129480 - squot32(local_tid_129480, wave_sizze_129482) * + wave_sizze_129482) & (2 * offset_129494 - 1)) == 0) { + // read array element + { + x_129491 = ((volatile __local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129494)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129492 = smax64(x_129490, x_129491); + + x_129490 = defunc_1_op_res_129492; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_129490; + } + } + offset_129494 *= 2; + } 
+ while (slt32(skip_waves_129495, + squot32(sext_i64_i32(segred_group_sizze_114402) + + wave_sizze_129482 - 1, wave_sizze_129482))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129494 = skip_waves_129495 * wave_sizze_129482; + if (slt32(local_tid_129480 + offset_129494, + sext_i64_i32(segred_group_sizze_114402)) && + ((local_tid_129480 - squot32(local_tid_129480, wave_sizze_129482) * + wave_sizze_129482) == 0 && (squot32(local_tid_129480, + wave_sizze_129482) & (2 * + skip_waves_129495 - + 1)) == + 0)) { + // read array element + { + x_129491 = ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129494)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129492 = smax64(x_129490, x_129491); + + x_129490 = defunc_1_op_res_129492; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_129490; + } + } + skip_waves_129495 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129480) == (int64_t) 0) { + x_acc_129488 = x_129490; + } + } - int32_t phys_tid_38241; + int32_t old_counter_129496; - phys_tid_38241 = global_tid_46811; + // first thread in group saves group result to global memory + { + if (local_tid_129480 == 0) { + ((__global + int64_t *) group_res_arr_mem_129476)[sext_i32_i64(group_tid_129481) * + segred_group_sizze_114402] = + x_acc_129488; + mem_fence_global(); + old_counter_129496 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129474)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_129484)[(int64_t) 0] = + old_counter_129496 == num_groups_114404 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - int64_t gtid_38240; + bool is_last_group_129497; - gtid_38240 = sext_i32_i64(group_tid_46813) * segmap_group_sizze_38525 + - sext_i32_i64(local_tid_46812); - if (slt64(gtid_38240, m_28478)) { - int32_t x_38529 = 
((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_38240]; - int32_t y_38531 = ((__global int32_t *) mem_45339)[gtid_38240]; - bool acc0_38533 = ((__global bool *) mem_45346)[gtid_38240]; - bool x_38538 = acc0_38533 && acc0_38533; - int32_t defunc_1_op_res_f_res_38542; - - if (acc0_38533) { - int32_t acc0_38534 = ((__global int32_t *) mem_45348)[gtid_38240]; - - defunc_1_op_res_f_res_38542 = acc0_38534; - } else { - defunc_1_op_res_f_res_38542 = -1; + is_last_group_129497 = ((__local bool *) sync_arr_mem_129484)[(int64_t) 0]; + if (is_last_group_129497) { + if (local_tid_129480 == 0) { + old_counter_129496 = atomic_add_i32_global(&((volatile __global + int *) mainzicounter_mem_129474)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_114404)); } - - bool cond_38548 = y_38531 == 0; - float defunc_0_f_res_38549; - - if (cond_38548) { - defunc_0_f_res_38549 = 0.0F; - } else { - float acc0_38535 = ((__global float *) mem_45350)[gtid_38240]; - float i32_res_38550 = sitofp_i32_f32(y_38531); - float defunc_0_f_res_f_res_38551 = acc0_38535 / i32_res_38550; + // read in the per-group-results + { + int64_t read_per_thread_129498 = sdiv_up64(num_groups_114404, + segred_group_sizze_114402); - defunc_0_f_res_38549 = defunc_0_f_res_f_res_38551; + x_76996 = (int64_t) 0; + for (int64_t i_129499 = 0; i_129499 < read_per_thread_129498; + i_129499++) { + int64_t group_res_id_129500 = sext_i32_i64(local_tid_129480) * + read_per_thread_129498 + i_129499; + int64_t index_of_group_res_129501 = group_res_id_129500; + + if (slt64(group_res_id_129500, num_groups_114404)) { + x_76997 = ((__global + int64_t *) group_res_arr_mem_129476)[index_of_group_res_129501 * + segred_group_sizze_114402]; + + int64_t defunc_1_op_res_76998; + + defunc_1_op_res_76998 = smax64(x_76996, x_76997); + x_76996 = defunc_1_op_res_76998; + } + } } - - bool cond_38552 = !x_38538; - int32_t fst_breakzq_38553; - - if (cond_38552) { - fst_breakzq_38553 = -1; - } else { - bool cond_38554 = 
slt32(defunc_1_op_res_f_res_38542, y_38531); - int32_t adjustValInds_res_38555; + ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_76996; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129502; + int32_t skip_waves_129503; - if (cond_38554) { - int32_t i_38556 = add32(x_38529, defunc_1_op_res_f_res_38542); - int64_t i_38557 = sext_i32_i64(i_38556); - bool x_38558 = sle64((int64_t) 0, i_38557); - bool y_38559 = slt64(i_38557, N_28477); - bool bounds_check_38560 = x_38558 && y_38559; - bool index_certs_38561; - - if (!bounds_check_38560) { + skip_waves_129503 = 1; + + int64_t x_129490; + int64_t x_129491; + + offset_129502 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129480, + sext_i64_i32(segred_group_sizze_114402))) { + x_129490 = ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129502)]; + } + } + offset_129502 = 1; + while (slt32(offset_129502, wave_sizze_129482)) { + if (slt32(local_tid_129480 + offset_129502, + sext_i64_i32(segred_group_sizze_114402)) && + ((local_tid_129480 - squot32(local_tid_129480, + wave_sizze_129482) * + wave_sizze_129482) & (2 * offset_129502 - 1)) == 0) { + // read array element { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 120) == -1) { - global_failure_args[0] = i_38557; - global_failure_args[1] = N_28477; - ; - } - return; + x_129491 = ((volatile __local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129502)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129492 = smax64(x_129490, + x_129491); + + x_129490 = defunc_1_op_res_129492; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_129490; } } - - int32_t x_38562 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_38240 * - N_28477 + - i_38557]; - int32_t adjustValInds_res_t_res_38563 = sub32(x_38562, 
n_28481); - - adjustValInds_res_38555 = adjustValInds_res_t_res_38563; - } else { - adjustValInds_res_38555 = -1; + offset_129502 *= 2; + } + while (slt32(skip_waves_129503, + squot32(sext_i64_i32(segred_group_sizze_114402) + + wave_sizze_129482 - 1, wave_sizze_129482))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129502 = skip_waves_129503 * wave_sizze_129482; + if (slt32(local_tid_129480 + offset_129502, + sext_i64_i32(segred_group_sizze_114402)) && + ((local_tid_129480 - squot32(local_tid_129480, + wave_sizze_129482) * + wave_sizze_129482) == 0 && (squot32(local_tid_129480, + wave_sizze_129482) & + (2 * skip_waves_129503 - + 1)) == 0)) { + // read array element + { + x_129491 = ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480 + + offset_129502)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129492 = smax64(x_129490, + x_129491); + + x_129490 = defunc_1_op_res_129492; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129486)[sext_i32_i64(local_tid_129480)] = + x_129490; + } + } + skip_waves_129503 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129480 == 0) { + ((__global int64_t *) mem_124963)[(int64_t) 0] = x_129490; + } } - fst_breakzq_38553 = adjustValInds_res_38555; - } - - bool cond_38564 = sle32(x_38529, 5); - bool cond_f_res_38565 = sle32(y_38531, 5); - bool x_38566 = !cond_38564; - bool y_38567 = cond_f_res_38565 && x_38566; - bool cond_38568 = cond_38564 || y_38567; - int32_t fst_breakzq_38569; - - if (cond_38568) { - fst_breakzq_38569 = -2; - } else { - fst_breakzq_38569 = fst_breakzq_38553; } - ((__global int32_t *) mem_45353)[gtid_38240] = fst_breakzq_38569; - ((__global float *) mem_45355)[gtid_38240] = defunc_0_f_res_38549; } - error_0: + error_1: return; - #undef segmap_group_sizze_38525 + #undef segred_group_sizze_114402 } -__kernel void mainMagnitudezisegmap_38395(__global int *global_failure, - int64_t m_28478, - int64_t num_groups_38418, __global - 
unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global unsigned char *mem_45337, - __global unsigned char *mem_45339) +__kernel void mainzisegred_small_104864(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127079_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_106695, + int64_t segment_sizze_nonzzero_127072, + __global unsigned char *mem_121831, + __global unsigned char *mem_121835, + __global unsigned char *mem_121840) { - #define segmap_group_sizze_38417 (mainMagnitudezisegmap_group_sizze_38397) + #define segred_group_sizze_106694 (mainzisegred_group_sizze_104858) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127079_backing_0 = + (__local volatile + char *) red_arr_mem_127079_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46649; - int32_t local_tid_46650; - int64_t group_sizze_46653; - int32_t wave_sizze_46652; - int32_t group_tid_46651; - - global_tid_46649 = get_global_id(0); - local_tid_46650 = get_local_id(0); - group_sizze_46653 = get_local_size(0); - wave_sizze_46652 = LOCKSTEP_WIDTH; - group_tid_46651 = get_group_id(0); - - int32_t phys_tid_38395; - - phys_tid_38395 = global_tid_46649; - - int32_t phys_group_id_46654; - - phys_group_id_46654 = get_group_id(0); - for (int32_t i_46655 = 0; i_46655 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, segmap_group_sizze_38417)) - - phys_group_id_46654, sext_i64_i32(num_groups_38418)); - i_46655++) { - int32_t virt_group_id_46656 = phys_group_id_46654 + i_46655 * - sext_i64_i32(num_groups_38418); - int64_t gtid_38394 = sext_i32_i64(virt_group_id_46656) * - segmap_group_sizze_38417 + sext_i32_i64(local_tid_46650); + int32_t global_tid_127074; + int32_t local_tid_127075; + int64_t group_sizze_127078; + int32_t wave_sizze_127077; + int32_t 
group_tid_127076; + + global_tid_127074 = get_global_id(0); + local_tid_127075 = get_local_id(0); + group_sizze_127078 = get_local_size(0); + wave_sizze_127077 = LOCKSTEP_WIDTH; + group_tid_127076 = get_group_id(0); + + int32_t phys_tid_104864; + + phys_tid_104864 = global_tid_127074; + + __local char *red_arr_mem_127079; + + red_arr_mem_127079 = (__local char *) red_arr_mem_127079_backing_0; + + int32_t phys_group_id_127081; + + phys_group_id_127081 = get_group_id(0); + for (int32_t i_127082 = 0; i_127082 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151 * k2p2zq_75151, + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072))) - + phys_group_id_127081, sext_i64_i32(num_groups_106695)); + i_127082++) { + int32_t virt_group_id_127083 = phys_group_id_127081 + i_127082 * + sext_i64_i32(num_groups_106695); + int64_t gtid_104851 = squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_104852 = squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) - + squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_104853 = squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) - + squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072), 
k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) - + squot64(squot64(sext_i32_i64(local_tid_127075), + segment_sizze_nonzzero_127072) + + sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t gtid_104863 = srem64(sext_i32_i64(local_tid_127075), + k2p2zq_75151); - if (slt64(gtid_38394, m_28478)) { - int32_t x_38424 = ((__global - int32_t *) defunc_4_map_res_mem_45177)[gtid_38394]; - int32_t x_38425 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_38394]; - float x_38426 = ((__global - float *) defunc_3_map_res_mem_45246)[gtid_38394]; - int32_t y_38427 = sub32(x_38424, x_38425); - float i32_res_38428 = sitofp_i32_f32(x_38425); - float sqrt_res_38429; - - sqrt_res_38429 = futrts_sqrt32(i32_res_38428); - - float y_38430 = x_38426 * sqrt_res_38429; - - ((__global float *) mem_45337)[gtid_38394] = y_38430; - ((__global int32_t *) mem_45339)[gtid_38394] = y_38427; - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - } - - error_0: - return; - #undef segmap_group_sizze_38417 -} -__kernel void mainMagnitudezisegmap_intragroup_35056(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_44563_backing_aligned_0, - __local volatile - int64_t *mem_44553_backing_aligned_1, - int32_t k2p2zq_28491, - int64_t i32_res_28493, - int32_t m_28624, - int64_t nm_28626, - int64_t i32_res_28641, - __global - unsigned char *defunc_3_map_res_mem_44549, - __global - unsigned char *mem_44573) -{ - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_44563_backing_1 
= (__local volatile - char *) mem_44563_backing_aligned_0; - __local volatile char *restrict mem_44553_backing_0 = (__local volatile - char *) mem_44553_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_45821; - int32_t local_tid_45822; - int64_t group_sizze_45825; - int32_t wave_sizze_45824; - int32_t group_tid_45823; - - global_tid_45821 = get_global_id(0); - local_tid_45822 = get_local_id(0); - group_sizze_45825 = get_local_size(0); - wave_sizze_45824 = LOCKSTEP_WIDTH; - group_tid_45823 = get_group_id(0); - - int32_t phys_tid_35056; - - phys_tid_35056 = group_tid_45823; - - int32_t ltid_pre_45826; - - ltid_pre_45826 = local_tid_45822; - - int64_t gtid_34987; - - gtid_34987 = sext_i32_i64(group_tid_45823); - - __local char *mem_44553; - - mem_44553 = (__local char *) mem_44553_backing_0; - - int64_t gtid_34990 = sext_i32_i64(ltid_pre_45826); - int32_t phys_tid_34991 = local_tid_45822; - int32_t index_primexp_42354 = sext_i64_i32(gtid_34990); - int32_t defunc_0_f_res_35237 = sdiv32(index_primexp_42354, m_28624); - int32_t defunc_0_f_res_35238 = smod32(index_primexp_42354, m_28624); - bool cond_35239 = slt32(defunc_0_f_res_35238, k2p2zq_28491); - float defunc_0_f_res_35240; - - if (cond_35239) { - int64_t i_35241 = sext_i32_i64(defunc_0_f_res_35237); - bool x_35242 = sle64((int64_t) 0, i_35241); - bool y_35243 = slt64(i_35241, i32_res_28493); - bool bounds_check_35244 = x_35242 && y_35243; - int64_t j_35245 = sext_i32_i64(defunc_0_f_res_35238); - bool x_35246 = sle64((int64_t) 0, j_35245); - bool y_35247 = slt64(j_35245, i32_res_28493); - bool bounds_check_35248 = x_35246 && y_35247; - bool index_ok_35249 = bounds_check_35244 && bounds_check_35248; - bool index_certs_35250; - - if (!index_ok_35249) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 79) == -1) { - 
global_failure_args[0] = i_35241; - global_failure_args[1] = j_35245; - global_failure_args[2] = i32_res_28493; - global_failure_args[3] = i32_res_28493; - ; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && (((slt64(gtid_104851, + m_75136) && + slt64(gtid_104852, + k2p2zq_75151)) && + slt64(gtid_104853, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_127075), + k2p2zq_75151 * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072)))) { + double x_106704 = ((__global double *) mem_121831)[gtid_104852 * + (k2p2zq_75151 * + m_75136) + + gtid_104851 * + k2p2zq_75151 + + gtid_104863]; + double x_106705 = ((__global double *) mem_121835)[gtid_104851 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_104853 * + k2p2zq_75151 + + gtid_104863]; + double defunc_1_f_res_106706 = x_106704 * x_106705; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + defunc_1_f_res_106706; } - local_failure = true; - goto error_0; + } else { + ((__local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + 0.0; } } - - float defunc_0_f_res_t_res_35251 = ((__global - float *) defunc_3_map_res_mem_44549)[gtid_34987 * - (i32_res_28493 * - i32_res_28493) + - i_35241 * - i32_res_28493 + - j_35245]; - - defunc_0_f_res_35240 = defunc_0_f_res_t_res_35251; - } else { - int32_t y_35252 = add32(k2p2zq_28491, defunc_0_f_res_35237); - bool cond_35253 = defunc_0_f_res_35238 == y_35252; - float defunc_0_f_res_f_res_35254; - - if (cond_35253) { - defunc_0_f_res_f_res_35254 = 1.0F; - } else { - defunc_0_f_res_f_res_35254 = 0.0F; - } - defunc_0_f_res_35240 = defunc_0_f_res_f_res_35254; - } - ((__local float *) mem_44553)[gtid_34990] = defunc_0_f_res_35240; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44563; - - mem_44563 = (__local char *) mem_44563_backing_1; - for 
(int32_t i_35256 = 0; i_35256 < k2p2zq_28491; i_35256++) { - int64_t i32_res_35258 = sext_i32_i64(i_35256); - bool x_35259 = sle64((int64_t) 0, i32_res_35258); - bool y_35260 = slt64(i32_res_35258, nm_28626); - bool bounds_check_35261 = x_35259 && y_35260; - bool index_certs_35262; - - if (!bounds_check_35261) { + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction { - if (atomic_cmpxchg_i32_global(global_failure, -1, 80) == -1) { - global_failure_args[0] = i32_res_35258; - global_failure_args[1] = nm_28626; - ; + double x_106698; + double x_106699; + double x_127084; + double x_127085; + bool ltid_in_bounds_127087; + + ltid_in_bounds_127087 = slt64(sext_i32_i64(local_tid_127075), + k2p2zq_75151 * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072)); + + int32_t skip_threads_127088; + + // read input for in-block scan + { + if (ltid_in_bounds_127087) { + x_106699 = ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)]; + if ((local_tid_127075 - squot32(local_tid_127075, 32) * + 32) == 0) { + x_106698 = x_106699; + } + } } - local_failure = true; - goto error_1; - } - } - - float v1_35263 = ((__local float *) mem_44553)[i32_res_35258]; - bool cond_35264 = v1_35263 == 0.0F; - int64_t gtid_35011 = sext_i32_i64(ltid_pre_45826); - int32_t phys_tid_35012 = local_tid_45822; - int32_t defunc_0_f_res_35267 = sext_i64_i32(gtid_35011); - int32_t defunc_0_f_res_35268 = sdiv32(defunc_0_f_res_35267, m_28624); - int32_t defunc_0_f_res_35269 = smod32(defunc_0_f_res_35267, m_28624); - float defunc_0_f_res_35270; - - if (cond_35264) { - int32_t x_35271 = mul32(m_28624, defunc_0_f_res_35268); - int32_t i32_arg_35272 = add32(defunc_0_f_res_35269, x_35271); - int64_t i32_res_35273 = sext_i32_i64(i32_arg_35272); - bool x_35274 = sle64((int64_t) 0, i32_res_35273); - bool y_35275 = slt64(i32_res_35273, nm_28626); - bool bounds_check_35276 = x_35274 && y_35275; - bool 
index_certs_35277; - - if (!bounds_check_35276) { + // in-block scan (hopefully no barriers needed) { - if (atomic_cmpxchg_i32_global(global_failure, -1, 81) == - -1) { - global_failure_args[0] = i32_res_35273; - global_failure_args[1] = nm_28626; - ; + skip_threads_127088 = 1; + while (slt32(skip_threads_127088, 32)) { + if (sle32(skip_threads_127088, local_tid_127075 - + squot32(local_tid_127075, 32) * 32) && + ltid_in_bounds_127087) { + // read operands + { + x_106698 = ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075) - + sext_i32_i64(skip_threads_127088)]; + } + // perform operation + { + bool inactive_127089 = + slt64(srem64(sext_i32_i64(local_tid_127075), + k2p2zq_75151), + sext_i32_i64(local_tid_127075) - + sext_i32_i64(local_tid_127075 - + skip_threads_127088)); + + if (inactive_127089) { + x_106698 = x_106699; + } + if (!inactive_127089) { + double defunc_1_op_res_106700 = x_106698 + + x_106699; + + x_106698 = defunc_1_op_res_106700; + } + } + } + if (sle32(wave_sizze_127077, skip_threads_127088)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127088, local_tid_127075 - + squot32(local_tid_127075, 32) * 32) && + ltid_in_bounds_127087) { + // write result + { + ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + x_106698; + x_106699 = x_106698; + } + } + if (sle32(wave_sizze_127077, skip_threads_127088)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127088 *= 2; } - local_failure = true; - goto error_1; } - } - - float defunc_0_f_res_t_res_35278 = ((__local - float *) mem_44553)[i32_res_35273]; - - defunc_0_f_res_35270 = defunc_0_f_res_t_res_35278; - } else { - int64_t i32_res_35279 = sext_i32_i64(defunc_0_f_res_35269); - bool x_35280 = sle64((int64_t) 0, i32_res_35279); - bool y_35281 = slt64(i32_res_35279, nm_28626); - bool bounds_check_35282 = x_35280 && y_35281; - bool index_certs_35283; - - if (!bounds_check_35282) { + barrier(CLK_LOCAL_MEM_FENCE); + // last 
thread of block 'i' writes its result to offset 'i' { - if (atomic_cmpxchg_i32_global(global_failure, -1, 82) == - -1) { - global_failure_args[0] = i32_res_35279; - global_failure_args[1] = nm_28626; - ; + if ((local_tid_127075 - squot32(local_tid_127075, 32) * + 32) == 31 && ltid_in_bounds_127087) { + ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(squot32(local_tid_127075, + 32))] = + x_106698; } - local_failure = true; - goto error_1; } - } - - float x_35284 = ((__local float *) mem_44553)[i32_res_35279]; - float x_35285 = x_35284 / v1_35263; - int32_t y_35286 = sub32(k2p2zq_28491, 1); - bool cond_35287 = slt32(defunc_0_f_res_35268, y_35286); - float defunc_0_f_res_f_res_35288; - - if (cond_35287) { - int32_t x_35289 = add32(1, defunc_0_f_res_35268); - int32_t x_35290 = mul32(m_28624, x_35289); - int32_t i32_arg_35291 = add32(defunc_0_f_res_35269, x_35290); - int64_t i32_res_35292 = sext_i32_i64(i32_arg_35291); - bool x_35293 = sle64((int64_t) 0, i32_res_35292); - bool y_35294 = slt64(i32_res_35292, nm_28626); - bool bounds_check_35295 = x_35293 && y_35294; - bool index_certs_35296; - - if (!bounds_check_35295) { + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127090; + + // read input for in-block scan { - if (atomic_cmpxchg_i32_global(global_failure, -1, 83) == - -1) { - global_failure_args[0] = i32_res_35292; - global_failure_args[1] = nm_28626; - ; + if (squot32(local_tid_127075, 32) == 0 && + ltid_in_bounds_127087) { + x_127085 = ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)]; + if ((local_tid_127075 - squot32(local_tid_127075, + 32) * 32) == 0) { + x_127084 = x_127085; + } } - local_failure = true; - goto error_1; } - } - - float x_35297 = ((__local float *) mem_44553)[i32_res_35292]; - int32_t i32_arg_35298 = add32(i_35256, x_35290); - int64_t i32_res_35299 = sext_i32_i64(i32_arg_35298); - bool x_35300 = 
sle64((int64_t) 0, i32_res_35299); - bool y_35301 = slt64(i32_res_35299, nm_28626); - bool bounds_check_35302 = x_35300 && y_35301; - bool index_certs_35303; - - if (!bounds_check_35302) { + // in-block scan (hopefully no barriers needed) { - if (atomic_cmpxchg_i32_global(global_failure, -1, 84) == - -1) { - global_failure_args[0] = i32_res_35299; - global_failure_args[1] = nm_28626; - ; + skip_threads_127090 = 1; + while (slt32(skip_threads_127090, 32)) { + if (sle32(skip_threads_127090, local_tid_127075 - + squot32(local_tid_127075, 32) * 32) && + (squot32(local_tid_127075, 32) == 0 && + ltid_in_bounds_127087)) { + // read operands + { + x_127084 = ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075) - + sext_i32_i64(skip_threads_127090)]; + } + // perform operation + { + bool inactive_127091 = + slt64(srem64(sext_i32_i64(local_tid_127075 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_127075 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127075 - + skip_threads_127090) * + 32 + 32 - 1)); + + if (inactive_127091) { + x_127084 = x_127085; + } + if (!inactive_127091) { + double defunc_1_op_res_127086 = + x_127084 + x_127085; + + x_127084 = defunc_1_op_res_127086; + } + } + } + if (sle32(wave_sizze_127077, skip_threads_127090)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127090, local_tid_127075 - + squot32(local_tid_127075, 32) * 32) && + (squot32(local_tid_127075, 32) == 0 && + ltid_in_bounds_127087)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + x_127084; + x_127085 = x_127084; + } + } + if (sle32(wave_sizze_127077, skip_threads_127090)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127090 *= 2; } - local_failure = true; - goto error_1; } } - - float x_35304 = ((__local float *) mem_44553)[i32_res_35299]; - float y_35305 = x_35285 * x_35304; - float defunc_0_f_res_f_res_t_res_35306 = x_35297 - y_35305; - - defunc_0_f_res_f_res_35288 
= defunc_0_f_res_f_res_t_res_35306; - } else { - defunc_0_f_res_f_res_35288 = x_35285; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127075, 32) == 0 || + !ltid_in_bounds_127087)) { + // read operands + { + x_106699 = x_106698; + x_106698 = ((__local + double *) red_arr_mem_127079)[sext_i32_i64(squot32(local_tid_127075, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127092 = + slt64(srem64(sext_i32_i64(local_tid_127075), + k2p2zq_75151), + sext_i32_i64(local_tid_127075) - + sext_i32_i64(squot32(local_tid_127075, + 32) * 32 - 1)); + + if (inactive_127092) { + x_106698 = x_106699; + } + if (!inactive_127092) { + double defunc_1_op_res_106700 = x_106698 + + x_106699; + + x_106698 = defunc_1_op_res_106700; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + x_106698; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127075, 32) == 0) { + ((__local + double *) red_arr_mem_127079)[sext_i32_i64(local_tid_127075)] = + x_106699; + } + } + barrier(CLK_LOCAL_MEM_FENCE); } - defunc_0_f_res_35270 = defunc_0_f_res_f_res_35288; } - ((__local float *) mem_44563)[gtid_35011] = defunc_0_f_res_35270; - - error_1: barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075), m_75136 * k2p2zq_75151 * + k2p2zq_75151) && slt64(sext_i32_i64(local_tid_127075), + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072))) { + ((__global + double *) mem_121840)[squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) + + 
squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075) - + squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075) - + squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075) - + squot64(sext_i32_i64(virt_group_id_127083) * + squot64(segred_group_sizze_106694, + segment_sizze_nonzzero_127072) + + sext_i32_i64(local_tid_127075), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_127079)[(sext_i32_i64(local_tid_127075) + + (int64_t) 1) * + segment_sizze_nonzzero_127072 - + (int64_t) 1]; + } + } barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_35054 = sext_i32_i64(ltid_pre_45826); - int32_t phys_tid_35055 = local_tid_45822; - float write_value_35309 = ((__local float *) mem_44563)[write_i_35054]; - - if (sle64((int64_t) 0, write_i_35054) && slt64(write_i_35054, - nm_28626)) { - ((__local float *) mem_44553)[write_i_35054] = write_value_35309; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - for (int64_t i_45828 = 0; i_45828 < sdiv_up64(i32_res_28493 * - i32_res_28493 - - sext_i32_i64(local_tid_45822), - nm_28626); i_45828++) { - ((__global float *) mem_44573)[gtid_34987 * (i32_res_28493 * - i32_res_28493) + - squot64(i_45828 * nm_28626 + - 
sext_i32_i64(local_tid_45822), - i32_res_28493) * i32_res_28493 + - (i_45828 * nm_28626 + - sext_i32_i64(local_tid_45822) - - squot64(i_45828 * nm_28626 + - sext_i32_i64(local_tid_45822), - i32_res_28493) * - i32_res_28493)] = ((__local - float *) mem_44553)[i32_res_28493 + - (squot64(i_45828 * - nm_28626 + - sext_i32_i64(local_tid_45822), - i32_res_28493) * - i32_res_28641 + - (i_45828 * - nm_28626 + - sext_i32_i64(local_tid_45822) - - squot64(i_45828 * - nm_28626 + - sext_i32_i64(local_tid_45822), - i32_res_28493) * - i32_res_28493))]; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - error_3: + error_1: return; + #undef segred_group_sizze_106694 } -__kernel void mainMagnitudezisegmap_intragroup_35383(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_44594_backing_aligned_0, - int64_t m_28478, - int32_t k2p2zq_28491, - int32_t m_28624, - int64_t nm_28626, - int32_t i_35922, - int64_t i32_res_35924, - int64_t ctx_param_ext_44580, - int64_t ctx_param_ext_44581, - int64_t ctx_param_ext_44583, - __global - unsigned char *mem_param_44585, - __global - unsigned char *mem_44590, - __global - unsigned char *mem_44598) +__kernel void mainzisegred_small_105142(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126935_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t x_106526, int64_t i_106527, + int64_t j_m_i_106531, + int64_t num_groups_106613, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_126928, + __global unsigned char *mem_121351, + __global + unsigned char *mem_param_121469, + __global unsigned char *mem_121555) { + #define segred_group_sizze_106612 (mainzisegred_group_sizze_105136) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44594_backing_0 = (__local volatile - char *) mem_44594_backing_aligned_0; - volatile 
__local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_45855; - int32_t local_tid_45856; - int64_t group_sizze_45859; - int32_t wave_sizze_45858; - int32_t group_tid_45857; - - global_tid_45855 = get_global_id(0); - local_tid_45856 = get_local_id(0); - group_sizze_45859 = get_local_size(0); - wave_sizze_45858 = LOCKSTEP_WIDTH; - group_tid_45857 = get_group_id(0); - - int32_t phys_tid_35383; - - phys_tid_35383 = group_tid_45857; - - int32_t ltid_pre_45860; - - ltid_pre_45860 = local_tid_45856; - - int64_t gtid_35335; - - gtid_35335 = sext_i32_i64(group_tid_45857); - - float v1_35941 = ((__global float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35335 * - ctx_param_ext_44581 + - i32_res_35924 * - ctx_param_ext_44583)]; - bool cond_35942 = v1_35941 == 0.0F; - __local char *mem_44594; - - mem_44594 = (__local char *) mem_44594_backing_0; + __local volatile char *restrict red_arr_mem_126935_backing_0 = + (__local volatile + char *) red_arr_mem_126935_backing_aligned_0; - int64_t gtid_35338 = sext_i32_i64(ltid_pre_45860); - int32_t phys_tid_35339 = local_tid_45856; - int32_t defunc_0_f_res_35945 = sext_i64_i32(gtid_35338); - int32_t defunc_0_f_res_35946 = sdiv32(defunc_0_f_res_35945, m_28624); - int32_t defunc_0_f_res_35947 = smod32(defunc_0_f_res_35945, m_28624); - float defunc_0_f_res_35948; + if (*global_failure >= 0) + return; - if (cond_35942) { - int32_t x_35949 = mul32(m_28624, defunc_0_f_res_35946); - int32_t i32_arg_35950 = add32(defunc_0_f_res_35947, x_35949); - int64_t i32_res_35951 = sext_i32_i64(i32_arg_35950); - bool x_35952 = sle64((int64_t) 0, i32_res_35951); - bool y_35953 = slt64(i32_res_35951, nm_28626); - bool bounds_check_35954 = x_35952 && y_35953; - bool index_certs_35955; + int32_t global_tid_126930; + int32_t local_tid_126931; + int64_t group_sizze_126934; + int32_t wave_sizze_126933; + 
int32_t group_tid_126932; + + global_tid_126930 = get_global_id(0); + local_tid_126931 = get_local_id(0); + group_sizze_126934 = get_local_size(0); + wave_sizze_126933 = LOCKSTEP_WIDTH; + group_tid_126932 = get_group_id(0); + + int32_t phys_tid_105142; + + phys_tid_105142 = global_tid_126930; + + __local char *red_arr_mem_126935; + + red_arr_mem_126935 = (__local char *) red_arr_mem_126935_backing_0; + + int32_t phys_group_id_126937; + + phys_group_id_126937 = get_group_id(0); + for (int32_t i_126938 = 0; i_126938 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928))) - + phys_group_id_126937, sext_i64_i32(num_groups_106613)); + i_126938++) { + int32_t virt_group_id_126939 = phys_group_id_126937 + i_126938 * + sext_i64_i32(num_groups_106613); + int64_t gtid_105131 = squot64(squot64(sext_i32_i64(local_tid_126931), + segment_sizze_nonzzero_126928) + + sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928), + k2p2zq_75151); + int64_t gtid_105132 = squot64(sext_i32_i64(local_tid_126931), + segment_sizze_nonzzero_126928) + + sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928) - + squot64(squot64(sext_i32_i64(local_tid_126931), + segment_sizze_nonzzero_126928) + + sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_105141 = srem64(sext_i32_i64(local_tid_126931), + j_m_i_106531); - if (!bounds_check_35954) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 86) == -1) { - global_failure_args[0] = i32_res_35951; - global_failure_args[1] = nm_28626; - ; + // apply map function if in bounds + { + if (slt64((int64_t) 0, j_m_i_106531) && ((slt64(gtid_105131, + m_75136) && + slt64(gtid_105132, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_126931), + j_m_i_106531 * + 
squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928)))) { + int64_t slice_115048 = gtid_105141 + x_106526; + double x_106623 = ((__global double *) mem_121351)[gtid_105131 * + (k2p2zq_75151 * + k2p2zq_75151) + + slice_115048 * + k2p2zq_75151 + + i_106527]; + bool isnan_res_106624; + + isnan_res_106624 = futrts_isnan64(x_106623); + + double defunc_1_f_res_106625; + + if (isnan_res_106624) { + defunc_1_f_res_106625 = 0.0; + } else { + double x_106622 = ((__global + double *) mem_param_121469)[gtid_105131 * + binop_x_120251 + + gtid_105132 * + k2p2zq_75151 + + slice_115048]; + double defunc_1_f_res_f_res_106626 = x_106622 * x_106623; + + defunc_1_f_res_106625 = defunc_1_f_res_f_res_106626; } - local_failure = true; - goto error_0; + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + defunc_1_f_res_106625; + } + } else { + ((__local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + 0.0; } } - - float defunc_0_f_res_t_res_35956 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35335 * - ctx_param_ext_44581 + - i32_res_35951 * - ctx_param_ext_44583)]; - - defunc_0_f_res_35948 = defunc_0_f_res_t_res_35956; - } else { - int64_t i32_res_35957 = sext_i32_i64(defunc_0_f_res_35947); - bool x_35958 = sle64((int64_t) 0, i32_res_35957); - bool y_35959 = slt64(i32_res_35957, nm_28626); - bool bounds_check_35960 = x_35958 && y_35959; - bool index_certs_35961; - - if (!bounds_check_35960) { + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, j_m_i_106531)) { + // perform segmented scan to imitate reduction { - if (atomic_cmpxchg_i32_global(global_failure, -1, 87) == -1) { - global_failure_args[0] = i32_res_35957; - global_failure_args[1] = nm_28626; - ; + double x_106616; + double x_106617; + double x_126940; + double x_126941; + bool ltid_in_bounds_126943; + + ltid_in_bounds_126943 = slt64(sext_i32_i64(local_tid_126931), + 
j_m_i_106531 * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928)); + + int32_t skip_threads_126944; + + // read input for in-block scan + { + if (ltid_in_bounds_126943) { + x_106617 = ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)]; + if ((local_tid_126931 - squot32(local_tid_126931, 32) * + 32) == 0) { + x_106616 = x_106617; + } + } } - local_failure = true; - goto error_0; - } - } - - float x_35962 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35335 * - ctx_param_ext_44581 + - i32_res_35957 * - ctx_param_ext_44583)]; - float x_35963 = x_35962 / v1_35941; - int32_t y_35964 = sub32(k2p2zq_28491, 1); - bool cond_35965 = slt32(defunc_0_f_res_35946, y_35964); - float defunc_0_f_res_f_res_35966; - - if (cond_35965) { - int32_t x_35967 = add32(1, defunc_0_f_res_35946); - int32_t x_35968 = mul32(m_28624, x_35967); - int32_t i32_arg_35969 = add32(defunc_0_f_res_35947, x_35968); - int64_t i32_res_35970 = sext_i32_i64(i32_arg_35969); - bool x_35971 = sle64((int64_t) 0, i32_res_35970); - bool y_35972 = slt64(i32_res_35970, nm_28626); - bool bounds_check_35973 = x_35971 && y_35972; - bool index_certs_35974; - - if (!bounds_check_35973) { + // in-block scan (hopefully no barriers needed) { - if (atomic_cmpxchg_i32_global(global_failure, -1, 88) == - -1) { - global_failure_args[0] = i32_res_35970; - global_failure_args[1] = nm_28626; - ; + skip_threads_126944 = 1; + while (slt32(skip_threads_126944, 32)) { + if (sle32(skip_threads_126944, local_tid_126931 - + squot32(local_tid_126931, 32) * 32) && + ltid_in_bounds_126943) { + // read operands + { + x_106616 = ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931) - + sext_i32_i64(skip_threads_126944)]; + } + // perform operation + { + bool inactive_126945 = + slt64(srem64(sext_i32_i64(local_tid_126931), + j_m_i_106531), + sext_i32_i64(local_tid_126931) - + sext_i32_i64(local_tid_126931 - + skip_threads_126944)); + + if 
(inactive_126945) { + x_106616 = x_106617; + } + if (!inactive_126945) { + double defunc_1_op_res_106618 = x_106616 + + x_106617; + + x_106616 = defunc_1_op_res_106618; + } + } + } + if (sle32(wave_sizze_126933, skip_threads_126944)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126944, local_tid_126931 - + squot32(local_tid_126931, 32) * 32) && + ltid_in_bounds_126943) { + // write result + { + ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + x_106616; + x_106617 = x_106616; + } + } + if (sle32(wave_sizze_126933, skip_threads_126944)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126944 *= 2; } - local_failure = true; - goto error_0; } - } - - float x_35975 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35335 * - ctx_param_ext_44581 + - i32_res_35970 * - ctx_param_ext_44583)]; - int32_t i32_arg_35976 = add32(i_35922, x_35968); - int64_t i32_res_35977 = sext_i32_i64(i32_arg_35976); - bool x_35978 = sle64((int64_t) 0, i32_res_35977); - bool y_35979 = slt64(i32_res_35977, nm_28626); - bool bounds_check_35980 = x_35978 && y_35979; - bool index_certs_35981; - - if (!bounds_check_35980) { + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' { - if (atomic_cmpxchg_i32_global(global_failure, -1, 89) == - -1) { - global_failure_args[0] = i32_res_35977; - global_failure_args[1] = nm_28626; - ; + if ((local_tid_126931 - squot32(local_tid_126931, 32) * + 32) == 31 && ltid_in_bounds_126943) { + ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(squot32(local_tid_126931, + 32))] = + x_106616; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126946; + + // read input for in-block scan + { + if (squot32(local_tid_126931, 32) == 0 && + ltid_in_bounds_126943) { + x_126941 = ((volatile __local + double *) 
red_arr_mem_126935)[sext_i32_i64(local_tid_126931)]; + if ((local_tid_126931 - squot32(local_tid_126931, + 32) * 32) == 0) { + x_126940 = x_126941; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126946 = 1; + while (slt32(skip_threads_126946, 32)) { + if (sle32(skip_threads_126946, local_tid_126931 - + squot32(local_tid_126931, 32) * 32) && + (squot32(local_tid_126931, 32) == 0 && + ltid_in_bounds_126943)) { + // read operands + { + x_126940 = ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931) - + sext_i32_i64(skip_threads_126946)]; + } + // perform operation + { + bool inactive_126947 = + slt64(srem64(sext_i32_i64(local_tid_126931 * + 32 + 32 - 1), + j_m_i_106531), + sext_i32_i64(local_tid_126931 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126931 - + skip_threads_126946) * + 32 + 32 - 1)); + + if (inactive_126947) { + x_126940 = x_126941; + } + if (!inactive_126947) { + double defunc_1_op_res_126942 = + x_126940 + x_126941; + + x_126940 = defunc_1_op_res_126942; + } + } + } + if (sle32(wave_sizze_126933, skip_threads_126946)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126946, local_tid_126931 - + squot32(local_tid_126931, 32) * 32) && + (squot32(local_tid_126931, 32) == 0 && + ltid_in_bounds_126943)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + x_126940; + x_126941 = x_126940; + } + } + if (sle32(wave_sizze_126933, skip_threads_126946)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126946 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126931, 32) == 0 || + !ltid_in_bounds_126943)) { + // read operands + { + x_106617 = x_106616; + x_106616 = ((__local + double *) red_arr_mem_126935)[sext_i32_i64(squot32(local_tid_126931, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126948 = + 
slt64(srem64(sext_i32_i64(local_tid_126931), + j_m_i_106531), + sext_i32_i64(local_tid_126931) - + sext_i32_i64(squot32(local_tid_126931, + 32) * 32 - 1)); + + if (inactive_126948) { + x_106616 = x_106617; + } + if (!inactive_126948) { + double defunc_1_op_res_106618 = x_106616 + + x_106617; + + x_106616 = defunc_1_op_res_106618; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + x_106616; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126931, 32) == 0) { + ((__local + double *) red_arr_mem_126935)[sext_i32_i64(local_tid_126931)] = + x_106617; } - local_failure = true; - goto error_0; } + barrier(CLK_LOCAL_MEM_FENCE); } - - float x_35982 = ((__global - float *) mem_param_44585)[ctx_param_ext_44580 + - (gtid_35335 * - ctx_param_ext_44581 + - i32_res_35977 * - ctx_param_ext_44583)]; - float y_35983 = x_35963 * x_35982; - float defunc_0_f_res_f_res_t_res_35984 = x_35975 - y_35983; - - defunc_0_f_res_f_res_35966 = defunc_0_f_res_f_res_t_res_35984; - } else { - defunc_0_f_res_f_res_35966 = x_35963; } - defunc_0_f_res_35948 = defunc_0_f_res_f_res_35966; - } - ((__local float *) mem_44594)[gtid_35338] = defunc_0_f_res_35948; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_35381 = sext_i32_i64(ltid_pre_45860); - int32_t phys_tid_35382 = local_tid_45856; - float write_value_35987 = ((__local float *) mem_44594)[write_i_35381]; - - if (sle64((int64_t) 0, write_i_35381) && slt64(write_i_35381, nm_28626)) { - ((__global float *) mem_44590)[gtid_35335 + write_i_35381 * m_28478] = - write_value_35987; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_45856 == 0) { - for (int64_t i_45861 = 0; i_45861 < nm_28626; i_45861++) { - ((__global float *) mem_44598)[gtid_35335 * nm_28626 + i_45861] = - ((__global float *) mem_44590)[gtid_35335 + i_45861 * m_28478]; + 
barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928) + + sext_i32_i64(local_tid_126931), m_75136 * k2p2zq_75151) && + slt64(sext_i32_i64(local_tid_126931), + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928))) { + ((__global + double *) mem_121555)[squot64(sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928) + + sext_i32_i64(local_tid_126931), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928) + + sext_i32_i64(local_tid_126931) - + squot64(sext_i32_i64(virt_group_id_126939) * + squot64(segred_group_sizze_106612, + segment_sizze_nonzzero_126928) + + sext_i32_i64(local_tid_126931), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_126935)[(sext_i32_i64(local_tid_126931) + + (int64_t) 1) * + segment_sizze_nonzzero_126928 - + (int64_t) 1]; + } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_2: + error_1: return; + #undef segred_group_sizze_106612 } -__kernel void mainMagnitudezisegmap_intragroup_36514(__global - int *global_failure, - __local volatile - int64_t *mem_45150_backing_aligned_0, - __local volatile - int64_t *mem_45148_backing_aligned_1, - __local volatile - int64_t *mem_45146_backing_aligned_2, - __local volatile - int64_t *mem_45144_backing_aligned_3, - int64_t N_28477, - int64_t i_28781, __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_3_map_res_mem_45140, - __global - unsigned char *mem_45153, - __global - unsigned char *mem_45156, - __global - unsigned char *mem_45159) +__kernel void mainzisegred_small_105899(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126710_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t 
defunc_2_reduce_res_75260, + int64_t j_106067, + int64_t num_groups_106100, + int64_t segment_sizze_nonzzero_126703, + __global unsigned char *mem_120246, + __global unsigned char *mem_120938) { + #define segred_group_sizze_106099 (mainzisegred_group_sizze_105893) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_45150_backing_3 = (__local volatile - char *) mem_45150_backing_aligned_0; - __local volatile char *restrict mem_45148_backing_2 = (__local volatile - char *) mem_45148_backing_aligned_1; - __local volatile char *restrict mem_45146_backing_1 = (__local volatile - char *) mem_45146_backing_aligned_2; - __local volatile char *restrict mem_45144_backing_0 = (__local volatile - char *) mem_45144_backing_aligned_3; + __local volatile char *restrict red_arr_mem_126710_backing_0 = + (__local volatile + char *) red_arr_mem_126710_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46237; - int32_t local_tid_46238; - int64_t group_sizze_46241; - int32_t wave_sizze_46240; - int32_t group_tid_46239; - - global_tid_46237 = get_global_id(0); - local_tid_46238 = get_local_id(0); - group_sizze_46241 = get_local_size(0); - wave_sizze_46240 = LOCKSTEP_WIDTH; - group_tid_46239 = get_group_id(0); - - int32_t phys_tid_36514; - - phys_tid_36514 = group_tid_46239; - - int32_t ltid_pre_46242; - - ltid_pre_46242 = local_tid_46238; - - int64_t gtid_36507; - - gtid_36507 = sext_i32_i64(group_tid_46239); - - __local char *mem_45144; - - mem_45144 = (__local char *) mem_45144_backing_0; - - __local char *mem_45146; - - mem_45146 = (__local char *) mem_45146_backing_1; - - int64_t gtid_36510 = sext_i32_i64(ltid_pre_46242); - int32_t phys_tid_36511 = local_tid_46238; - float x_36603 = ((__global float *) images_mem_44381)[gtid_36507 * N_28477 + - gtid_36510]; - bool isnan_res_36605; - - isnan_res_36605 = futrts_isnan32(x_36603); - - bool cond_36606 = !isnan_res_36605; - float defunc_1_f_res_36607; 
- - if (cond_36606) { - float x_36604 = ((__global - float *) defunc_3_map_res_mem_45140)[gtid_36507 * - N_28477 + - gtid_36510]; - float defunc_1_f_res_t_res_36608 = x_36603 - x_36604; + int32_t global_tid_126705; + int32_t local_tid_126706; + int64_t group_sizze_126709; + int32_t wave_sizze_126708; + int32_t group_tid_126707; + + global_tid_126705 = get_global_id(0); + local_tid_126706 = get_local_id(0); + group_sizze_126709 = get_local_size(0); + wave_sizze_126708 = LOCKSTEP_WIDTH; + group_tid_126707 = get_group_id(0); + + int32_t phys_tid_105899; + + phys_tid_105899 = global_tid_126705; + + __local char *red_arr_mem_126710; + + red_arr_mem_126710 = (__local char *) red_arr_mem_126710_backing_0; + + int32_t phys_group_id_126712; + + phys_group_id_126712 = get_group_id(0); + for (int32_t i_126713 = 0; i_126713 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703))) - + phys_group_id_126712, sext_i64_i32(num_groups_106100)); + i_126713++) { + int32_t virt_group_id_126714 = phys_group_id_126712 + i_126713 * + sext_i64_i32(num_groups_106100); + int64_t gtid_105890 = squot64(sext_i32_i64(local_tid_126706), + segment_sizze_nonzzero_126703) + + sext_i32_i64(virt_group_id_126714) * + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703); + int64_t gtid_105898 = srem64(sext_i32_i64(local_tid_126706), + k2p2zq_75151); - defunc_1_f_res_36607 = defunc_1_f_res_t_res_36608; - } else { - defunc_1_f_res_36607 = NAN; - } - - bool isnan_res_36609; - - isnan_res_36609 = futrts_isnan32(defunc_1_f_res_36607); - - bool defunc_0_p_res_36610 = !isnan_res_36609; - int64_t defunc_0_f_res_36611 = btoi_bool_i64(defunc_0_p_res_36610); - - ((__local int64_t *) mem_45144)[gtid_36510] = defunc_0_f_res_36611; - ((__local float *) mem_45146)[gtid_36510] = defunc_1_f_res_36607; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t dims_flat_46243; - - dims_flat_46243 = N_28477; - - int64_t x_36600; - int64_t x_36601; - int64_t 
x_46245; - int64_t x_46246; - bool ltid_in_bounds_46248; - - ltid_in_bounds_46248 = slt64(sext_i32_i64(local_tid_46238), N_28477); - - int32_t skip_threads_46249; - - // read input for in-block scan - { - if (ltid_in_bounds_46248) { - x_36601 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)]; - if ((local_tid_46238 - squot32(local_tid_46238, 32) * 32) == 0) { - x_36600 = x_36601; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && (slt64(gtid_105890, + m_75136) && + slt64(sext_i32_i64(local_tid_126706), + k2p2zq_75151 * + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703)))) { + double x_106107 = ((__global double *) mem_120246)[j_106067 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_105890 * + defunc_2_reduce_res_75260 + + gtid_105898]; + double defunc_1_f_res_106108 = x_106107 * x_106107; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + defunc_1_f_res_106108; + } + } else { + ((__local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + 0.0; } } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46249 = 1; - while (slt32(skip_threads_46249, 32)) { - if (sle32(skip_threads_46249, local_tid_46238 - - squot32(local_tid_46238, 32) * 32) && - ltid_in_bounds_46248) { - // read operands + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction + { + double x_106103; + double x_106104; + double x_126715; + double x_126716; + bool ltid_in_bounds_126718; + + ltid_in_bounds_126718 = slt64(sext_i32_i64(local_tid_126706), + k2p2zq_75151 * + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703)); + + int32_t skip_threads_126719; + + // read input for in-block scan { - x_36600 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238) - - 
sext_i32_i64(skip_threads_46249)]; + if (ltid_in_bounds_126718) { + x_106104 = ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)]; + if ((local_tid_126706 - squot32(local_tid_126706, 32) * + 32) == 0) { + x_106103 = x_106104; + } + } } - // perform operation + // in-block scan (hopefully no barriers needed) { - bool inactive_46250 = - slt64(srem64(sext_i32_i64(local_tid_46238), N_28477), - sext_i32_i64(local_tid_46238) - - sext_i32_i64(local_tid_46238 - - skip_threads_46249)); - - if (inactive_46250) { - x_36600 = x_36601; - } - if (!inactive_46250) { - int64_t defunc_1_op_res_36602 = add64(x_36600, x_36601); - - x_36600 = defunc_1_op_res_36602; + skip_threads_126719 = 1; + while (slt32(skip_threads_126719, 32)) { + if (sle32(skip_threads_126719, local_tid_126706 - + squot32(local_tid_126706, 32) * 32) && + ltid_in_bounds_126718) { + // read operands + { + x_106103 = ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706) - + sext_i32_i64(skip_threads_126719)]; + } + // perform operation + { + bool inactive_126720 = + slt64(srem64(sext_i32_i64(local_tid_126706), + k2p2zq_75151), + sext_i32_i64(local_tid_126706) - + sext_i32_i64(local_tid_126706 - + skip_threads_126719)); + + if (inactive_126720) { + x_106103 = x_106104; + } + if (!inactive_126720) { + double defunc_1_op_res_106105 = x_106103 + + x_106104; + + x_106103 = defunc_1_op_res_106105; + } + } + } + if (sle32(wave_sizze_126708, skip_threads_126719)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126719, local_tid_126706 - + squot32(local_tid_126706, 32) * 32) && + ltid_in_bounds_126718) { + // write result + { + ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + x_106103; + x_106104 = x_106103; + } + } + if (sle32(wave_sizze_126708, skip_threads_126719)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126719 *= 2; } } - } - if (sle32(wave_sizze_46240, skip_threads_46249)) { 
barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46249, local_tid_46238 - - squot32(local_tid_46238, 32) * 32) && - ltid_in_bounds_46248) { - // write result + // last thread of block 'i' writes its result to offset 'i' { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)] = - x_36600; - x_36601 = x_36600; + if ((local_tid_126706 - squot32(local_tid_126706, 32) * + 32) == 31 && ltid_in_bounds_126718) { + ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(squot32(local_tid_126706, + 32))] = + x_106103; + } } - } - if (sle32(wave_sizze_46240, skip_threads_46249)) { barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46249 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46238 - squot32(local_tid_46238, 32) * 32) == 31 && - ltid_in_bounds_46248) { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(squot32(local_tid_46238, - 32))] = x_36600; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46251; - - // read input for in-block scan - { - if (squot32(local_tid_46238, 32) == 0 && ltid_in_bounds_46248) { - x_46246 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)]; - if ((local_tid_46238 - squot32(local_tid_46238, 32) * 32) == - 0) { - x_46245 = x_46246; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46251 = 1; - while (slt32(skip_threads_46251, 32)) { - if (sle32(skip_threads_46251, local_tid_46238 - - squot32(local_tid_46238, 32) * 32) && - (squot32(local_tid_46238, 32) == 0 && - ltid_in_bounds_46248)) { - // read operands + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126721; + + // read input for in-block scan { - x_46245 = ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238) - - 
sext_i32_i64(skip_threads_46251)]; + if (squot32(local_tid_126706, 32) == 0 && + ltid_in_bounds_126718) { + x_126716 = ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)]; + if ((local_tid_126706 - squot32(local_tid_126706, + 32) * 32) == 0) { + x_126715 = x_126716; + } + } } - // perform operation + // in-block scan (hopefully no barriers needed) { - bool inactive_46252 = - slt64(srem64(sext_i32_i64(local_tid_46238 * 32 + - 32 - 1), N_28477), - sext_i32_i64(local_tid_46238 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46238 - - skip_threads_46251) * 32 + 32 - - 1)); - - if (inactive_46252) { - x_46245 = x_46246; + skip_threads_126721 = 1; + while (slt32(skip_threads_126721, 32)) { + if (sle32(skip_threads_126721, local_tid_126706 - + squot32(local_tid_126706, 32) * 32) && + (squot32(local_tid_126706, 32) == 0 && + ltid_in_bounds_126718)) { + // read operands + { + x_126715 = ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706) - + sext_i32_i64(skip_threads_126721)]; + } + // perform operation + { + bool inactive_126722 = + slt64(srem64(sext_i32_i64(local_tid_126706 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_126706 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126706 - + skip_threads_126721) * + 32 + 32 - 1)); + + if (inactive_126722) { + x_126715 = x_126716; + } + if (!inactive_126722) { + double defunc_1_op_res_126717 = + x_126715 + x_126716; + + x_126715 = defunc_1_op_res_126717; + } + } + } + if (sle32(wave_sizze_126708, skip_threads_126721)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126721, local_tid_126706 - + squot32(local_tid_126706, 32) * 32) && + (squot32(local_tid_126706, 32) == 0 && + ltid_in_bounds_126718)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + x_126715; + x_126716 = x_126715; + } + } + if (sle32(wave_sizze_126708, skip_threads_126721)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + 
skip_threads_126721 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126706, 32) == 0 || + !ltid_in_bounds_126718)) { + // read operands + { + x_106104 = x_106103; + x_106103 = ((__local + double *) red_arr_mem_126710)[sext_i32_i64(squot32(local_tid_126706, + 32)) - + (int64_t) 1]; } - if (!inactive_46252) { - int64_t defunc_1_op_res_46247 = add64(x_46245, - x_46246); + // perform operation + { + bool inactive_126723 = + slt64(srem64(sext_i32_i64(local_tid_126706), + k2p2zq_75151), + sext_i32_i64(local_tid_126706) - + sext_i32_i64(squot32(local_tid_126706, + 32) * 32 - 1)); - x_46245 = defunc_1_op_res_46247; + if (inactive_126723) { + x_106103 = x_106104; + } + if (!inactive_126723) { + double defunc_1_op_res_106105 = x_106103 + + x_106104; + + x_106103 = defunc_1_op_res_106105; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + x_106103; } } } - if (sle32(wave_sizze_46240, skip_threads_46251)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46251, local_tid_46238 - - squot32(local_tid_46238, 32) * 32) && - (squot32(local_tid_46238, 32) == 0 && - ltid_in_bounds_46248)) { - // write result - { - ((volatile __local - int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)] = - x_46245; - x_46246 = x_46245; + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126706, 32) == 0) { + ((__local + double *) red_arr_mem_126710)[sext_i32_i64(local_tid_126706)] = + x_106104; } } - if (sle32(wave_sizze_46240, skip_threads_46251)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46251 *= 2; + barrier(CLK_LOCAL_MEM_FENCE); } } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46238, 32) == 0 || !ltid_in_bounds_46248)) { - // read operands - { - x_36601 = x_36600; - x_36600 = ((__local - int64_t *) 
mem_45144)[sext_i32_i64(squot32(local_tid_46238, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46253 = - slt64(srem64(sext_i32_i64(local_tid_46238), N_28477), - sext_i32_i64(local_tid_46238) - - sext_i32_i64(squot32(local_tid_46238, 32) * 32 - 1)); - - if (inactive_46253) { - x_36600 = x_36601; - } - if (!inactive_46253) { - int64_t defunc_1_op_res_36602 = add64(x_36600, x_36601); - - x_36600 = defunc_1_op_res_36602; - } - } - // write final result - { - ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)] = - x_36600; + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126714) * + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703) + + sext_i32_i64(local_tid_126706), m_75136) && + slt64(sext_i32_i64(local_tid_126706), + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703))) { + ((__global + double *) mem_120938)[sext_i32_i64(virt_group_id_126714) * + squot64(segred_group_sizze_106099, + segment_sizze_nonzzero_126703) + + sext_i32_i64(local_tid_126706)] = + ((__local + double *) red_arr_mem_126710)[(sext_i32_i64(local_tid_126706) + + (int64_t) 1) * + segment_sizze_nonzzero_126703 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46238, 32) == 0) { - ((__local int64_t *) mem_45144)[sext_i32_i64(local_tid_46238)] = - x_36601; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t last_res_36612 = ((__local int64_t *) mem_45144)[i_28781]; - int32_t defunc_0_f_res_36613 = sext_i64_i32(last_res_36612); - __local char *mem_45148; - - mem_45148 = (__local char *) mem_45148_backing_2; - ((__local float *) mem_45148)[sext_i32_i64(local_tid_46238)] = NAN; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45150; - - mem_45150 = (__local char *) mem_45150_backing_3; - ((__local int32_t *) 
mem_45150)[sext_i32_i64(local_tid_46238)] = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t write_i_36512 = sext_i32_i64(ltid_pre_46242); - int32_t phys_tid_36513 = local_tid_46238; - float x_36618 = ((__local float *) mem_45146)[write_i_36512]; - int32_t index_primexp_42374 = sext_i64_i32(write_i_36512); - bool isnan_res_36621; - - isnan_res_36621 = futrts_isnan32(x_36618); - - bool defunc_0_p_res_36622 = !isnan_res_36621; - int64_t defunc_1_f_res_36623; - - if (defunc_0_p_res_36622) { - int64_t x_36619 = ((__local int64_t *) mem_45144)[write_i_36512]; - int64_t defunc_1_f_res_t_res_36624 = sub64(x_36619, (int64_t) 1); - - defunc_1_f_res_36623 = defunc_1_f_res_t_res_36624; - } else { - defunc_1_f_res_36623 = (int64_t) -1; - } - if (sle64((int64_t) 0, defunc_1_f_res_36623) && slt64(defunc_1_f_res_36623, - N_28477)) { - ((__local int32_t *) mem_45150)[defunc_1_f_res_36623] = - index_primexp_42374; - } - if (sle64((int64_t) 0, defunc_1_f_res_36623) && slt64(defunc_1_f_res_36623, - N_28477)) { - ((__local float *) mem_45148)[defunc_1_f_res_36623] = x_36618; - } - barrier(CLK_LOCAL_MEM_FENCE); - if (local_tid_46238 == 0) { - ((__global int32_t *) mem_45153)[gtid_36507] = defunc_0_f_res_36613; - } - ((__global float *) mem_45156)[gtid_36507 * N_28477 + - sext_i32_i64(local_tid_46238)] = ((__local - float *) mem_45148)[sext_i32_i64(local_tid_46238)]; - barrier(CLK_LOCAL_MEM_FENCE); - ((__global int32_t *) mem_45159)[gtid_36507 * N_28477 + - sext_i32_i64(local_tid_46238)] = ((__local - int32_t *) mem_45150)[sext_i32_i64(local_tid_46238)]; - barrier(CLK_LOCAL_MEM_FENCE); - error_2: + error_1: return; + #undef segred_group_sizze_106099 } -__kernel void mainMagnitudezisegmap_intragroup_36854(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46362_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46358_backing_aligned_1, - int64_t N_28477, - float hfrac_28483, - int64_t 
i32_res_28487, - int32_t k2p2_28489, - __global - unsigned char *images_mem_44381, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45225, - __global - unsigned char *mem_45227, - __global - unsigned char *mem_45229) +__kernel void mainzisegred_small_108614(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127863_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_110595, + int64_t segment_sizze_nonzzero_127856, + __global unsigned char *mem_123614, + __global unsigned char *mem_123618, + __global unsigned char *mem_123623) { + #define segred_group_sizze_110594 (mainzisegred_group_sizze_108608) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46362_backing_1 = + __local volatile char *restrict red_arr_mem_127863_backing_0 = (__local volatile - char *) red_arr_mem_46362_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46358_backing_0 = - (__local volatile - char *) red_arr_mem_46358_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46352; - int32_t local_tid_46353; - int64_t group_sizze_46356; - int32_t wave_sizze_46355; - int32_t group_tid_46354; - - global_tid_46352 = get_global_id(0); - local_tid_46353 = get_local_id(0); - group_sizze_46356 = get_local_size(0); - wave_sizze_46355 = LOCKSTEP_WIDTH; - group_tid_46354 = get_group_id(0); - - int32_t phys_tid_36854; - - phys_tid_36854 = group_tid_46354; - - int32_t ltid_pre_46357; - - ltid_pre_46357 = local_tid_46353; - - int64_t gtid_36847; - - gtid_36847 = sext_i32_i64(group_tid_46354); - - int32_t defunc_0_f_res_36939; - int64_t gtid_36850 = sext_i32_i64(ltid_pre_46357); - int32_t phys_tid_36851 = local_tid_46353; - __local char *red_arr_mem_46358; - - 
red_arr_mem_46358 = (__local char *) red_arr_mem_46358_backing_0; - - float x_36943; - - x_36943 = ((__global float *) images_mem_44381)[gtid_36847 * N_28477 + - gtid_36850]; - - bool isnan_res_36944; - - isnan_res_36944 = futrts_isnan32(x_36943); - - bool cond_36945 = !isnan_res_36944; - int32_t defunc_0_f_res_36946 = btoi_bool_i32(cond_36945); - - ((__local int32_t *) red_arr_mem_46358)[gtid_36850] = defunc_0_f_res_36946; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46360; - int32_t skip_waves_46361; - - skip_waves_46361 = 1; - - int32_t x_36940; - int32_t x_36941; - - offset_46360 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46353, sext_i64_i32(i32_res_28487))) { - x_36940 = ((__local - int32_t *) red_arr_mem_46358)[sext_i32_i64(local_tid_46353 + - offset_46360)]; - } - } - offset_46360 = 1; - while (slt32(offset_46360, wave_sizze_46355)) { - if (slt32(local_tid_46353 + offset_46360, - sext_i64_i32(i32_res_28487)) && ((local_tid_46353 - - squot32(local_tid_46353, - wave_sizze_46355) * - wave_sizze_46355) & (2 * - offset_46360 - - 1)) == - 0) { - // read array element - { - x_36941 = ((volatile __local - int32_t *) red_arr_mem_46358)[sext_i32_i64(local_tid_46353 + - offset_46360)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_36942 = add32(x_36940, x_36941); - - x_36940 = defunc_1_op_res_36942; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46358)[sext_i32_i64(local_tid_46353)] = - x_36940; - } - } - offset_46360 *= 2; - } - while (slt32(skip_waves_46361, squot32(sext_i64_i32(i32_res_28487) + - wave_sizze_46355 - 1, - wave_sizze_46355))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46360 = skip_waves_46361 * wave_sizze_46355; - if (slt32(local_tid_46353 + offset_46360, - sext_i64_i32(i32_res_28487)) && ((local_tid_46353 - - squot32(local_tid_46353, - wave_sizze_46355) * - wave_sizze_46355) == 0 && - (squot32(local_tid_46353, - wave_sizze_46355) & - (2 * 
skip_waves_46361 - - 1)) == 0)) { - // read array element - { - x_36941 = ((__local - int32_t *) red_arr_mem_46358)[sext_i32_i64(local_tid_46353 + - offset_46360)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_36942 = add32(x_36940, x_36941); - - x_36940 = defunc_1_op_res_36942; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46358)[sext_i32_i64(local_tid_46353)] = - x_36940; - } - } - skip_waves_46361 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_36939 = ((__local int32_t *) red_arr_mem_46358)[(int64_t) 0]; - - float defunc_0_f_res_36947; - int64_t gtid_36852 = sext_i32_i64(ltid_pre_46357); - int32_t phys_tid_36853 = local_tid_46353; - __local char *red_arr_mem_46362; - - red_arr_mem_46362 = (__local char *) red_arr_mem_46362_backing_1; + char *) red_arr_mem_127863_backing_aligned_0; - int32_t index_primexp_42382; - - index_primexp_42382 = sext_i64_i32(gtid_36852); - - bool cond_36952 = slt32(index_primexp_42382, defunc_0_f_res_36939); - float defunc_0_f_res_36953; + if (*global_failure >= 0) + return; - if (cond_36952) { - int64_t i_36954 = sext_i32_i64(index_primexp_42382); - bool x_36955 = sle64((int64_t) 0, i_36954); - bool y_36956 = slt64(i_36954, N_28477); - bool bounds_check_36957 = x_36955 && y_36956; - bool index_certs_36958; + int32_t global_tid_127858; + int32_t local_tid_127859; + int64_t group_sizze_127862; + int32_t wave_sizze_127861; + int32_t group_tid_127860; + + global_tid_127858 = get_global_id(0); + local_tid_127859 = get_local_id(0); + group_sizze_127862 = get_local_size(0); + wave_sizze_127861 = LOCKSTEP_WIDTH; + group_tid_127860 = get_group_id(0); + + int32_t phys_tid_108614; + + phys_tid_108614 = global_tid_127858; + + __local char *red_arr_mem_127863; + + red_arr_mem_127863 = (__local char *) red_arr_mem_127863_backing_0; + + int32_t phys_group_id_127865; + + phys_group_id_127865 = get_group_id(0); + for (int32_t i_127866 = 0; i_127866 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 
* k2p2zq_75151 * k2p2zq_75151, + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856))) - + phys_group_id_127865, sext_i64_i32(num_groups_110595)); + i_127866++) { + int32_t virt_group_id_127867 = phys_group_id_127865 + i_127866 * + sext_i64_i32(num_groups_110595); + int64_t gtid_108601 = squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_108602 = squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) - + squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_108603 = squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) - + squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) - + squot64(squot64(sext_i32_i64(local_tid_127859), + segment_sizze_nonzzero_127856) + + sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t 
gtid_108613 = srem64(sext_i32_i64(local_tid_127859), + k2p2zq_75151); - if (!bounds_check_36957) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, 96) == -1) { - global_failure_args[0] = i_36954; - global_failure_args[1] = N_28477; - ; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && (((slt64(gtid_108601, + m_75136) && + slt64(gtid_108602, + k2p2zq_75151)) && + slt64(gtid_108603, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_127859), + k2p2zq_75151 * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856)))) { + double x_110604 = ((__global double *) mem_123614)[gtid_108602 * + (k2p2zq_75151 * + m_75136) + + gtid_108601 * + k2p2zq_75151 + + gtid_108613]; + double x_110605 = ((__global double *) mem_123618)[gtid_108601 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_108603 * + k2p2zq_75151 + + gtid_108613]; + double defunc_1_f_res_110606 = x_110604 * x_110605; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + defunc_1_f_res_110606; } - local_failure = true; - goto error_2; + } else { + ((__local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + 0.0; } } - - float defunc_0_f_res_t_res_36959 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_36847 * - N_28477 + - i_36954]; - - defunc_0_f_res_36953 = defunc_0_f_res_t_res_36959; - } else { - defunc_0_f_res_36953 = 0.0F; - } - - float defunc_0_f_res_36960 = defunc_0_f_res_36953 * defunc_0_f_res_36953; - - ((__local float *) red_arr_mem_46362)[gtid_36852] = defunc_0_f_res_36960; - - error_2: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46364; - int32_t skip_waves_46365; - - skip_waves_46365 = 1; - - float x_36948; - float x_36949; - - offset_46364 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46353, sext_i64_i32(i32_res_28487))) { - 
x_36948 = ((__local - float *) red_arr_mem_46362)[sext_i32_i64(local_tid_46353 + - offset_46364)]; - } - } - offset_46364 = 1; - while (slt32(offset_46364, wave_sizze_46355)) { - if (slt32(local_tid_46353 + offset_46364, - sext_i64_i32(i32_res_28487)) && ((local_tid_46353 - - squot32(local_tid_46353, - wave_sizze_46355) * - wave_sizze_46355) & (2 * - offset_46364 - - 1)) == - 0) { - // read array element - { - x_36949 = ((volatile __local - float *) red_arr_mem_46362)[sext_i32_i64(local_tid_46353 + - offset_46364)]; - } - // apply reduction operation + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction { - float defunc_1_op_res_36950 = x_36948 + x_36949; + double x_110598; + double x_110599; + double x_127868; + double x_127869; + bool ltid_in_bounds_127871; - x_36948 = defunc_1_op_res_36950; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46362)[sext_i32_i64(local_tid_46353)] = - x_36948; - } - } - offset_46364 *= 2; - } - while (slt32(skip_waves_46365, squot32(sext_i64_i32(i32_res_28487) + - wave_sizze_46355 - 1, - wave_sizze_46355))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46364 = skip_waves_46365 * wave_sizze_46355; - if (slt32(local_tid_46353 + offset_46364, - sext_i64_i32(i32_res_28487)) && ((local_tid_46353 - - squot32(local_tid_46353, - wave_sizze_46355) * - wave_sizze_46355) == 0 && - (squot32(local_tid_46353, - wave_sizze_46355) & - (2 * skip_waves_46365 - - 1)) == 0)) { - // read array element - { - x_36949 = ((__local - float *) red_arr_mem_46362)[sext_i32_i64(local_tid_46353 + - offset_46364)]; - } - // apply reduction operation - { - float defunc_1_op_res_36950 = x_36948 + x_36949; + ltid_in_bounds_127871 = slt64(sext_i32_i64(local_tid_127859), + k2p2zq_75151 * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856)); - x_36948 = defunc_1_op_res_36950; - } - // write result of operation - { - ((__local - float *) 
red_arr_mem_46362)[sext_i32_i64(local_tid_46353)] = - x_36948; - } - } - skip_waves_46365 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - defunc_0_f_res_36947 = ((__local float *) red_arr_mem_46362)[(int64_t) 0]; - - int32_t r32_arg_36961 = sub32(defunc_0_f_res_36939, k2p2_28489); - float i32_res_36962 = sitofp_i32_f32(r32_arg_36961); - float sqrt_arg_36963 = defunc_0_f_res_36947 / i32_res_36962; - float sqrt_res_36964; - - sqrt_res_36964 = futrts_sqrt32(sqrt_arg_36963); - - float i32_res_36965 = sitofp_i32_f32(defunc_0_f_res_36939); - float t32_arg_36966 = hfrac_28483 * i32_res_36965; - int32_t f32_res_36967 = fptosi_f32_i32(t32_arg_36966); - - if (local_tid_46353 == 0) { - ((__global int32_t *) mem_45225)[gtid_36847] = f32_res_36967; - } - if (local_tid_46353 == 0) { - ((__global int32_t *) mem_45227)[gtid_36847] = defunc_0_f_res_36939; - } - if (local_tid_46353 == 0) { - ((__global float *) mem_45229)[gtid_36847] = sqrt_res_36964; - } - - error_4: - return; -} -__kernel void mainMagnitudezisegmap_intragroup_37880(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46645_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46643_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46641_backing_aligned_2, - __local volatile - int64_t *mem_45329_backing_aligned_3, - int64_t N_28477, - int32_t n_28481, - int64_t iota32_arg_28909, - __global - unsigned char *defunc_4_map_res_mem_45177, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_4_map_res_mem_45179, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *defunc_3_map_res_mem_45246, - __global - unsigned char *defunc_0_f_res_mem_45279, - __global - unsigned char *mem_45284, - __global - unsigned char *mem_45332, - __global - unsigned char *mem_45334) -{ - const int block_dim0 = 0; - 
const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46645_backing_3 = - (__local volatile - char *) red_arr_mem_46645_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46643_backing_2 = - (__local volatile - char *) red_arr_mem_46643_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46641_backing_1 = - (__local volatile - char *) red_arr_mem_46641_backing_aligned_2; - __local volatile char *restrict mem_45329_backing_0 = (__local volatile - char *) mem_45329_backing_aligned_3; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46624; - int32_t local_tid_46625; - int64_t group_sizze_46628; - int32_t wave_sizze_46627; - int32_t group_tid_46626; - - global_tid_46624 = get_global_id(0); - local_tid_46625 = get_local_id(0); - group_sizze_46628 = get_local_size(0); - wave_sizze_46627 = LOCKSTEP_WIDTH; - group_tid_46626 = get_group_id(0); - - int32_t phys_tid_37880; - - phys_tid_37880 = group_tid_46626; - - int32_t ltid_pre_46629; - - ltid_pre_46629 = local_tid_46625; - - int64_t gtid_37873; - - gtid_37873 = sext_i32_i64(group_tid_46626); - - int32_t x_38123; - - x_38123 = ((__global int32_t *) defunc_4_map_res_mem_45177)[gtid_37873]; - - int32_t x_38124 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37873]; - float x_38125 = ((__global float *) defunc_3_map_res_mem_45246)[gtid_37873]; - int32_t x_38126 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_37873]; - float x_38127 = ((__global float *) defunc_0_f_res_mem_45279)[gtid_37873]; - int32_t y_38130 = sub32(x_38123, x_38124); - float i32_res_38131 = sitofp_i32_f32(x_38124); - float sqrt_res_38132; - - sqrt_res_38132 = futrts_sqrt32(i32_res_38131); - - float y_38133 = x_38125 * sqrt_res_38132; - __local char *mem_45329; - - mem_45329 = (__local char *) 
mem_45329_backing_0; - - int64_t gtid_37876 = sext_i32_i64(ltid_pre_46629); - int32_t phys_tid_37877 = local_tid_46625; - int32_t index_primexp_42400 = sext_i64_i32(gtid_37876); - bool cond_38146 = sle32(y_38130, index_primexp_42400); - float defunc_0_f_res_38147; - - if (cond_38146) { - defunc_0_f_res_38147 = 0.0F; - } else { - bool cond_38148 = index_primexp_42400 == 0; - float defunc_0_f_res_f_res_38149; - - if (cond_38148) { - defunc_0_f_res_f_res_38149 = x_38127; - } else { - int32_t i_38150 = add32(x_38124, index_primexp_42400); - int64_t i_38151 = sext_i32_i64(i_38150); - bool x_38152 = sle64((int64_t) 0, i_38151); - bool y_38153 = slt64(i_38151, N_28477); - bool bounds_check_38154 = x_38152 && y_38153; - bool index_certs_38155; - - if (!bounds_check_38154) { + int32_t skip_threads_127872; + + // read input for in-block scan { - if (atomic_cmpxchg_i32_global(global_failure, -1, 115) == - -1) { - global_failure_args[0] = i_38151; - global_failure_args[1] = N_28477; - ; + if (ltid_in_bounds_127871) { + x_110599 = ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)]; + if ((local_tid_127859 - squot32(local_tid_127859, 32) * + 32) == 0) { + x_110598 = x_110599; + } } - local_failure = true; - goto error_0; } - } - - float x_38156 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37873 * - N_28477 + - i_38151]; - int32_t x_38157 = sub32(x_38124, x_38126); - int32_t i_38158 = add32(x_38157, index_primexp_42400); - int64_t i_38159 = sext_i32_i64(i_38158); - bool x_38160 = sle64((int64_t) 0, i_38159); - bool y_38161 = slt64(i_38159, N_28477); - bool bounds_check_38162 = x_38160 && y_38161; - bool index_certs_38163; - - if (!bounds_check_38162) { + // in-block scan (hopefully no barriers needed) { - if (atomic_cmpxchg_i32_global(global_failure, -1, 116) == - -1) { - global_failure_args[0] = i_38159; - global_failure_args[1] = N_28477; - ; + skip_threads_127872 = 1; + while (slt32(skip_threads_127872, 32)) { + if 
(sle32(skip_threads_127872, local_tid_127859 - + squot32(local_tid_127859, 32) * 32) && + ltid_in_bounds_127871) { + // read operands + { + x_110598 = ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859) - + sext_i32_i64(skip_threads_127872)]; + } + // perform operation + { + bool inactive_127873 = + slt64(srem64(sext_i32_i64(local_tid_127859), + k2p2zq_75151), + sext_i32_i64(local_tid_127859) - + sext_i32_i64(local_tid_127859 - + skip_threads_127872)); + + if (inactive_127873) { + x_110598 = x_110599; + } + if (!inactive_127873) { + double defunc_1_op_res_110600 = x_110598 + + x_110599; + + x_110598 = defunc_1_op_res_110600; + } + } + } + if (sle32(wave_sizze_127861, skip_threads_127872)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127872, local_tid_127859 - + squot32(local_tid_127859, 32) * 32) && + ltid_in_bounds_127871) { + // write result + { + ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + x_110598; + x_110599 = x_110598; + } + } + if (sle32(wave_sizze_127861, skip_threads_127872)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127872 *= 2; } - local_failure = true; - goto error_0; } - } - - float y_38164 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37873 * - N_28477 + - i_38159]; - float defunc_0_f_res_f_res_f_res_38165 = x_38156 - y_38164; - - defunc_0_f_res_f_res_38149 = defunc_0_f_res_f_res_f_res_38165; - } - defunc_0_f_res_38147 = defunc_0_f_res_f_res_38149; - } - ((__local float *) mem_45329)[gtid_37876] = defunc_0_f_res_38147; - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t dims_flat_46630; - - dims_flat_46630 = iota32_arg_28909; - - float x_38142; - float x_38143; - float x_46632; - float x_46633; - bool ltid_in_bounds_46635; - - ltid_in_bounds_46635 = slt64(sext_i32_i64(local_tid_46625), - iota32_arg_28909); - - int32_t skip_threads_46636; - - // read input for in-block scan 
- { - if (ltid_in_bounds_46635) { - x_38143 = ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625)]; - if ((local_tid_46625 - squot32(local_tid_46625, 32) * 32) == 0) { - x_38142 = x_38143; - } - } - } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46636 = 1; - while (slt32(skip_threads_46636, 32)) { - if (sle32(skip_threads_46636, local_tid_46625 - - squot32(local_tid_46625, 32) * 32) && - ltid_in_bounds_46635) { - // read operands + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' { - x_38142 = ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625) - - sext_i32_i64(skip_threads_46636)]; + if ((local_tid_127859 - squot32(local_tid_127859, 32) * + 32) == 31 && ltid_in_bounds_127871) { + ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(squot32(local_tid_127859, + 32))] = + x_110598; + } } - // perform operation + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - bool inactive_46637 = - slt64(srem64(sext_i32_i64(local_tid_46625), - iota32_arg_28909), - sext_i32_i64(local_tid_46625) - - sext_i32_i64(local_tid_46625 - - skip_threads_46636)); + int32_t skip_threads_127874; - if (inactive_46637) { - x_38142 = x_38143; + // read input for in-block scan + { + if (squot32(local_tid_127859, 32) == 0 && + ltid_in_bounds_127871) { + x_127869 = ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)]; + if ((local_tid_127859 - squot32(local_tid_127859, + 32) * 32) == 0) { + x_127868 = x_127869; + } + } } - if (!inactive_46637) { - float defunc_1_op_res_38144 = x_38142 + x_38143; - - x_38142 = defunc_1_op_res_38144; + // in-block scan (hopefully no barriers needed) + { + skip_threads_127874 = 1; + while (slt32(skip_threads_127874, 32)) { + if (sle32(skip_threads_127874, local_tid_127859 - + squot32(local_tid_127859, 32) * 32) && + (squot32(local_tid_127859, 32) == 0 && + 
ltid_in_bounds_127871)) { + // read operands + { + x_127868 = ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859) - + sext_i32_i64(skip_threads_127874)]; + } + // perform operation + { + bool inactive_127875 = + slt64(srem64(sext_i32_i64(local_tid_127859 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_127859 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127859 - + skip_threads_127874) * + 32 + 32 - 1)); + + if (inactive_127875) { + x_127868 = x_127869; + } + if (!inactive_127875) { + double defunc_1_op_res_127870 = + x_127868 + x_127869; + + x_127868 = defunc_1_op_res_127870; + } + } + } + if (sle32(wave_sizze_127861, skip_threads_127874)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127874, local_tid_127859 - + squot32(local_tid_127859, 32) * 32) && + (squot32(local_tid_127859, 32) == 0 && + ltid_in_bounds_127871)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + x_127868; + x_127869 = x_127868; + } + } + if (sle32(wave_sizze_127861, skip_threads_127874)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127874 *= 2; + } } } - } - if (sle32(wave_sizze_46627, skip_threads_46636)) { barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46636, local_tid_46625 - - squot32(local_tid_46625, 32) * 32) && - ltid_in_bounds_46635) { - // write result + // carry-in for every block except the first { - ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625)] = - x_38142; - x_38143 = x_38142; + if (!(squot32(local_tid_127859, 32) == 0 || + !ltid_in_bounds_127871)) { + // read operands + { + x_110599 = x_110598; + x_110598 = ((__local + double *) red_arr_mem_127863)[sext_i32_i64(squot32(local_tid_127859, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127876 = + slt64(srem64(sext_i32_i64(local_tid_127859), + k2p2zq_75151), + sext_i32_i64(local_tid_127859) - + sext_i32_i64(squot32(local_tid_127859, + 32) * 32 - 
1)); + + if (inactive_127876) { + x_110598 = x_110599; + } + if (!inactive_127876) { + double defunc_1_op_res_110600 = x_110598 + + x_110599; + + x_110598 = defunc_1_op_res_110600; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + x_110598; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127859, 32) == 0) { + ((__local + double *) red_arr_mem_127863)[sext_i32_i64(local_tid_127859)] = + x_110599; + } } - } - if (sle32(wave_sizze_46627, skip_threads_46636)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46636 *= 2; } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46625 - squot32(local_tid_46625, 32) * 32) == 31 && - ltid_in_bounds_46635) { - ((volatile __local - float *) mem_45329)[sext_i32_i64(squot32(local_tid_46625, 32))] = - x_38142; + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859), m_75136 * k2p2zq_75151 * + k2p2zq_75151) && slt64(sext_i32_i64(local_tid_127859), + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856))) { + ((__global + double *) mem_123623)[squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) + + squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859) - + squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151 + 
+ (sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859) - + squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859) - + squot64(sext_i32_i64(virt_group_id_127867) * + squot64(segred_group_sizze_110594, + segment_sizze_nonzzero_127856) + + sext_i32_i64(local_tid_127859), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_127863)[(sext_i32_i64(local_tid_127859) + + (int64_t) 1) * + segment_sizze_nonzzero_127856 - + (int64_t) 1]; + } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46638; + + error_1: + return; + #undef segred_group_sizze_110594 +} +__kernel void mainzisegred_small_108892(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127719_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t x_110426, int64_t i_110427, + int64_t j_m_i_110431, + int64_t num_groups_110513, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_127712, + __global unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global unsigned char *mem_123338) +{ + #define segred_group_sizze_110512 (mainzisegred_group_sizze_108886) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127719_backing_0 = + (__local volatile + char *) red_arr_mem_127719_backing_aligned_0; + + if 
(*global_failure >= 0) + return; + + int32_t global_tid_127714; + int32_t local_tid_127715; + int64_t group_sizze_127718; + int32_t wave_sizze_127717; + int32_t group_tid_127716; + + global_tid_127714 = get_global_id(0); + local_tid_127715 = get_local_id(0); + group_sizze_127718 = get_local_size(0); + wave_sizze_127717 = LOCKSTEP_WIDTH; + group_tid_127716 = get_group_id(0); + + int32_t phys_tid_108892; + + phys_tid_108892 = global_tid_127714; + + __local char *red_arr_mem_127719; + + red_arr_mem_127719 = (__local char *) red_arr_mem_127719_backing_0; + + int32_t phys_group_id_127721; + + phys_group_id_127721 = get_group_id(0); + for (int32_t i_127722 = 0; i_127722 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712))) - + phys_group_id_127721, sext_i64_i32(num_groups_110513)); + i_127722++) { + int32_t virt_group_id_127723 = phys_group_id_127721 + i_127722 * + sext_i64_i32(num_groups_110513); + int64_t gtid_108881 = squot64(squot64(sext_i32_i64(local_tid_127715), + segment_sizze_nonzzero_127712) + + sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712), + k2p2zq_75151); + int64_t gtid_108882 = squot64(sext_i32_i64(local_tid_127715), + segment_sizze_nonzzero_127712) + + sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712) - + squot64(squot64(sext_i32_i64(local_tid_127715), + segment_sizze_nonzzero_127712) + + sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_108891 = srem64(sext_i32_i64(local_tid_127715), + j_m_i_110431); - // read input for in-block scan + // apply map function if in bounds { - if (squot32(local_tid_46625, 32) == 0 && ltid_in_bounds_46635) { - x_46633 = ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625)]; - if ((local_tid_46625 - 
squot32(local_tid_46625, 32) * 32) == - 0) { - x_46632 = x_46633; + if (slt64((int64_t) 0, j_m_i_110431) && ((slt64(gtid_108881, + m_75136) && + slt64(gtid_108882, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_127715), + j_m_i_110431 * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712)))) { + int64_t slice_115165 = gtid_108891 + x_110426; + double x_110523 = ((__global double *) mem_123143)[gtid_108881 * + (k2p2zq_75151 * + k2p2zq_75151) + + slice_115165 * + k2p2zq_75151 + + i_110427]; + bool isnan_res_110524; + + isnan_res_110524 = futrts_isnan64(x_110523); + + double defunc_1_f_res_110525; + + if (isnan_res_110524) { + defunc_1_f_res_110525 = 0.0; + } else { + double x_110522 = ((__global + double *) mem_param_123252)[gtid_108881 * + binop_x_120251 + + gtid_108882 * + k2p2zq_75151 + + slice_115165]; + double defunc_1_f_res_f_res_110526 = x_110522 * x_110523; + + defunc_1_f_res_110525 = defunc_1_f_res_f_res_110526; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + defunc_1_f_res_110525; } + } else { + ((__local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + 0.0; } } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46638 = 1; - while (slt32(skip_threads_46638, 32)) { - if (sle32(skip_threads_46638, local_tid_46625 - - squot32(local_tid_46625, 32) * 32) && - (squot32(local_tid_46625, 32) == 0 && - ltid_in_bounds_46635)) { - // read operands - { - x_46632 = ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625) - - sext_i32_i64(skip_threads_46638)]; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, j_m_i_110431)) { + // perform segmented scan to imitate reduction + { + double x_110516; + double x_110517; + double x_127724; + double x_127725; + bool ltid_in_bounds_127727; + + ltid_in_bounds_127727 = slt64(sext_i32_i64(local_tid_127715), + j_m_i_110431 * + 
squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712)); + + int32_t skip_threads_127728; + + // read input for in-block scan + { + if (ltid_in_bounds_127727) { + x_110517 = ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)]; + if ((local_tid_127715 - squot32(local_tid_127715, 32) * + 32) == 0) { + x_110516 = x_110517; + } } - // perform operation - { - bool inactive_46639 = - slt64(srem64(sext_i32_i64(local_tid_46625 * 32 + - 32 - 1), iota32_arg_28909), - sext_i32_i64(local_tid_46625 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46625 - - skip_threads_46638) * 32 + 32 - - 1)); - - if (inactive_46639) { - x_46632 = x_46633; + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127728 = 1; + while (slt32(skip_threads_127728, 32)) { + if (sle32(skip_threads_127728, local_tid_127715 - + squot32(local_tid_127715, 32) * 32) && + ltid_in_bounds_127727) { + // read operands + { + x_110516 = ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715) - + sext_i32_i64(skip_threads_127728)]; + } + // perform operation + { + bool inactive_127729 = + slt64(srem64(sext_i32_i64(local_tid_127715), + j_m_i_110431), + sext_i32_i64(local_tid_127715) - + sext_i32_i64(local_tid_127715 - + skip_threads_127728)); + + if (inactive_127729) { + x_110516 = x_110517; + } + if (!inactive_127729) { + double defunc_1_op_res_110518 = x_110516 + + x_110517; + + x_110516 = defunc_1_op_res_110518; + } + } } - if (!inactive_46639) { - float defunc_1_op_res_46634 = x_46632 + x_46633; - - x_46632 = defunc_1_op_res_46634; + if (sle32(wave_sizze_127717, skip_threads_127728)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127728, local_tid_127715 - + squot32(local_tid_127715, 32) * 32) && + ltid_in_bounds_127727) { + // write result + { + ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + x_110516; + x_110517 = x_110516; + } + } + if (sle32(wave_sizze_127717, 
skip_threads_127728)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_127728 *= 2; } } - if (sle32(wave_sizze_46627, skip_threads_46638)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46638, local_tid_46625 - - squot32(local_tid_46625, 32) * 32) && - (squot32(local_tid_46625, 32) == 0 && - ltid_in_bounds_46635)) { - // write result - { + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127715 - squot32(local_tid_127715, 32) * + 32) == 31 && ltid_in_bounds_127727) { ((volatile __local - float *) mem_45329)[sext_i32_i64(local_tid_46625)] = - x_46632; - x_46633 = x_46632; + double *) red_arr_mem_127719)[sext_i32_i64(squot32(local_tid_127715, + 32))] = + x_110516; } } - if (sle32(wave_sizze_46627, skip_threads_46638)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46638 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46625, 32) == 0 || !ltid_in_bounds_46635)) { - // read operands - { - x_38143 = x_38142; - x_38142 = ((__local - float *) mem_45329)[sext_i32_i64(squot32(local_tid_46625, - 32)) - - (int64_t) 1]; - } - // perform operation - { - bool inactive_46640 = - slt64(srem64(sext_i32_i64(local_tid_46625), - iota32_arg_28909), - sext_i32_i64(local_tid_46625) - - sext_i32_i64(squot32(local_tid_46625, 32) * 32 - 1)); - - if (inactive_46640) { - x_38142 = x_38143; - } - if (!inactive_46640) { - float defunc_1_op_res_38144 = x_38142 + x_38143; + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127730; - x_38142 = defunc_1_op_res_38144; - } - } - // write final result - { - ((__local float *) mem_45329)[sext_i32_i64(local_tid_46625)] = - x_38142; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46625, 32) == 0) { - ((__local float *) 
mem_45329)[sext_i32_i64(local_tid_46625)] = - x_38143; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - bool acc0_38171; - int32_t acc0_38172; - float acc0_38173; - int64_t gtid_37878 = sext_i32_i64(ltid_pre_46629); - int32_t phys_tid_37879 = local_tid_46625; - __local char *red_arr_mem_46641; - - red_arr_mem_46641 = (__local char *) red_arr_mem_46641_backing_1; - - __local char *red_arr_mem_46643; - - red_arr_mem_46643 = (__local char *) red_arr_mem_46643_backing_2; - - __local char *red_arr_mem_46645; - - red_arr_mem_46645 = (__local char *) red_arr_mem_46645_backing_3; - - float x_38188; - - x_38188 = ((__local float *) mem_45329)[gtid_37878]; - - float x_38189 = ((__global float *) mem_45284)[gtid_37878]; - int32_t index_primexp_42403 = sext_i64_i32(gtid_37878); - float defunc_0_f_res_38192 = x_38188 / y_38133; - bool cond_38193 = slt32(index_primexp_42403, y_38130); - bool isnan_res_38194; - - isnan_res_38194 = futrts_isnan32(defunc_0_f_res_38192); - - bool cond_t_res_38195 = !isnan_res_38194; - bool x_38196 = cond_38193 && cond_t_res_38195; - float abs_res_38197 = (float) fabs(defunc_0_f_res_38192); - bool defunc_2_f_res_t_res_38198 = x_38189 < abs_res_38197; - bool x_38199 = x_38196 && defunc_2_f_res_t_res_38198; - float defunc_1_f_res_38200; - - if (cond_38193) { - defunc_1_f_res_38200 = defunc_0_f_res_38192; - } else { - defunc_1_f_res_38200 = 0.0F; - } - ((__local bool *) red_arr_mem_46641)[gtid_37878] = x_38199; - ((__local int32_t *) red_arr_mem_46643)[gtid_37878] = index_primexp_42403; - ((__local float *) red_arr_mem_46645)[gtid_37878] = defunc_1_f_res_38200; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46647; - int32_t skip_waves_46648; - - skip_waves_46648 = 1; - - bool x_38174; - int32_t x_38175; - float x_38176; - bool x_38177; - int32_t x_38178; - float x_38179; - - offset_46647 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46625, sext_i64_i32(iota32_arg_28909))) { - x_38174 = ((__local - bool *) 
red_arr_mem_46641)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38175 = ((__local - int32_t *) red_arr_mem_46643)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38176 = ((__local - float *) red_arr_mem_46645)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - } - } - offset_46647 = 1; - while (slt32(offset_46647, wave_sizze_46627)) { - if (slt32(local_tid_46625 + offset_46647, - sext_i64_i32(iota32_arg_28909)) && ((local_tid_46625 - - squot32(local_tid_46625, - wave_sizze_46627) * - wave_sizze_46627) & (2 * - offset_46647 - - 1)) == - 0) { - // read array element - { - x_38177 = ((volatile __local - bool *) red_arr_mem_46641)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38178 = ((volatile __local - int32_t *) red_arr_mem_46643)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38179 = ((volatile __local - float *) red_arr_mem_46645)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - } - // apply reduction operation - { - bool defunc_1_op_res_38180; - int32_t defunc_1_op_res_38181; - - if (x_38174) { - defunc_1_op_res_38180 = x_38174; - defunc_1_op_res_38181 = x_38175; - } else { - bool x_38182 = x_38177 && x_38177; - bool x_38183 = !x_38177; - bool y_38184 = x_38174 && x_38183; - bool defunc_1_op_res_f_res_38185 = x_38182 || y_38184; - int32_t defunc_1_op_res_f_res_38186; - - if (x_38177) { - defunc_1_op_res_f_res_38186 = x_38178; - } else { - defunc_1_op_res_f_res_38186 = x_38175; + // read input for in-block scan + { + if (squot32(local_tid_127715, 32) == 0 && + ltid_in_bounds_127727) { + x_127725 = ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)]; + if ((local_tid_127715 - squot32(local_tid_127715, + 32) * 32) == 0) { + x_127724 = x_127725; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127730 = 1; + while (slt32(skip_threads_127730, 32)) { + if (sle32(skip_threads_127730, local_tid_127715 - + squot32(local_tid_127715, 32) * 32) && + (squot32(local_tid_127715, 32) == 0 
&& + ltid_in_bounds_127727)) { + // read operands + { + x_127724 = ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715) - + sext_i32_i64(skip_threads_127730)]; + } + // perform operation + { + bool inactive_127731 = + slt64(srem64(sext_i32_i64(local_tid_127715 * + 32 + 32 - 1), + j_m_i_110431), + sext_i32_i64(local_tid_127715 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127715 - + skip_threads_127730) * + 32 + 32 - 1)); + + if (inactive_127731) { + x_127724 = x_127725; + } + if (!inactive_127731) { + double defunc_1_op_res_127726 = + x_127724 + x_127725; + + x_127724 = defunc_1_op_res_127726; + } + } + } + if (sle32(wave_sizze_127717, skip_threads_127730)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127730, local_tid_127715 - + squot32(local_tid_127715, 32) * 32) && + (squot32(local_tid_127715, 32) == 0 && + ltid_in_bounds_127727)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + x_127724; + x_127725 = x_127724; + } + } + if (sle32(wave_sizze_127717, skip_threads_127730)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127730 *= 2; + } } - defunc_1_op_res_38180 = defunc_1_op_res_f_res_38185; - defunc_1_op_res_38181 = defunc_1_op_res_f_res_38186; } - - float defunc_1_op_res_38187 = x_38176 + x_38179; - - x_38174 = defunc_1_op_res_38180; - x_38175 = defunc_1_op_res_38181; - x_38176 = defunc_1_op_res_38187; - } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46641)[sext_i32_i64(local_tid_46625)] = - x_38174; - ((volatile __local - int32_t *) red_arr_mem_46643)[sext_i32_i64(local_tid_46625)] = - x_38175; - ((volatile __local - float *) red_arr_mem_46645)[sext_i32_i64(local_tid_46625)] = - x_38176; - } - } - offset_46647 *= 2; - } - while (slt32(skip_waves_46648, squot32(sext_i64_i32(iota32_arg_28909) + - wave_sizze_46627 - 1, - wave_sizze_46627))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46647 = skip_waves_46648 * 
wave_sizze_46627; - if (slt32(local_tid_46625 + offset_46647, - sext_i64_i32(iota32_arg_28909)) && ((local_tid_46625 - - squot32(local_tid_46625, - wave_sizze_46627) * - wave_sizze_46627) == 0 && - (squot32(local_tid_46625, - wave_sizze_46627) & - (2 * skip_waves_46648 - - 1)) == 0)) { - // read array element - { - x_38177 = ((__local - bool *) red_arr_mem_46641)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38178 = ((__local - int32_t *) red_arr_mem_46643)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - x_38179 = ((__local - float *) red_arr_mem_46645)[sext_i32_i64(local_tid_46625 + - offset_46647)]; - } - // apply reduction operation - { - bool defunc_1_op_res_38180; - int32_t defunc_1_op_res_38181; - - if (x_38174) { - defunc_1_op_res_38180 = x_38174; - defunc_1_op_res_38181 = x_38175; - } else { - bool x_38182 = x_38177 && x_38177; - bool x_38183 = !x_38177; - bool y_38184 = x_38174 && x_38183; - bool defunc_1_op_res_f_res_38185 = x_38182 || y_38184; - int32_t defunc_1_op_res_f_res_38186; - - if (x_38177) { - defunc_1_op_res_f_res_38186 = x_38178; - } else { - defunc_1_op_res_f_res_38186 = x_38175; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127715, 32) == 0 || + !ltid_in_bounds_127727)) { + // read operands + { + x_110517 = x_110516; + x_110516 = ((__local + double *) red_arr_mem_127719)[sext_i32_i64(squot32(local_tid_127715, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127732 = + slt64(srem64(sext_i32_i64(local_tid_127715), + j_m_i_110431), + sext_i32_i64(local_tid_127715) - + sext_i32_i64(squot32(local_tid_127715, + 32) * 32 - 1)); + + if (inactive_127732) { + x_110516 = x_110517; + } + if (!inactive_127732) { + double defunc_1_op_res_110518 = x_110516 + + x_110517; + + x_110516 = defunc_1_op_res_110518; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + x_110516; + } } - defunc_1_op_res_38180 
= defunc_1_op_res_f_res_38185; - defunc_1_op_res_38181 = defunc_1_op_res_f_res_38186; } - - float defunc_1_op_res_38187 = x_38176 + x_38179; - - x_38174 = defunc_1_op_res_38180; - x_38175 = defunc_1_op_res_38181; - x_38176 = defunc_1_op_res_38187; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46641)[sext_i32_i64(local_tid_46625)] = - x_38174; - ((__local - int32_t *) red_arr_mem_46643)[sext_i32_i64(local_tid_46625)] = - x_38175; - ((__local - float *) red_arr_mem_46645)[sext_i32_i64(local_tid_46625)] = - x_38176; - } - } - skip_waves_46648 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - acc0_38171 = ((__local bool *) red_arr_mem_46641)[(int64_t) 0]; - acc0_38172 = ((__local int32_t *) red_arr_mem_46643)[(int64_t) 0]; - acc0_38173 = ((__local float *) red_arr_mem_46645)[(int64_t) 0]; - - bool x_38203 = acc0_38171 && acc0_38171; - int32_t defunc_1_op_res_f_res_38207; - - if (acc0_38171) { - defunc_1_op_res_f_res_38207 = acc0_38172; - } else { - defunc_1_op_res_f_res_38207 = -1; - } - - bool cond_38213 = y_38130 == 0; - float defunc_0_f_res_38214; - - if (cond_38213) { - defunc_0_f_res_38214 = 0.0F; - } else { - float i32_res_38215 = sitofp_i32_f32(y_38130); - float defunc_0_f_res_f_res_38216 = acc0_38173 / i32_res_38215; - - defunc_0_f_res_38214 = defunc_0_f_res_f_res_38216; - } - - bool cond_38217 = !x_38203; - int32_t fst_breakzq_38218; - - if (cond_38217) { - fst_breakzq_38218 = -1; - } else { - bool cond_38219 = slt32(defunc_1_op_res_f_res_38207, y_38130); - int32_t adjustValInds_res_38220; - - if (cond_38219) { - int32_t i_38221 = add32(x_38124, defunc_1_op_res_f_res_38207); - int64_t i_38222 = sext_i32_i64(i_38221); - bool x_38223 = sle64((int64_t) 0, i_38222); - bool y_38224 = slt64(i_38222, N_28477); - bool bounds_check_38225 = x_38223 && y_38224; - bool index_certs_38226; - - if (!bounds_check_38225) { + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block { - if (atomic_cmpxchg_i32_global(global_failure, -1, 117) 
== - -1) { - global_failure_args[0] = i_38222; - global_failure_args[1] = N_28477; - ; + if (squot32(local_tid_127715, 32) == 0) { + ((__local + double *) red_arr_mem_127719)[sext_i32_i64(local_tid_127715)] = + x_110517; } - local_failure = true; - goto error_3; } + barrier(CLK_LOCAL_MEM_FENCE); } - - int32_t x_38227 = ((__global - int32_t *) defunc_4_map_res_mem_45179)[gtid_37873 * - N_28477 + - i_38222]; - int32_t adjustValInds_res_t_res_38228 = sub32(x_38227, n_28481); - - adjustValInds_res_38220 = adjustValInds_res_t_res_38228; - } else { - adjustValInds_res_38220 = -1; } - fst_breakzq_38218 = adjustValInds_res_38220; - } - - bool cond_38229 = sle32(x_38124, 5); - bool cond_f_res_38230 = sle32(y_38130, 5); - bool x_38231 = !cond_38229; - bool y_38232 = cond_f_res_38230 && x_38231; - bool cond_38233 = cond_38229 || y_38232; - int32_t fst_breakzq_38234; - - if (cond_38233) { - fst_breakzq_38234 = -2; - } else { - fst_breakzq_38234 = fst_breakzq_38218; - } - if (local_tid_46625 == 0) { - ((__global int32_t *) mem_45332)[gtid_37873] = fst_breakzq_38234; - } - if (local_tid_46625 == 0) { - ((__global float *) mem_45334)[gtid_37873] = defunc_0_f_res_38214; + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712) + + sext_i32_i64(local_tid_127715), m_75136 * k2p2zq_75151) && + slt64(sext_i32_i64(local_tid_127715), + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712))) { + ((__global + double *) mem_123338)[squot64(sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712) + + sext_i32_i64(local_tid_127715), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + segment_sizze_nonzzero_127712) + + sext_i32_i64(local_tid_127715) - + squot64(sext_i32_i64(virt_group_id_127723) * + squot64(segred_group_sizze_110512, + 
segment_sizze_nonzzero_127712) + + sext_i32_i64(local_tid_127715), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_127719)[(sext_i32_i64(local_tid_127715) + + (int64_t) 1) * + segment_sizze_nonzzero_127712 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_3: + error_1: return; + #undef segred_group_sizze_110512 } -__kernel void mainMagnitudezisegmap_intragroup_42541(__global - int *global_failure, - __local volatile - int64_t *mem_44480_backing_aligned_0, - int64_t m_28478, - int32_t n_28481, - int64_t i32_res_28493, - int64_t Ty_42529, - int64_t Tx_42530, - int64_t gridDim_x_42531, - int64_t gridDim_y_42532, - int64_t group_sizze_tile3d_42536, - int64_t count_shmem_42537, - __global - unsigned char *mem_44393, - __global - unsigned char *mem_44397, - __global - unsigned char *mem_44468, - __global - unsigned char *mem_44528) +__kernel void mainzisegred_small_109665(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127494_backing_aligned_0, + int64_t m_75136, + int64_t defunc_2_reduce_res_75260, + int64_t rp1_75837, int64_t j_109957, + int64_t num_groups_109990, + int64_t segment_sizze_nonzzero_127487, + __global unsigned char *mem_120246, + __global unsigned char *mem_122730) { + #define segred_group_sizze_109989 (mainzisegred_group_sizze_109659) + const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44480_backing_0 = (__local volatile - char *) mem_44480_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127494_backing_0 = + (__local volatile + char *) red_arr_mem_127494_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45722; - int32_t local_tid_45723; - int64_t group_sizze_45726; - int32_t wave_sizze_45725; - int32_t group_tid_45724; - - global_tid_45722 = get_global_id(0); - local_tid_45723 = get_local_id(0); - group_sizze_45726 = 
get_local_size(0); - wave_sizze_45725 = LOCKSTEP_WIDTH; - group_tid_45724 = get_group_id(0); - - int32_t gid_flat_42541; - - gid_flat_42541 = group_tid_45724; - - int32_t ltid_pre_45727; - - ltid_pre_45727 = squot32(local_tid_45723, sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)); - - int32_t ltid_pre_45728; - - ltid_pre_45728 = squot32(local_tid_45723 - squot32(local_tid_45723, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)); + int32_t global_tid_127489; + int32_t local_tid_127490; + int64_t group_sizze_127493; + int32_t wave_sizze_127492; + int32_t group_tid_127491; - int32_t ltid_pre_45729; + global_tid_127489 = get_global_id(0); + local_tid_127490 = get_local_id(0); + group_sizze_127493 = get_local_size(0); + wave_sizze_127492 = LOCKSTEP_WIDTH; + group_tid_127491 = get_group_id(0); - ltid_pre_45729 = local_tid_45723 - squot32(local_tid_45723, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)) - - squot32(local_tid_45723 - squot32(local_tid_45723, - sext_i64_i32(Ty_42529) * - sext_i64_i32(Tx_42530)) * - (sext_i64_i32(Ty_42529) * sext_i64_i32(Tx_42530)), - sext_i64_i32(Tx_42530)) * sext_i64_i32(Tx_42530); + int32_t phys_tid_109665; - int32_t ltid_pre_45730; + phys_tid_109665 = global_tid_127489; - ltid_pre_45730 = squot32(local_tid_45723, sext_i64_i32(Tx_42530)); + __local char *red_arr_mem_127494; - int32_t ltid_pre_45731; + red_arr_mem_127494 = (__local char *) red_arr_mem_127494_backing_0; - ltid_pre_45731 = local_tid_45723 - squot32(local_tid_45723, - sext_i64_i32(Tx_42530)) * - sext_i64_i32(Tx_42530); + int32_t phys_group_id_127496; - int32_t ltid_pre_45732; - - ltid_pre_45732 = local_tid_45723; - - int64_t gid_zz_42540; - - gid_zz_42540 = squot64(sext_i32_i64(group_tid_45724), gridDim_y_42532 * - gridDim_x_42531); - - int64_t gid_y_42539; - - gid_y_42539 = squot64(sext_i32_i64(group_tid_45724) - - 
squot64(sext_i32_i64(group_tid_45724), - gridDim_y_42532 * gridDim_x_42531) * - (gridDim_y_42532 * gridDim_x_42531), gridDim_x_42531); - - int64_t gid_x_42538; - - gid_x_42538 = sext_i32_i64(group_tid_45724) - - squot64(sext_i32_i64(group_tid_45724), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531) - - squot64(sext_i32_i64(group_tid_45724) - - squot64(sext_i32_i64(group_tid_45724), gridDim_y_42532 * - gridDim_x_42531) * (gridDim_y_42532 * gridDim_x_42531), - gridDim_x_42531) * gridDim_x_42531; - - int64_t ii_42542; - - ii_42542 = (int64_t) 30 * gid_zz_42540; - - int64_t jj1_42543 = Ty_42529 * gid_y_42539; - int64_t jj2_42544 = Tx_42530 * gid_x_42538; - float mem_44478[30]; - int64_t ltid_y_42547 = sext_i32_i64(ltid_pre_45730); - int64_t ltid_x_42545 = sext_i32_i64(ltid_pre_45731); - int32_t ltid_flat_42546 = local_tid_45723; - float mem_44472[30]; - - for (int32_t i_44270 = 0; i_44270 < 30; i_44270++) { - int64_t i_42555 = sext_i32_i64(i_44270); + phys_group_id_127496 = get_group_id(0); + for (int32_t i_127497 = 0; i_127497 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487))) - + phys_group_id_127496, sext_i64_i32(num_groups_109990)); + i_127497++) { + int32_t virt_group_id_127498 = phys_group_id_127496 + i_127497 * + sext_i64_i32(num_groups_109990); + int64_t gtid_109656 = squot64(sext_i32_i64(local_tid_127490), + segment_sizze_nonzzero_127487) + + sext_i32_i64(virt_group_id_127498) * + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487); + int64_t gtid_109664 = srem64(sext_i32_i64(local_tid_127490), rp1_75837); - mem_44472[i_42555] = 0.0F; - } - for (int64_t i_45734 = 0; i_45734 < (int64_t) 30; i_45734++) { - mem_44478[i_45734] = mem_44472[i_45734]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44480; - - mem_44480 = (__local char *) mem_44480_backing_0; - - float loop_mem_44510[30]; - float mem_param_44481[30]; - - for (int32_t i_1 = 0; i_1 < 
30; i_1++) - mem_param_44481[i_1] = mem_44478[i_1]; - for (int32_t i_44272 = 0; i_44272 < n_28481; i_44272++) { - int64_t i_42560 = sext_i32_i64(i_44272); - - for (int64_t i_42563 = 0; i_42563 < count_shmem_42537; i_42563++) { - int64_t offs_42576 = group_sizze_tile3d_42536 * i_42563; - int64_t ltid_42566 = sext_i32_i64(ltid_pre_45732); - int32_t ltid_flat_42565 = local_tid_45723; - int64_t loc_ind_42577 = ltid_42566 + offs_42576; - int64_t gtid_42578 = ii_42542 + loc_ind_42577; - bool cond_42579 = slt64(gtid_42578, m_28478); - float y_elem_42580; - - if (cond_42579) { - float Y_elem_42582 = ((__global float *) mem_44468)[i_42560 * - m_28478 + - gtid_42578]; - - y_elem_42580 = Y_elem_42582; - } else { - y_elem_42580 = 0.0F; - } - - bool cond_42584 = slt64(loc_ind_42577, (int64_t) 30); - int64_t y_loc_ind_42585; - - if (cond_42584) { - y_loc_ind_42585 = loc_ind_42577; + // apply map function if in bounds + { + if (slt64((int64_t) 0, rp1_75837) && (slt64(gtid_109656, m_75136) && + slt64(sext_i32_i64(local_tid_127490), + rp1_75837 * + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487)))) { + double x_109997 = ((__global double *) mem_120246)[j_109957 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109656 * + defunc_2_reduce_res_75260 + + gtid_109664]; + double defunc_1_f_res_109998 = x_109997 * x_109997; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + defunc_1_f_res_109998; + } } else { - y_loc_ind_42585 = (int64_t) -1; - } - if (sle64((int64_t) 0, y_loc_ind_42585) && slt64(y_loc_ind_42585, - (int64_t) 30)) { - ((__local float *) mem_44480)[y_loc_ind_42585] = y_elem_42580; + ((__local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + 0.0; } - barrier(CLK_LOCAL_MEM_FENCE); } - - float mem_44509[30]; - int64_t ltid_y_42591 = sext_i32_i64(ltid_pre_45730); - int64_t ltid_x_42589 = sext_i32_i64(ltid_pre_45731); - int32_t 
ltid_flat_42590 = local_tid_45723; - int64_t gtid_42618 = jj1_42543 + ltid_y_42591; - int64_t gtid_42619 = jj2_42544 + ltid_x_42589; - bool binop_x_42621 = slt64(gtid_42618, i32_res_28493); - bool binop_y_42622 = slt64(gtid_42619, i32_res_28493); - bool cond_42623 = binop_x_42621 && binop_y_42622; - float mem_45450[30]; - - if (cond_42623) { - float x_42626 = ((__global float *) mem_44393)[i_42560 * - i32_res_28493 + - gtid_42618]; - float x_42628 = ((__global float *) mem_44397)[i_42560 * - i32_res_28493 + - gtid_42619]; - - for (int32_t i_44271 = 0; i_44271 < 30; i_44271++) { - int64_t i_42630 = sext_i32_i64(i_44271); - int64_t gtid_42632 = ii_42542 + i_42630; - bool cond_42633 = slt64(gtid_42632, m_28478); + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, rp1_75837)) { + // perform segmented scan to imitate reduction + { + double x_109993; + double x_109994; + double x_127499; + double x_127500; + bool ltid_in_bounds_127502; - if (cond_42633) { - float inp_reg_var2zz_42635 = ((__local - float *) mem_44480)[i_42630]; - float res_reg_var2zz_42636 = mem_param_44481[i_42630]; - float x_42640 = x_42626 * x_42628; - bool isnan_res_42641; - - isnan_res_42641 = futrts_isnan32(inp_reg_var2zz_42635); - - float y_42642; - - if (isnan_res_42641) { - y_42642 = 0.0F; - } else { - y_42642 = 1.0F; + ltid_in_bounds_127502 = slt64(sext_i32_i64(local_tid_127490), + rp1_75837 * + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487)); + + int32_t skip_threads_127503; + + // read input for in-block scan + { + if (ltid_in_bounds_127502) { + x_109994 = ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)]; + if ((local_tid_127490 - squot32(local_tid_127490, 32) * + 32) == 0) { + x_109993 = x_109994; + } } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127503 = 1; + while (slt32(skip_threads_127503, 32)) { + if (sle32(skip_threads_127503, local_tid_127490 - + squot32(local_tid_127490, 32) * 32) && + 
ltid_in_bounds_127502) { + // read operands + { + x_109993 = ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490) - + sext_i32_i64(skip_threads_127503)]; + } + // perform operation + { + bool inactive_127504 = + slt64(srem64(sext_i32_i64(local_tid_127490), + rp1_75837), + sext_i32_i64(local_tid_127490) - + sext_i32_i64(local_tid_127490 - + skip_threads_127503)); + + if (inactive_127504) { + x_109993 = x_109994; + } + if (!inactive_127504) { + double defunc_1_op_res_109995 = x_109993 + + x_109994; + + x_109993 = defunc_1_op_res_109995; + } + } + } + if (sle32(wave_sizze_127492, skip_threads_127503)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127503, local_tid_127490 - + squot32(local_tid_127490, 32) * 32) && + ltid_in_bounds_127502) { + // write result + { + ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + x_109993; + x_109994 = x_109993; + } + } + if (sle32(wave_sizze_127492, skip_threads_127503)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127503 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127490 - squot32(local_tid_127490, 32) * + 32) == 31 && ltid_in_bounds_127502) { + ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(squot32(local_tid_127490, + 32))] = + x_109993; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127505; - float defunc_2_f_res_42643 = x_42640 * y_42642; - float defunc_1_op_res_42647 = res_reg_var2zz_42636 + - defunc_2_f_res_42643; - - mem_param_44481[i_42630] = defunc_1_op_res_42647; + // read input for in-block scan + { + if (squot32(local_tid_127490, 32) == 0 && + ltid_in_bounds_127502) { + x_127500 = ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)]; + if ((local_tid_127490 - squot32(local_tid_127490, + 32) * 32) == 0) 
{ + x_127499 = x_127500; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127505 = 1; + while (slt32(skip_threads_127505, 32)) { + if (sle32(skip_threads_127505, local_tid_127490 - + squot32(local_tid_127490, 32) * 32) && + (squot32(local_tid_127490, 32) == 0 && + ltid_in_bounds_127502)) { + // read operands + { + x_127499 = ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490) - + sext_i32_i64(skip_threads_127505)]; + } + // perform operation + { + bool inactive_127506 = + slt64(srem64(sext_i32_i64(local_tid_127490 * + 32 + 32 - 1), rp1_75837), + sext_i32_i64(local_tid_127490 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127490 - + skip_threads_127505) * + 32 + 32 - 1)); + + if (inactive_127506) { + x_127499 = x_127500; + } + if (!inactive_127506) { + double defunc_1_op_res_127501 = + x_127499 + x_127500; + + x_127499 = defunc_1_op_res_127501; + } + } + } + if (sle32(wave_sizze_127492, skip_threads_127505)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127505, local_tid_127490 - + squot32(local_tid_127490, 32) * 32) && + (squot32(local_tid_127490, 32) == 0 && + ltid_in_bounds_127502)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + x_127499; + x_127500 = x_127499; + } + } + if (sle32(wave_sizze_127492, skip_threads_127505)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127505 *= 2; + } + } } - } - for (int64_t i_45740 = 0; i_45740 < (int64_t) 30; i_45740++) { - mem_45450[i_45740] = mem_param_44481[i_45740]; - } - } else { - for (int64_t i_45741 = 0; i_45741 < (int64_t) 30; i_45741++) { - mem_45450[i_45741] = mem_param_44481[i_45741]; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127490, 32) == 0 || + !ltid_in_bounds_127502)) { + // read operands + { + x_109994 = x_109993; + x_109993 = ((__local + double *) 
red_arr_mem_127494)[sext_i32_i64(squot32(local_tid_127490, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127507 = + slt64(srem64(sext_i32_i64(local_tid_127490), + rp1_75837), + sext_i32_i64(local_tid_127490) - + sext_i32_i64(squot32(local_tid_127490, + 32) * 32 - 1)); + + if (inactive_127507) { + x_109993 = x_109994; + } + if (!inactive_127507) { + double defunc_1_op_res_109995 = x_109993 + + x_109994; + + x_109993 = defunc_1_op_res_109995; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + x_109993; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127490, 32) == 0) { + ((__local + double *) red_arr_mem_127494)[sext_i32_i64(local_tid_127490)] = + x_109994; + } + } + barrier(CLK_LOCAL_MEM_FENCE); } } - for (int64_t i_45742 = 0; i_45742 < (int64_t) 30; i_45742++) { - mem_44509[i_45742] = mem_45450[i_45742]; - } barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_45735[30]; - - for (int32_t i_2 = 0; i_2 < 30; i_2++) - mem_param_tmp_45735[i_2] = mem_44509[i_2]; - for (int32_t i_3 = 0; i_3 < 30; i_3++) - mem_param_44481[i_3] = mem_param_tmp_45735[i_3]; - } - for (int32_t i_4 = 0; i_4 < 30; i_4++) - loop_mem_44510[i_4] = mem_param_44481[i_4]; - - float mem_44524[30 * 1 * 1]; - int64_t ltid_zz_42656 = sext_i32_i64(ltid_pre_45727); - int64_t ltid_y_42655 = sext_i32_i64(ltid_pre_45728); - int64_t ltid_x_42653 = sext_i32_i64(ltid_pre_45729); - int32_t ltid_flat_42654 = local_tid_45723; - float mem_44518[30 * 1 * 1]; - - for (int32_t i_44274 = 0; i_44274 < 30; i_44274++) { - int64_t i_42665 = sext_i32_i64(i_44274); - - for (int64_t i_45744 = 0; i_45744 < (int64_t) 1; i_45744++) { - mem_44518[i_42665 + i_45744] = loop_mem_44510[i_42665 + i_45744]; - } - } - for (int64_t i_45745 = 0; i_45745 < (int64_t) 30; i_45745++) { - for (int64_t i_45746 = 0; i_45746 < (int64_t) 1; i_45746++) { - for (int64_t i_45747 = 0; i_45747 < 
(int64_t) 1; i_45747++) { - mem_44524[i_45745 + i_45746 + i_45747] = mem_44518[i_45745 + - i_45746 + - i_45747]; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t reg_tile_i_45748 = squot64(sext_i32_i64(local_tid_45723), Ty_42529 * - Tx_42530); - int64_t reg_tile_i_45749 = squot64(sext_i32_i64(local_tid_45723) - - squot64(sext_i32_i64(local_tid_45723), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530); - int64_t reg_tile_i_45750 = sext_i32_i64(local_tid_45723) - - squot64(sext_i32_i64(local_tid_45723), Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530) - squot64(sext_i32_i64(local_tid_45723) - - squot64(sext_i32_i64(local_tid_45723), - Ty_42529 * Tx_42530) * - (Ty_42529 * Tx_42530), Tx_42530) * - Tx_42530; - int64_t tile_dim_start_45751 = (int64_t) 30 * (gid_zz_42540 + - reg_tile_i_45748); - int64_t tile_dim_start_45752 = Ty_42529 * gid_y_42539 + reg_tile_i_45749; - int64_t tile_dim_start_45753 = Tx_42530 * gid_x_42538 + reg_tile_i_45750; - - for (int64_t nest_i_45754 = 0; nest_i_45754 < (int64_t) 30; - nest_i_45754++) { - for (int64_t nest_i_45755 = 0; nest_i_45755 < (int64_t) 1; - nest_i_45755++) { - for (int64_t nest_i_45756 = 0; nest_i_45756 < (int64_t) 1; - nest_i_45756++) { - if ((slt64(tile_dim_start_45751 + nest_i_45754, m_28478) && - slt64(tile_dim_start_45752 + nest_i_45755, - i32_res_28493)) && slt64(tile_dim_start_45753 + - nest_i_45756, - i32_res_28493)) { - ((__global float *) mem_44528)[(tile_dim_start_45751 + - nest_i_45754) * - (i32_res_28493 * - i32_res_28493) + - (tile_dim_start_45752 + - nest_i_45755) * - i32_res_28493 + - (tile_dim_start_45753 + - nest_i_45756)] = - mem_44524[nest_i_45754 + nest_i_45755 + nest_i_45756]; - } + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127498) * + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487) + + sext_i32_i64(local_tid_127490), m_75136) && + slt64(sext_i32_i64(local_tid_127490), + squot64(segred_group_sizze_109989, + 
segment_sizze_nonzzero_127487))) { + ((__global + double *) mem_122730)[sext_i32_i64(virt_group_id_127498) * + squot64(segred_group_sizze_109989, + segment_sizze_nonzzero_127487) + + sext_i32_i64(local_tid_127490)] = + ((__local + double *) red_arr_mem_127494)[(sext_i32_i64(local_tid_127490) + + (int64_t) 1) * + segment_sizze_nonzzero_127487 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_4: + error_1: return; + #undef segred_group_sizze_109989 } -__kernel void mainMagnitudezisegmap_intragroup_42694(__global - int *global_failure, - __local volatile - int64_t *mem_44668_backing_aligned_0, - __local volatile - int64_t *mem_44666_backing_aligned_1, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28487, - int64_t i32_res_28493, - int64_t gridDim_x_42688, - int64_t full_tiles_42719, - int64_t kk_42926, __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_44393, - __global - unsigned char *mem_44840) +__kernel void mainzisegred_small_109818(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127408_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t r_75826, + int64_t num_groups_109897, + int64_t segment_sizze_nonzzero_127401, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global unsigned char *mem_122677) { - #define Ty_42675 (mainMagnitudeziTy_42672) - #define Ry_42676 (mainMagnitudeziRy_42674) - #define Tx_42677 (mainMagnitudeziTx_42671) - #define Rx_42678 (mainMagnitudeziRx_42673) - #define Tk_42679 (mainMagnitudeziTk_42670) - #define tk_div_tx_42680 (sdiv_up64(mainMagnitudeziTk_42670, mainMagnitudeziTx_42671)) - #define tk_div_ty_42681 (sdiv_up64(mainMagnitudeziTk_42670, mainMagnitudeziTy_42672)) - #define TxRx_42682 (mainMagnitudeziTx_42671 * mainMagnitudeziRx_42673) - #define TyRy_42683 (mainMagnitudeziTy_42672 * mainMagnitudeziRy_42674) - #define 
a_loc_szz_42685 (mainMagnitudeziTk_42670 * (mainMagnitudeziTy_42672 * mainMagnitudeziRy_42674)) - #define b_loc_szz_42687 (mainMagnitudeziRx_42673 * (mainMagnitudeziTx_42671 * mainMagnitudeziTk_42670)) + #define segred_group_sizze_109896 (mainzisegred_group_sizze_109812) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_44668_backing_1 = (__local volatile - char *) mem_44668_backing_aligned_0; - __local volatile char *restrict mem_44666_backing_0 = (__local volatile - char *) mem_44666_backing_aligned_1; + __local volatile char *restrict red_arr_mem_127408_backing_0 = + (__local volatile + char *) red_arr_mem_127408_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45897; - int32_t local_tid_45898; - int64_t group_sizze_45901; - int32_t wave_sizze_45900; - int32_t group_tid_45899; - - global_tid_45897 = get_global_id(0); - local_tid_45898 = get_local_id(0); - group_sizze_45901 = get_local_size(0); - wave_sizze_45900 = LOCKSTEP_WIDTH; - group_tid_45899 = get_group_id(0); - - int32_t gid_flat_42694; - - gid_flat_42694 = group_tid_45899; - - int32_t ltid_pre_45902; - - ltid_pre_45902 = squot32(local_tid_45898, sext_i64_i32(Tx_42677)); - - int32_t ltid_pre_45903; - - ltid_pre_45903 = local_tid_45898 - squot32(local_tid_45898, - sext_i64_i32(Tx_42677)) * - sext_i64_i32(Tx_42677); - - int64_t gid_y_42693; - - gid_y_42693 = squot64(sext_i32_i64(group_tid_45899), gridDim_x_42688); - - int64_t gid_x_42692; - - gid_x_42692 = sext_i32_i64(group_tid_45899) - - squot64(sext_i32_i64(group_tid_45899), gridDim_x_42688) * - gridDim_x_42688; - - int64_t iii_42695; - - iii_42695 = TyRy_42683 * gid_y_42693; - - int64_t jjj_42696 = TxRx_42682 * gid_x_42692; - float mem_44664[Ry_42676 * Rx_42678]; - int64_t ltid_y_42699 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_x_42697 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42698 = local_tid_45898; - float mem_44655[Ry_42676 * Rx_42678]; - - for 
(int64_t i_42710 = 0; i_42710 < Ry_42676; i_42710++) { - for (int64_t i_42713 = 0; i_42713 < Rx_42678; i_42713++) { - mem_44655[i_42710 * Rx_42678 + i_42713] = 0.0F; - } - } - for (int64_t i_45906 = 0; i_45906 < Ry_42676; i_45906++) { - for (int64_t i_45907 = 0; i_45907 < Rx_42678; i_45907++) { - mem_44664[i_45906 * Rx_42678 + i_45907] = mem_44655[i_45906 * - Rx_42678 + - i_45907]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44666; - - mem_44666 = (__local char *) mem_44666_backing_0; - - __local char *mem_44668; - - mem_44668 = (__local char *) mem_44668_backing_1; - - float mem_44739[Ry_42676]; - float mem_44743[Rx_42678]; - float loop_mem_44755[Ry_42676 * Rx_42678]; - float mem_param_44669[Ry_42676 * Rx_42678]; - - for (int32_t i_2 = 0; i_2 < Ry_42676 * Rx_42678; i_2++) - mem_param_44669[i_2] = mem_44664[i_2]; - for (int64_t i_42720 = 0; i_42720 < full_tiles_42719; i_42720++) { - int64_t kk_42724 = Tk_42679 * i_42720; + int32_t global_tid_127403; + int32_t local_tid_127404; + int64_t group_sizze_127407; + int32_t wave_sizze_127406; + int32_t group_tid_127405; + + global_tid_127403 = get_global_id(0); + local_tid_127404 = get_local_id(0); + group_sizze_127407 = get_local_size(0); + wave_sizze_127406 = LOCKSTEP_WIDTH; + group_tid_127405 = get_group_id(0); + + int32_t phys_tid_109818; + + phys_tid_109818 = global_tid_127403; + + __local char *red_arr_mem_127408; + + red_arr_mem_127408 = (__local char *) red_arr_mem_127408_backing_0; + + int32_t phys_group_id_127410; + + phys_group_id_127410 = get_group_id(0); + for (int32_t i_127411 = 0; i_127411 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401))) - + phys_group_id_127410, sext_i64_i32(num_groups_109897)); + i_127411++) { + int32_t virt_group_id_127412 = phys_group_id_127410 + i_127411 * + sext_i64_i32(num_groups_109897); + int64_t gtid_109809 = squot64(sext_i32_i64(local_tid_127404), + segment_sizze_nonzzero_127401) + + 
sext_i32_i64(virt_group_id_127412) * + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401); + int64_t gtid_109817 = srem64(sext_i32_i64(local_tid_127404), + k2p2zq_75151); - for (int64_t i_42725 = 0; i_42725 < Ry_42676; i_42725++) { - int64_t binop_y_42748 = Ty_42675 * i_42725; - - for (int64_t i_42727 = 0; i_42727 < tk_div_tx_42680; i_42727++) { - int64_t binop_y_42746 = Tx_42677 * i_42727; - int64_t ltid_x_42729 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_y_42730 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42731 = local_tid_45898; - int64_t k_42747 = ltid_y_42730 + binop_y_42746; - int64_t i_42749 = ltid_x_42729 + binop_y_42748; - int64_t gtid_42750 = iii_42695 + i_42749; - int64_t A_col_idx_42751 = kk_42724 + k_42747; - bool cond_42752 = slt64(gtid_42750, m_28478); - float A_elem_42753; - - if (cond_42752) { - float A_elem_42755 = ((__global - float *) images_mem_44381)[gtid_42750 * - N_28477 + - A_col_idx_42751]; - - A_elem_42753 = A_elem_42755; - } else { - A_elem_42753 = 0.0F; - } - - bool cond_42757 = slt64(k_42747, Tk_42679); - int64_t a_loc_ind_42758; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && (slt64(gtid_109809, + m_75136) && + slt64(sext_i32_i64(local_tid_127404), + k2p2zq_75151 * + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401)))) { + double x_109905 = ((__global double *) mem_120246)[gtid_109817 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109809 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_109906 = ((__global + double *) mem_param_121967)[gtid_109809 * + k2p2zq_75151 + + gtid_109817]; + double defunc_1_f_res_109907 = x_109905 * x_109906; - if (cond_42757) { - int64_t binop_y_42759 = Tk_42679 * i_42749; - int64_t loc_fi_42760 = k_42747 + binop_y_42759; - - a_loc_ind_42758 = loc_fi_42760; - } else { - a_loc_ind_42758 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_42758) && - slt64(a_loc_ind_42758, a_loc_szz_42685)) { - ((__local float *) 
mem_44666)[a_loc_ind_42758] = - A_elem_42753; + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + defunc_1_f_res_109907; } - barrier(CLK_LOCAL_MEM_FENCE); + } else { + ((__local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + 0.0; } } - for (int64_t i_42765 = 0; i_42765 < tk_div_ty_42681; i_42765++) { - int64_t binop_y_42786 = Ty_42675 * i_42765; - - for (int64_t i_42767 = 0; i_42767 < Rx_42678; i_42767++) { - int64_t binop_y_42788 = Tx_42677 * i_42767; - int64_t ltid_x_42769 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_y_42770 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42771 = local_tid_45898; - int64_t k_42787 = ltid_x_42769 + binop_y_42786; - int64_t j_42789 = ltid_y_42770 + binop_y_42788; - int64_t gtid_42790 = jjj_42696 + j_42789; - int64_t B_row_idx_42791 = kk_42724 + k_42787; - bool cond_42792 = slt64(gtid_42790, i32_res_28493); - float B_elem_42793; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction + { + double x_109900; + double x_109901; + double x_127413; + double x_127414; + bool ltid_in_bounds_127416; - if (cond_42792) { - float B_elem_42795 = ((__global - float *) mem_44393)[B_row_idx_42791 * - i32_res_28493 + - gtid_42790]; - - B_elem_42793 = B_elem_42795; - } else { - B_elem_42793 = 0.0F; - } + ltid_in_bounds_127416 = slt64(sext_i32_i64(local_tid_127404), + k2p2zq_75151 * + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401)); - bool cond_42797 = slt64(k_42787, Tk_42679); - int64_t b_loc_ind_42798; + int32_t skip_threads_127417; - if (cond_42797) { - int64_t binop_y_42799 = TxRx_42682 * k_42787; - int64_t loc_fi_42800 = j_42789 + binop_y_42799; - - b_loc_ind_42798 = loc_fi_42800; - } else { - b_loc_ind_42798 = (int64_t) -1; + // read input for in-block scan + { + if (ltid_in_bounds_127416) { + x_109901 = ((volatile __local + double *) 
red_arr_mem_127408)[sext_i32_i64(local_tid_127404)]; + if ((local_tid_127404 - squot32(local_tid_127404, 32) * + 32) == 0) { + x_109900 = x_109901; + } + } } - if (sle64((int64_t) 0, b_loc_ind_42798) && - slt64(b_loc_ind_42798, b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_42798] = - B_elem_42793; + // in-block scan (hopefully no barriers needed) + { + skip_threads_127417 = 1; + while (slt32(skip_threads_127417, 32)) { + if (sle32(skip_threads_127417, local_tid_127404 - + squot32(local_tid_127404, 32) * 32) && + ltid_in_bounds_127416) { + // read operands + { + x_109900 = ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404) - + sext_i32_i64(skip_threads_127417)]; + } + // perform operation + { + bool inactive_127418 = + slt64(srem64(sext_i32_i64(local_tid_127404), + k2p2zq_75151), + sext_i32_i64(local_tid_127404) - + sext_i32_i64(local_tid_127404 - + skip_threads_127417)); + + if (inactive_127418) { + x_109900 = x_109901; + } + if (!inactive_127418) { + double defunc_1_op_res_109902 = x_109900 + + x_109901; + + x_109900 = defunc_1_op_res_109902; + } + } + } + if (sle32(wave_sizze_127406, skip_threads_127417)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127417, local_tid_127404 - + squot32(local_tid_127404, 32) * 32) && + ltid_in_bounds_127416) { + // write result + { + ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + x_109900; + x_109901 = x_109900; + } + } + if (sle32(wave_sizze_127406, skip_threads_127417)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127417 *= 2; + } } barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float loop_mem_44754[Ry_42676 * Rx_42678]; - float mem_param_44726[Ry_42676 * Rx_42678]; - - for (int32_t i_3 = 0; i_3 < Ry_42676 * Rx_42678; i_3++) - mem_param_44726[i_3] = mem_param_44669[i_3]; - for (int64_t i_42805 = 0; i_42805 < Tk_42679; i_42805++) { - int64_t binop_y_42844 = TxRx_42682 * i_42805; - int64_t ltid_y_42809 = 
sext_i32_i64(ltid_pre_45902); - int64_t ltid_x_42807 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42808 = local_tid_45898; - float mem_44729[Ry_42676]; - float mem_44731[Rx_42678]; - int64_t binop_x_42835 = Ry_42676 * ltid_y_42809; - - for (int64_t i_42833 = 0; i_42833 < Ry_42676; i_42833++) { - int64_t binop_x_42836 = i_42833 + binop_x_42835; - int64_t binop_y_42837 = Tk_42679 * binop_x_42836; - int64_t a_loc_ind_42838 = i_42805 + binop_y_42837; - - for (int64_t i_45919 = 0; i_45919 < (int64_t) 1; i_45919++) { - mem_44729[i_42833 + i_45919] = ((__local - float *) mem_44666)[a_loc_ind_42838 + - i_45919]; - } - } - - int64_t binop_y_42846 = Rx_42678 * ltid_x_42807; - - for (int64_t i_42842 = 0; i_42842 < Rx_42678; i_42842++) { - int64_t binop_x_42845 = i_42842 + binop_y_42844; - int64_t b_loc_ind_42847 = binop_x_42845 + binop_y_42846; - - for (int64_t i_45921 = 0; i_45921 < (int64_t) 1; i_45921++) { - mem_44731[i_42842 + i_45921] = ((__local - float *) mem_44668)[b_loc_ind_42847 + - i_45921]; + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127404 - squot32(local_tid_127404, 32) * + 32) == 31 && ltid_in_bounds_127416) { + ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(squot32(local_tid_127404, + 32))] = + x_109900; + } } - } - for (int64_t i_45922 = 0; i_45922 < Ry_42676; i_45922++) { - mem_44739[i_45922] = mem_44729[i_45922]; - } - for (int64_t i_45923 = 0; i_45923 < Rx_42678; i_45923++) { - mem_44743[i_45923] = mem_44731[i_45923]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_44753[Ry_42676 * Rx_42678]; - int64_t ltid_y_42854 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_x_42852 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42853 = local_tid_45898; - int64_t binop_y_42897 = Ry_42676 * ltid_y_42854; - int64_t binop_y_42901 = Rx_42678 * ltid_x_42852; - - for (int64_t i_42891 = 0; i_42891 < Ry_42676; i_42891++) { - int64_t binop_x_42896 = iii_42695 + i_42891; - int64_t cmpop_x_42898 = binop_x_42896 
+ binop_y_42897; - bool binop_x_42899 = slt64(cmpop_x_42898, m_28478); - - for (int64_t i_42894 = 0; i_42894 < Rx_42678; i_42894++) { - int64_t binop_x_42900 = jjj_42696 + i_42894; - int64_t cmpop_x_42902 = binop_x_42900 + binop_y_42901; - bool binop_y_42903 = slt64(cmpop_x_42902, i32_res_28493); - bool cond_42904 = binop_x_42899 && binop_y_42903; + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127419; - if (cond_42904) { - float a_42906 = mem_44739[i_42891]; - float c_42908 = mem_param_44726[i_42891 * Rx_42678 + - i_42894]; - bool isnan_res_42911; - - isnan_res_42911 = futrts_isnan32(a_42906); - - float defunc_1_f_res_42912; - - if (isnan_res_42911) { - defunc_1_f_res_42912 = 0.0F; - } else { - float b_42907 = mem_44743[i_42894]; - float defunc_1_f_res_f_res_42913 = a_42906 * - b_42907; - - defunc_1_f_res_42912 = defunc_1_f_res_f_res_42913; + // read input for in-block scan + { + if (squot32(local_tid_127404, 32) == 0 && + ltid_in_bounds_127416) { + x_127414 = ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)]; + if ((local_tid_127404 - squot32(local_tid_127404, + 32) * 32) == 0) { + x_127413 = x_127414; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127419 = 1; + while (slt32(skip_threads_127419, 32)) { + if (sle32(skip_threads_127419, local_tid_127404 - + squot32(local_tid_127404, 32) * 32) && + (squot32(local_tid_127404, 32) == 0 && + ltid_in_bounds_127416)) { + // read operands + { + x_127413 = ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404) - + sext_i32_i64(skip_threads_127419)]; + } + // perform operation + { + bool inactive_127420 = + slt64(srem64(sext_i32_i64(local_tid_127404 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_127404 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127404 - + skip_threads_127419) * + 32 + 32 - 1)); + + if (inactive_127420) { + 
x_127413 = x_127414; + } + if (!inactive_127420) { + double defunc_1_op_res_127415 = + x_127413 + x_127414; + + x_127413 = defunc_1_op_res_127415; + } + } + } + if (sle32(wave_sizze_127406, skip_threads_127419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127419, local_tid_127404 - + squot32(local_tid_127404, 32) * 32) && + (squot32(local_tid_127404, 32) == 0 && + ltid_in_bounds_127416)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + x_127413; + x_127414 = x_127413; + } + } + if (sle32(wave_sizze_127406, skip_threads_127419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127419 *= 2; } - - float defunc_1_op_res_42917 = c_42908 + - defunc_1_f_res_42912; - - mem_param_44726[i_42891 * Rx_42678 + i_42894] = - defunc_1_op_res_42917; } } - } - for (int64_t i_45926 = 0; i_45926 < Ry_42676; i_45926++) { - for (int64_t i_45927 = 0; i_45927 < Rx_42678; i_45927++) { - mem_44753[i_45926 * Rx_42678 + i_45927] = - mem_param_44726[i_45926 * Rx_42678 + i_45927]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_45916[Ry_42676 * Rx_42678]; - - for (int32_t i_4 = 0; i_4 < Ry_42676 * Rx_42678; i_4++) - mem_param_tmp_45916[i_4] = mem_44753[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_42676 * Rx_42678; i_5++) - mem_param_44726[i_5] = mem_param_tmp_45916[i_5]; - } - for (int32_t i_6 = 0; i_6 < Ry_42676 * Rx_42678; i_6++) - loop_mem_44754[i_6] = mem_param_44726[i_6]; - - float mem_param_tmp_45908[Ry_42676 * Rx_42678]; - - for (int32_t i_7 = 0; i_7 < Ry_42676 * Rx_42678; i_7++) - mem_param_tmp_45908[i_7] = loop_mem_44754[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_42676 * Rx_42678; i_8++) - mem_param_44669[i_8] = mem_param_tmp_45908[i_8]; - } - for (int32_t i_9 = 0; i_9 < Ry_42676 * Rx_42678; i_9++) - loop_mem_44755[i_9] = mem_param_44669[i_9]; - for (int64_t i_42927 = 0; i_42927 < Ry_42676; i_42927++) { - int64_t binop_y_42952 = Ty_42675 * i_42927; - - for (int64_t i_42929 = 0; i_42929 < 
tk_div_tx_42680; i_42929++) { - int64_t binop_y_42950 = Tx_42677 * i_42929; - int64_t ltid_x_42931 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_y_42932 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42933 = local_tid_45898; - int64_t k_42951 = ltid_y_42932 + binop_y_42950; - int64_t i_42953 = ltid_x_42931 + binop_y_42952; - int64_t gtid_42954 = iii_42695 + i_42953; - int64_t A_col_idx_42955 = kk_42926 + k_42951; - bool binop_x_42956 = slt64(gtid_42954, m_28478); - bool binop_y_42957 = slt64(A_col_idx_42955, i32_res_28487); - bool cond_42958 = binop_x_42956 && binop_y_42957; - float A_elem_42959; - - if (cond_42958) { - float A_elem_42961 = ((__global - float *) images_mem_44381)[gtid_42954 * - N_28477 + - A_col_idx_42955]; - - A_elem_42959 = A_elem_42961; - } else { - A_elem_42959 = 0.0F; - } - - bool cond_42963 = slt64(k_42951, Tk_42679); - int64_t a_loc_ind_42964; - - if (cond_42963) { - int64_t binop_y_42965 = Tk_42679 * i_42953; - int64_t loc_fi_42966 = k_42951 + binop_y_42965; - - a_loc_ind_42964 = loc_fi_42966; - } else { - a_loc_ind_42964 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_42964) && slt64(a_loc_ind_42964, - a_loc_szz_42685)) { - ((__local float *) mem_44666)[a_loc_ind_42964] = A_elem_42959; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_42971 = 0; i_42971 < tk_div_ty_42681; i_42971++) { - int64_t binop_y_42994 = Ty_42675 * i_42971; - - for (int64_t i_42973 = 0; i_42973 < Rx_42678; i_42973++) { - int64_t binop_y_42996 = Tx_42677 * i_42973; - int64_t ltid_x_42975 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_y_42976 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_42977 = local_tid_45898; - int64_t k_42995 = ltid_x_42975 + binop_y_42994; - int64_t j_42997 = ltid_y_42976 + binop_y_42996; - int64_t gtid_42998 = jjj_42696 + j_42997; - int64_t B_row_idx_42999 = kk_42926 + k_42995; - bool binop_x_43000 = slt64(gtid_42998, i32_res_28493); - bool binop_y_43001 = slt64(B_row_idx_42999, i32_res_28487); - bool cond_43002 = 
binop_x_43000 && binop_y_43001; - float B_elem_43003; - - if (cond_43002) { - float B_elem_43005 = ((__global - float *) mem_44393)[B_row_idx_42999 * - i32_res_28493 + - gtid_42998]; - - B_elem_43003 = B_elem_43005; - } else { - B_elem_43003 = 0.0F; - } - - bool cond_43007 = slt64(k_42995, Tk_42679); - int64_t b_loc_ind_43008; - - if (cond_43007) { - int64_t binop_y_43009 = TxRx_42682 * k_42995; - int64_t loc_fi_43010 = j_42997 + binop_y_43009; - - b_loc_ind_43008 = loc_fi_43010; - } else { - b_loc_ind_43008 = (int64_t) -1; - } - if (sle64((int64_t) 0, b_loc_ind_43008) && slt64(b_loc_ind_43008, - b_loc_szz_42687)) { - ((__local float *) mem_44668)[b_loc_ind_43008] = B_elem_43003; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float mem_44821[Ry_42676]; - float mem_44825[Rx_42678]; - float mem_44835[Ry_42676 * Rx_42678]; - float loop_mem_44837[Ry_42676 * Rx_42678]; - float mem_param_44808[Ry_42676 * Rx_42678]; - - for (int32_t i_10 = 0; i_10 < Ry_42676 * Rx_42678; i_10++) - mem_param_44808[i_10] = loop_mem_44755[i_10]; - for (int64_t i_43015 = 0; i_43015 < Tk_42679; i_43015++) { - int64_t cmpop_x_43017 = kk_42926 + i_43015; - bool cond_43018 = slt64(cmpop_x_43017, i32_res_28487); - float mem_45468[Ry_42676 * Rx_42678]; - - if (cond_43018) { - int64_t binop_y_43056 = TxRx_42682 * i_43015; - int64_t bytes_44810 = (int64_t) 4 * Ry_42676; - int64_t bytes_44812 = (int64_t) 4 * Rx_42678; - int64_t ltid_y_43021 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_x_43019 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_43020 = local_tid_45898; - float mem_44811[Ry_42676]; - float mem_44813[Rx_42678]; - int64_t binop_x_43047 = Ry_42676 * ltid_y_43021; - - for (int64_t i_43045 = 0; i_43045 < Ry_42676; i_43045++) { - int64_t binop_x_43048 = i_43045 + binop_x_43047; - int64_t binop_y_43049 = Tk_42679 * binop_x_43048; - int64_t a_loc_ind_43050 = i_43015 + binop_y_43049; - - for (int64_t i_45935 = 0; i_45935 < (int64_t) 1; i_45935++) { - mem_44811[i_43045 + i_45935] = ((__local - 
float *) mem_44666)[a_loc_ind_43050 + - i_45935]; - } - } - - int64_t binop_y_43058 = Rx_42678 * ltid_x_43019; - - for (int64_t i_43054 = 0; i_43054 < Rx_42678; i_43054++) { - int64_t binop_x_43057 = i_43054 + binop_y_43056; - int64_t b_loc_ind_43059 = binop_x_43057 + binop_y_43058; - - for (int64_t i_45937 = 0; i_45937 < (int64_t) 1; i_45937++) { - mem_44813[i_43054 + i_45937] = ((__local - float *) mem_44668)[b_loc_ind_43059 + - i_45937]; - } - } - for (int64_t i_45938 = 0; i_45938 < Ry_42676; i_45938++) { - mem_44821[i_45938] = mem_44811[i_45938]; - } - for (int64_t i_45939 = 0; i_45939 < Rx_42678; i_45939++) { - mem_44825[i_45939] = mem_44813[i_45939]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43066 = sext_i32_i64(ltid_pre_45902); - int64_t ltid_x_43064 = sext_i32_i64(ltid_pre_45903); - int32_t ltid_flat_43065 = local_tid_45898; - int64_t binop_y_43109 = Ry_42676 * ltid_y_43066; - int64_t binop_y_43113 = Rx_42678 * ltid_x_43064; - - for (int64_t i_43103 = 0; i_43103 < Ry_42676; i_43103++) { - int64_t binop_x_43108 = iii_42695 + i_43103; - int64_t cmpop_x_43110 = binop_x_43108 + binop_y_43109; - bool binop_x_43111 = slt64(cmpop_x_43110, m_28478); - - for (int64_t i_43106 = 0; i_43106 < Rx_42678; i_43106++) { - int64_t binop_x_43112 = jjj_42696 + i_43106; - int64_t cmpop_x_43114 = binop_x_43112 + binop_y_43113; - bool binop_y_43115 = slt64(cmpop_x_43114, i32_res_28493); - bool cond_43116 = binop_x_43111 && binop_y_43115; - - if (cond_43116) { - float a_43118 = mem_44821[i_43103]; - float c_43120 = mem_param_44808[i_43103 * Rx_42678 + - i_43106]; - bool isnan_res_43123; - - isnan_res_43123 = futrts_isnan32(a_43118); - - float defunc_1_f_res_43124; - - if (isnan_res_43123) { - defunc_1_f_res_43124 = 0.0F; - } else { - float b_43119 = mem_44825[i_43106]; - float defunc_1_f_res_f_res_43125 = a_43118 * - b_43119; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127404, 32) == 0 || + 
!ltid_in_bounds_127416)) { + // read operands + { + x_109901 = x_109900; + x_109900 = ((__local + double *) red_arr_mem_127408)[sext_i32_i64(squot32(local_tid_127404, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127421 = + slt64(srem64(sext_i32_i64(local_tid_127404), + k2p2zq_75151), + sext_i32_i64(local_tid_127404) - + sext_i32_i64(squot32(local_tid_127404, + 32) * 32 - 1)); - defunc_1_f_res_43124 = defunc_1_f_res_f_res_43125; + if (inactive_127421) { + x_109900 = x_109901; + } + if (!inactive_127421) { + double defunc_1_op_res_109902 = x_109900 + + x_109901; + + x_109900 = defunc_1_op_res_109902; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + x_109900; } - - float defunc_1_op_res_43129 = c_43120 + - defunc_1_f_res_43124; - - mem_param_44808[i_43103 * Rx_42678 + i_43106] = - defunc_1_op_res_43129; } } - } - for (int64_t i_45942 = 0; i_45942 < Ry_42676; i_45942++) { - for (int64_t i_45943 = 0; i_45943 < Rx_42678; i_45943++) { - mem_44835[i_45942 * Rx_42678 + i_45943] = - mem_param_44808[i_45942 * Rx_42678 + i_45943]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_45944 = 0; i_45944 < Ry_42676; i_45944++) { - for (int64_t i_45945 = 0; i_45945 < Rx_42678; i_45945++) { - mem_45468[i_45944 * Rx_42678 + i_45945] = - mem_44835[i_45944 * Rx_42678 + i_45945]; - } - } - } else { - for (int64_t i_45946 = 0; i_45946 < Ry_42676; i_45946++) { - for (int64_t i_45947 = 0; i_45947 < Rx_42678; i_45947++) { - mem_45468[i_45946 * Rx_42678 + i_45947] = - mem_param_44808[i_45946 * Rx_42678 + i_45947]; + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127404, 32) == 0) { + ((__local + double *) red_arr_mem_127408)[sext_i32_i64(local_tid_127404)] = + x_109901; + } } + barrier(CLK_LOCAL_MEM_FENCE); } } - - float mem_param_tmp_45932[Ry_42676 * Rx_42678]; - - for (int32_t i_11 = 0; i_11 < Ry_42676 * Rx_42678; i_11++) - 
mem_param_tmp_45932[i_11] = mem_45468[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_42676 * Rx_42678; i_12++) - mem_param_44808[i_12] = mem_param_tmp_45932[i_12]; - } - for (int32_t i_13 = 0; i_13 < Ry_42676 * Rx_42678; i_13++) - loop_mem_44837[i_13] = mem_param_44808[i_13]; - - int64_t reg_tile_i_45948 = squot64(sext_i32_i64(local_tid_45898), Tx_42677); - int64_t reg_tile_i_45949 = sext_i32_i64(local_tid_45898) - - squot64(sext_i32_i64(local_tid_45898), Tx_42677) * Tx_42677; - int64_t tile_dim_start_45950 = Ry_42676 * (Ty_42675 * gid_y_42693 + - reg_tile_i_45948); - int64_t tile_dim_start_45951 = Rx_42678 * (Tx_42677 * gid_x_42692 + - reg_tile_i_45949); - - for (int64_t nest_i_45952 = 0; nest_i_45952 < Ry_42676; nest_i_45952++) { - for (int64_t nest_i_45953 = 0; nest_i_45953 < Rx_42678; - nest_i_45953++) { - if (slt64(tile_dim_start_45950 + nest_i_45952, m_28478) && - slt64(tile_dim_start_45951 + nest_i_45953, i32_res_28493)) { - ((__global float *) mem_44840)[(tile_dim_start_45950 + - nest_i_45952) * i32_res_28493 + - (tile_dim_start_45951 + - nest_i_45953)] = - loop_mem_44837[nest_i_45952 * Rx_42678 + nest_i_45953]; + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127412) * + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401) + + sext_i32_i64(local_tid_127404), m_75136) && + slt64(sext_i32_i64(local_tid_127404), + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401))) { + ((__global + double *) mem_122677)[sext_i32_i64(virt_group_id_127412) * + squot64(segred_group_sizze_109896, + segment_sizze_nonzzero_127401) + + sext_i32_i64(local_tid_127404)] = + ((__local + double *) red_arr_mem_127408)[(sext_i32_i64(local_tid_127404) + + (int64_t) 1) * + segment_sizze_nonzzero_127401 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_9: - return; - #undef Ty_42675 - #undef Ry_42676 - #undef Tx_42677 - #undef Rx_42678 - 
#undef Tk_42679 - #undef tk_div_tx_42680 - #undef tk_div_ty_42681 - #undef TxRx_42682 - #undef TyRy_42683 - #undef a_loc_szz_42685 - #undef b_loc_szz_42687 -} -__kernel void mainMagnitudezisegmap_intragroup_43143(__global - int *global_failure, - __local volatile - int64_t *mem_44898_backing_aligned_0, - __local volatile - int64_t *mem_44889_backing_aligned_1, - int64_t m_28478, - int64_t i32_res_28493, - int64_t num_groups_y_43141, - int64_t num_whole_tiles_43159, - int64_t residual_input_43286, - unsigned char cond_43287, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44879, - __global - unsigned char *mem_44906) -{ - #define tile_sizze_43138 (mainMagnitudezitile_sizze_43137) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_44898_backing_5 = (__local volatile - char *) mem_44898_backing_aligned_0; - __local volatile char *restrict mem_44889_backing_0 = (__local volatile - char *) mem_44889_backing_aligned_1; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46029; - int32_t local_tid_46030; - int64_t group_sizze_46033; - int32_t wave_sizze_46032; - int32_t group_tid_46031; - - global_tid_46029 = get_global_id(0); - local_tid_46030 = get_local_id(0); - group_sizze_46033 = get_local_size(0); - wave_sizze_46032 = LOCKSTEP_WIDTH; - group_tid_46031 = get_group_id(0); - - int32_t gid_flat_43143; - - gid_flat_43143 = group_tid_46031; - - int32_t ltid_pre_46034; - - ltid_pre_46034 = squot32(local_tid_46030, sext_i64_i32(tile_sizze_43138)); - - int32_t ltid_pre_46035; - - ltid_pre_46035 = local_tid_46030 - squot32(local_tid_46030, - sext_i64_i32(tile_sizze_43138)) * - sext_i64_i32(tile_sizze_43138); - - int64_t gid_x_43135; - - gid_x_43135 = squot64(sext_i32_i64(group_tid_46031), num_groups_y_43141); - - int64_t gid_y_43136; - - gid_y_43136 = sext_i32_i64(group_tid_46031) - - squot64(sext_i32_i64(group_tid_46031), num_groups_y_43141) * - 
num_groups_y_43141; - - float mem_44884[1]; - int64_t ltid_y_43162 = sext_i32_i64(ltid_pre_46034); - int64_t ltid_x_43160 = sext_i32_i64(ltid_pre_46035); - int32_t ltid_flat_43161 = local_tid_46030; - - if (slt64(ltid_y_43162, tile_sizze_43138) && slt64(ltid_x_43160, - tile_sizze_43138)) { - mem_44884[(int64_t) 0] = 0.0F; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t binop_x_43245 = gid_x_43135 * tile_sizze_43138; - int64_t binop_x_43260 = gid_y_43136 * tile_sizze_43138; - __local char *mem_44889; - - mem_44889 = (__local char *) mem_44889_backing_0; - - float accs_mem_44894[1]; - float mem_param_44885[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_44885[i_1] = mem_44884[i_1]; - for (int64_t tile_id_43171 = 0; tile_id_43171 < num_whole_tiles_43159; - tile_id_43171++) { - int64_t binop_x_43243 = tile_sizze_43138 * tile_id_43171; - int64_t ltid_y_43174 = sext_i32_i64(ltid_pre_46034); - int64_t ltid_x_43172 = sext_i32_i64(ltid_pre_46035); - int32_t ltid_flat_43173 = local_tid_46030; - int64_t j_43244 = ltid_x_43172 + binop_x_43243; - int64_t gtid_43246 = ltid_y_43174 + binop_x_43245; - bool binop_x_43251 = slt64(j_43244, i32_res_28493); - bool binop_y_43252 = slt64(gtid_43246, m_28478); - bool cond_43253 = binop_x_43251 && binop_y_43252; - float pre_43254; - - if (cond_43253) { - float x_43255 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_43246 * - i32_res_28493 + - j_43244]; - - pre_43254 = x_43255; - } else { - pre_43254 = 0.0F; - } - ((__local float *) mem_44889)[ltid_y_43174 * tile_sizze_43138 + - ltid_x_43172] = pre_43254; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_44893[1]; - int64_t ltid_y_43206 = sext_i32_i64(ltid_pre_46034); - int64_t ltid_x_43204 = sext_i32_i64(ltid_pre_46035); - int32_t ltid_flat_43205 = local_tid_46030; - int64_t gtid_43259 = ltid_y_43206 + binop_x_43245; - int64_t gtid_43261 = ltid_x_43204 + binop_x_43260; - float acc_43264 = mem_param_44885[(int64_t) 0]; - bool binop_x_43268 = slt64(gtid_43259, m_28478); - bool 
binop_y_43269 = slt64(gtid_43261, i32_res_28493); - bool cond_43270 = binop_x_43268 && binop_y_43269; - float acc_43271; - - if (cond_43270) { - float x_43272; - float redout_44315 = acc_43264; - - for (int64_t i_44316 = 0; i_44316 < tile_sizze_43138; i_44316++) { - float x_43276 = ((__local float *) mem_44889)[ltid_y_43206 * - tile_sizze_43138 + - i_44316]; - int64_t slice_44367 = binop_x_43243 + i_44316; - float x_43277 = ((__global float *) mem_44879)[slice_44367 * - (i32_res_28493 * - m_28478) + - gtid_43259 * - i32_res_28493 + - gtid_43261]; - float defunc_1_f_res_43278 = x_43276 * x_43277; - float defunc_1_op_res_43275 = defunc_1_f_res_43278 + - redout_44315; - float redout_tmp_46038 = defunc_1_op_res_43275; - - redout_44315 = redout_tmp_46038; - } - x_43272 = redout_44315; - acc_43271 = x_43272; - } else { - acc_43271 = acc_43264; - } - mem_44893[(int64_t) 0] = acc_43271; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46036[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46036[i_2] = mem_44893[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_44885[i_3] = mem_param_tmp_46036[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_44894[i_4] = mem_param_44885[i_4]; - - __local char *mem_44898; - - mem_44898 = (__local char *) mem_44898_backing_5; - - float mem_44902[1]; - float mem_45482[1]; - - if (cond_43287) { - mem_45482[(int64_t) 0] = accs_mem_44894[(int64_t) 0]; - } else { - int64_t binop_x_43360 = tile_sizze_43138 * num_whole_tiles_43159; - int64_t ltid_y_43290 = sext_i32_i64(ltid_pre_46034); - int64_t ltid_x_43288 = sext_i32_i64(ltid_pre_46035); - int32_t ltid_flat_43289 = local_tid_46030; - int64_t j_43361 = ltid_x_43288 + binop_x_43360; - int64_t gtid_43363 = binop_x_43245 + ltid_y_43290; - bool binop_x_43368 = slt64(j_43361, i32_res_28493); - bool binop_y_43369 = slt64(gtid_43363, m_28478); - bool cond_43370 = binop_x_43368 && binop_y_43369; - float pre_43371; - - if (cond_43370) { - float x_43372 = ((__global - 
float *) defunc_3_map_res_mem_44850)[gtid_43363 * - i32_res_28493 + - j_43361]; - - pre_43371 = x_43372; - } else { - pre_43371 = 0.0F; - } - ((__local float *) mem_44898)[ltid_y_43290 * tile_sizze_43138 + - ltid_x_43288] = pre_43371; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43323 = sext_i32_i64(ltid_pre_46034); - int64_t ltid_x_43321 = sext_i32_i64(ltid_pre_46035); - int32_t ltid_flat_43322 = local_tid_46030; - int64_t gtid_43377 = binop_x_43245 + ltid_y_43323; - int64_t gtid_43379 = binop_x_43260 + ltid_x_43321; - float acc_43382 = accs_mem_44894[(int64_t) 0]; - bool binop_x_43386 = slt64(gtid_43377, m_28478); - bool binop_y_43387 = slt64(gtid_43379, i32_res_28493); - bool cond_43388 = binop_x_43386 && binop_y_43387; - float acc_43389; - - if (cond_43388) { - float x_43390; - float redout_44317 = acc_43382; - - for (int64_t i_44318 = 0; i_44318 < residual_input_43286; - i_44318++) { - float x_43394 = ((__local float *) mem_44898)[ltid_y_43323 * - tile_sizze_43138 + - i_44318]; - int64_t slice_44368 = binop_x_43360 + i_44318; - float x_43395 = ((__global float *) mem_44879)[slice_44368 * - (i32_res_28493 * - m_28478) + - gtid_43377 * - i32_res_28493 + - gtid_43379]; - float defunc_1_f_res_43396 = x_43394 * x_43395; - float defunc_1_op_res_43393 = defunc_1_f_res_43396 + - redout_44317; - float redout_tmp_46039 = defunc_1_op_res_43393; - - redout_44317 = redout_tmp_46039; - } - x_43390 = redout_44317; - acc_43389 = x_43390; - } else { - acc_43389 = acc_43382; - } - mem_44902[(int64_t) 0] = acc_43389; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45482[(int64_t) 0] = mem_44902[(int64_t) 0]; - } - - int64_t thread_out_index_46040 = gid_x_43135 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46034); - int64_t thread_out_index_46041 = gid_y_43136 * tile_sizze_43138 + - sext_i32_i64(ltid_pre_46035); - - if (slt64(thread_out_index_46040, m_28478) && slt64(thread_out_index_46041, - i32_res_28493)) { - ((__global float *) mem_44906)[thread_out_index_46040 * i32_res_28493 + - 
thread_out_index_46041] = - mem_45482[(int64_t) 0]; - } - - error_5: + error_1: return; - #undef tile_sizze_43138 + #undef segred_group_sizze_109896 } -__kernel void mainMagnitudezisegmap_intragroup_43435(__global - int *global_failure, - __local volatile - int64_t *mem_44958_backing_aligned_0, - __local volatile - int64_t *mem_44956_backing_aligned_1, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28493, - int64_t gridDim_x_43429, - int64_t full_tiles_43460, - int64_t kk_43663, __global - unsigned char *defunc_4_map_res_mem_44916, - __global - unsigned char *mem_44940, - __global - unsigned char *mem_45130) +__kernel void mainzisegred_small_109847(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127341_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t r_75826, + int64_t num_groups_109866, + int64_t segment_sizze_nonzzero_127334, + __global unsigned char *mem_120246, + __global unsigned char *mem_122665, + __global unsigned char *mem_122668, + __global unsigned char *mem_122671) { - #define Ty_43416 (mainMagnitudeziTy_43413) - #define Ry_43417 (mainMagnitudeziRy_43415) - #define Tx_43418 (mainMagnitudeziTx_43412) - #define Rx_43419 (mainMagnitudeziRx_43414) - #define Tk_43420 (mainMagnitudeziTk_43411) - #define tk_div_tx_43421 (sdiv_up64(mainMagnitudeziTk_43411, mainMagnitudeziTx_43412)) - #define tk_div_ty_43422 (sdiv_up64(mainMagnitudeziTk_43411, mainMagnitudeziTy_43413)) - #define TxRx_43423 (mainMagnitudeziTx_43412 * mainMagnitudeziRx_43414) - #define TyRy_43424 (mainMagnitudeziTy_43413 * mainMagnitudeziRy_43415) - #define a_loc_szz_43426 (mainMagnitudeziTk_43411 * (mainMagnitudeziTy_43413 * mainMagnitudeziRy_43415)) - #define b_loc_szz_43428 (mainMagnitudeziRx_43414 * (mainMagnitudeziTx_43412 * mainMagnitudeziTk_43411)) + #define segred_group_sizze_109865 (mainzisegred_group_sizze_109841) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - 
__local volatile char *restrict mem_44958_backing_1 = (__local volatile - char *) mem_44958_backing_aligned_0; - __local volatile char *restrict mem_44956_backing_0 = (__local volatile - char *) mem_44956_backing_aligned_1; + __local volatile char *restrict red_arr_mem_127341_backing_0 = + (__local volatile + char *) red_arr_mem_127341_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46117; - int32_t local_tid_46118; - int64_t group_sizze_46121; - int32_t wave_sizze_46120; - int32_t group_tid_46119; - - global_tid_46117 = get_global_id(0); - local_tid_46118 = get_local_id(0); - group_sizze_46121 = get_local_size(0); - wave_sizze_46120 = LOCKSTEP_WIDTH; - group_tid_46119 = get_group_id(0); - - int32_t gid_flat_43435; - - gid_flat_43435 = group_tid_46119; - - int32_t ltid_pre_46122; - - ltid_pre_46122 = squot32(local_tid_46118, sext_i64_i32(Tx_43418)); - - int32_t ltid_pre_46123; - - ltid_pre_46123 = local_tid_46118 - squot32(local_tid_46118, - sext_i64_i32(Tx_43418)) * - sext_i64_i32(Tx_43418); - - int64_t gid_y_43434; - - gid_y_43434 = squot64(sext_i32_i64(group_tid_46119), gridDim_x_43429); - - int64_t gid_x_43433; - - gid_x_43433 = sext_i32_i64(group_tid_46119) - - squot64(sext_i32_i64(group_tid_46119), gridDim_x_43429) * - gridDim_x_43429; - - int64_t iii_43436; - - iii_43436 = TyRy_43424 * gid_y_43434; - - int64_t jjj_43437 = TxRx_43423 * gid_x_43433; - float mem_44954[Ry_43417 * Rx_43419]; - int64_t ltid_y_43440 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_x_43438 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43439 = local_tid_46118; - float mem_44945[Ry_43417 * Rx_43419]; - - for (int64_t i_43451 = 0; i_43451 < Ry_43417; i_43451++) { - for (int64_t i_43454 = 0; i_43454 < Rx_43419; i_43454++) { - mem_44945[i_43451 * Rx_43419 + i_43454] = 0.0F; - } - } - for (int64_t i_46126 = 0; i_46126 < Ry_43417; i_46126++) { - for (int64_t i_46127 = 0; i_46127 < Rx_43419; i_46127++) { - mem_44954[i_46126 * Rx_43419 + i_46127] = 
mem_44945[i_46126 * - Rx_43419 + - i_46127]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_44956; - - mem_44956 = (__local char *) mem_44956_backing_0; - - __local char *mem_44958; - - mem_44958 = (__local char *) mem_44958_backing_1; - - float mem_45029[Ry_43417]; - float mem_45033[Rx_43419]; - float loop_mem_45045[Ry_43417 * Rx_43419]; - float mem_param_44959[Ry_43417 * Rx_43419]; - - for (int32_t i_2 = 0; i_2 < Ry_43417 * Rx_43419; i_2++) - mem_param_44959[i_2] = mem_44954[i_2]; - for (int64_t i_43461 = 0; i_43461 < full_tiles_43460; i_43461++) { - int64_t kk_43465 = Tk_43420 * i_43461; + int32_t global_tid_127336; + int32_t local_tid_127337; + int64_t group_sizze_127340; + int32_t wave_sizze_127339; + int32_t group_tid_127338; + + global_tid_127336 = get_global_id(0); + local_tid_127337 = get_local_id(0); + group_sizze_127340 = get_local_size(0); + wave_sizze_127339 = LOCKSTEP_WIDTH; + group_tid_127338 = get_group_id(0); + + int32_t phys_tid_109847; + + phys_tid_109847 = global_tid_127336; + + __local char *red_arr_mem_127341; + + red_arr_mem_127341 = (__local char *) red_arr_mem_127341_backing_0; + + int32_t phys_group_id_127343; + + phys_group_id_127343 = get_group_id(0); + for (int32_t i_127344 = 0; i_127344 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334))) - + phys_group_id_127343, sext_i64_i32(num_groups_109866)); + i_127344++) { + int32_t virt_group_id_127345 = phys_group_id_127343 + i_127344 * + sext_i64_i32(num_groups_109866); + int64_t gtid_109838 = squot64(sext_i32_i64(local_tid_127337), + segment_sizze_nonzzero_127334) + + sext_i32_i64(virt_group_id_127345) * + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334); + int64_t gtid_109846 = srem64(sext_i32_i64(local_tid_127337), + k2p2zq_75151); - for (int64_t i_43466 = 0; i_43466 < Ry_43417; i_43466++) { - int64_t binop_y_43489 = Ty_43416 * i_43466; - - for (int64_t i_43468 = 0; i_43468 < tk_div_tx_43421; 
i_43468++) { - int64_t binop_y_43487 = Tx_43418 * i_43468; - int64_t ltid_x_43470 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_y_43471 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43472 = local_tid_46118; - int64_t k_43488 = ltid_y_43471 + binop_y_43487; - int64_t i_43490 = ltid_x_43470 + binop_y_43489; - int64_t gtid_43491 = iii_43436 + i_43490; - int64_t A_col_idx_43492 = kk_43465 + k_43488; - bool cond_43493 = slt64(gtid_43491, m_28478); - float A_elem_43494; - - if (cond_43493) { - float A_elem_43496 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43491 * - i32_res_28493 + - A_col_idx_43492]; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && (slt64(gtid_109838, + m_75136) && + slt64(sext_i32_i64(local_tid_127337), + k2p2zq_75151 * + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334)))) { + double x_109876 = ((__global double *) mem_120246)[gtid_109846 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109838 * + defunc_2_reduce_res_75260 + + r_75826]; + double defunc_0_f_res_109877; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_75151; + i_119830++) { + double x_109881 = ((__global + double *) mem_120246)[i_119830 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_109838 * + defunc_2_reduce_res_75260 + + r_75826]; + double x_109882 = ((__global + double *) mem_122665)[i_119830 * + (k2p2zq_75151 * + m_75136) + + gtid_109838 * + k2p2zq_75151 + + gtid_109846]; + double defunc_1_f_res_109883 = x_109881 * x_109882; + double defunc_1_op_res_109880 = defunc_1_f_res_109883 + + redout_119829; + double redout_tmp_127346 = defunc_1_op_res_109880; - A_elem_43494 = A_elem_43496; - } else { - A_elem_43494 = 0.0F; + redout_119829 = redout_tmp_127346; } + defunc_0_f_res_109877 = redout_119829; - bool cond_43498 = slt64(k_43488, Tk_43420); - int64_t a_loc_ind_43499; + double defunc_1_f_res_109884 = x_109876 * defunc_0_f_res_109877; - if (cond_43498) { - int64_t binop_y_43500 
= Tk_43420 * i_43490; - int64_t loc_fi_43501 = k_43488 + binop_y_43500; - - a_loc_ind_43499 = loc_fi_43501; - } else { - a_loc_ind_43499 = (int64_t) -1; + // save map-out results + { + ((__global double *) mem_122671)[gtid_109838 * + k2p2zq_75151 + + gtid_109846] = + defunc_0_f_res_109877; } - if (sle64((int64_t) 0, a_loc_ind_43499) && - slt64(a_loc_ind_43499, a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43499] = - A_elem_43494; + // save results to be reduced + { + ((__local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + defunc_1_f_res_109884; } - barrier(CLK_LOCAL_MEM_FENCE); + } else { + ((__local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + 0.0; } } - for (int64_t i_43506 = 0; i_43506 < tk_div_ty_43422; i_43506++) { - int64_t binop_y_43527 = Ty_43416 * i_43506; - - for (int64_t i_43508 = 0; i_43508 < Rx_43419; i_43508++) { - int64_t binop_y_43529 = Tx_43418 * i_43508; - int64_t ltid_x_43510 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_y_43511 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43512 = local_tid_46118; - int64_t k_43528 = ltid_x_43510 + binop_y_43527; - int64_t j_43530 = ltid_y_43511 + binop_y_43529; - int64_t gtid_43531 = jjj_43437 + j_43530; - int64_t B_row_idx_43532 = kk_43465 + k_43528; - bool cond_43533 = slt64(gtid_43531, N_28477); - float B_elem_43534; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction + { + double x_109870; + double x_109871; + double x_127347; + double x_127348; + bool ltid_in_bounds_127350; - if (cond_43533) { - float B_elem_43536 = ((__global - float *) mem_44940)[B_row_idx_43532 * - N_28477 + - gtid_43531]; - - B_elem_43534 = B_elem_43536; - } else { - B_elem_43534 = 0.0F; - } + ltid_in_bounds_127350 = slt64(sext_i32_i64(local_tid_127337), + k2p2zq_75151 * + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334)); - bool cond_43538 = slt64(k_43528, Tk_43420); - int64_t 
b_loc_ind_43539; + int32_t skip_threads_127351; - if (cond_43538) { - int64_t binop_y_43540 = TxRx_43423 * k_43528; - int64_t loc_fi_43541 = j_43530 + binop_y_43540; - - b_loc_ind_43539 = loc_fi_43541; - } else { - b_loc_ind_43539 = (int64_t) -1; + // read input for in-block scan + { + if (ltid_in_bounds_127350) { + x_109871 = ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)]; + if ((local_tid_127337 - squot32(local_tid_127337, 32) * + 32) == 0) { + x_109870 = x_109871; + } + } } - if (sle64((int64_t) 0, b_loc_ind_43539) && - slt64(b_loc_ind_43539, b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43539] = - B_elem_43534; + // in-block scan (hopefully no barriers needed) + { + skip_threads_127351 = 1; + while (slt32(skip_threads_127351, 32)) { + if (sle32(skip_threads_127351, local_tid_127337 - + squot32(local_tid_127337, 32) * 32) && + ltid_in_bounds_127350) { + // read operands + { + x_109870 = ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337) - + sext_i32_i64(skip_threads_127351)]; + } + // perform operation + { + bool inactive_127352 = + slt64(srem64(sext_i32_i64(local_tid_127337), + k2p2zq_75151), + sext_i32_i64(local_tid_127337) - + sext_i32_i64(local_tid_127337 - + skip_threads_127351)); + + if (inactive_127352) { + x_109870 = x_109871; + } + if (!inactive_127352) { + double defunc_1_op_res_109872 = x_109870 + + x_109871; + + x_109870 = defunc_1_op_res_109872; + } + } + } + if (sle32(wave_sizze_127339, skip_threads_127351)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127351, local_tid_127337 - + squot32(local_tid_127337, 32) * 32) && + ltid_in_bounds_127350) { + // write result + { + ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + x_109870; + x_109871 = x_109870; + } + } + if (sle32(wave_sizze_127339, skip_threads_127351)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127351 *= 2; + } } barrier(CLK_LOCAL_MEM_FENCE); - 
} - } - - float loop_mem_45044[Ry_43417 * Rx_43419]; - float mem_param_45016[Ry_43417 * Rx_43419]; - - for (int32_t i_3 = 0; i_3 < Ry_43417 * Rx_43419; i_3++) - mem_param_45016[i_3] = mem_param_44959[i_3]; - for (int64_t i_43546 = 0; i_43546 < Tk_43420; i_43546++) { - int64_t binop_y_43585 = TxRx_43423 * i_43546; - int64_t ltid_y_43550 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_x_43548 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43549 = local_tid_46118; - float mem_45019[Ry_43417]; - float mem_45021[Rx_43419]; - int64_t binop_x_43576 = Ry_43417 * ltid_y_43550; - - for (int64_t i_43574 = 0; i_43574 < Ry_43417; i_43574++) { - int64_t binop_x_43577 = i_43574 + binop_x_43576; - int64_t binop_y_43578 = Tk_43420 * binop_x_43577; - int64_t a_loc_ind_43579 = i_43546 + binop_y_43578; - - for (int64_t i_46139 = 0; i_46139 < (int64_t) 1; i_46139++) { - mem_45019[i_43574 + i_46139] = ((__local - float *) mem_44956)[a_loc_ind_43579 + - i_46139]; - } - } - - int64_t binop_y_43587 = Rx_43419 * ltid_x_43548; - - for (int64_t i_43583 = 0; i_43583 < Rx_43419; i_43583++) { - int64_t binop_x_43586 = i_43583 + binop_y_43585; - int64_t b_loc_ind_43588 = binop_x_43586 + binop_y_43587; - - for (int64_t i_46141 = 0; i_46141 < (int64_t) 1; i_46141++) { - mem_45021[i_43583 + i_46141] = ((__local - float *) mem_44958)[b_loc_ind_43588 + - i_46141]; - } - } - for (int64_t i_46142 = 0; i_46142 < Ry_43417; i_46142++) { - mem_45029[i_46142] = mem_45019[i_46142]; - } - for (int64_t i_46143 = 0; i_46143 < Rx_43419; i_46143++) { - mem_45033[i_46143] = mem_45021[i_46143]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45043[Ry_43417 * Rx_43419]; - int64_t ltid_y_43595 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_x_43593 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43594 = local_tid_46118; - int64_t binop_y_43636 = Ry_43417 * ltid_y_43595; - int64_t binop_y_43640 = Rx_43419 * ltid_x_43593; - - for (int64_t i_43630 = 0; i_43630 < Ry_43417; i_43630++) { - int64_t binop_x_43635 = 
iii_43436 + i_43630; - int64_t cmpop_x_43637 = binop_x_43635 + binop_y_43636; - bool binop_x_43638 = slt64(cmpop_x_43637, m_28478); - - for (int64_t i_43633 = 0; i_43633 < Rx_43419; i_43633++) { - int64_t binop_x_43639 = jjj_43437 + i_43633; - int64_t cmpop_x_43641 = binop_x_43639 + binop_y_43640; - bool binop_y_43642 = slt64(cmpop_x_43641, N_28477); - bool cond_43643 = binop_x_43638 && binop_y_43642; - - if (cond_43643) { - float a_43645 = mem_45029[i_43630]; - float b_43646 = mem_45033[i_43633]; - float c_43647 = mem_param_45016[i_43630 * Rx_43419 + - i_43633]; - float defunc_1_f_res_43650 = a_43645 * b_43646; - float defunc_1_op_res_43654 = c_43647 + - defunc_1_f_res_43650; - - mem_param_45016[i_43630 * Rx_43419 + i_43633] = - defunc_1_op_res_43654; + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127337 - squot32(local_tid_127337, 32) * + 32) == 31 && ltid_in_bounds_127350) { + ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(squot32(local_tid_127337, + 32))] = + x_109870; } } - } - for (int64_t i_46146 = 0; i_46146 < Ry_43417; i_46146++) { - for (int64_t i_46147 = 0; i_46147 < Rx_43419; i_46147++) { - mem_45043[i_46146 * Rx_43419 + i_46147] = - mem_param_45016[i_46146 * Rx_43419 + i_46147]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46136[Ry_43417 * Rx_43419]; - - for (int32_t i_4 = 0; i_4 < Ry_43417 * Rx_43419; i_4++) - mem_param_tmp_46136[i_4] = mem_45043[i_4]; - for (int32_t i_5 = 0; i_5 < Ry_43417 * Rx_43419; i_5++) - mem_param_45016[i_5] = mem_param_tmp_46136[i_5]; - } - for (int32_t i_6 = 0; i_6 < Ry_43417 * Rx_43419; i_6++) - loop_mem_45044[i_6] = mem_param_45016[i_6]; - - float mem_param_tmp_46128[Ry_43417 * Rx_43419]; - - for (int32_t i_7 = 0; i_7 < Ry_43417 * Rx_43419; i_7++) - mem_param_tmp_46128[i_7] = loop_mem_45044[i_7]; - for (int32_t i_8 = 0; i_8 < Ry_43417 * Rx_43419; i_8++) - mem_param_44959[i_8] = mem_param_tmp_46128[i_8]; - } - for (int32_t i_9 = 0; i_9 < Ry_43417 * 
Rx_43419; i_9++) - loop_mem_45045[i_9] = mem_param_44959[i_9]; - for (int64_t i_43664 = 0; i_43664 < Ry_43417; i_43664++) { - int64_t binop_y_43689 = Ty_43416 * i_43664; - - for (int64_t i_43666 = 0; i_43666 < tk_div_tx_43421; i_43666++) { - int64_t binop_y_43687 = Tx_43418 * i_43666; - int64_t ltid_x_43668 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_y_43669 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43670 = local_tid_46118; - int64_t k_43688 = ltid_y_43669 + binop_y_43687; - int64_t i_43690 = ltid_x_43668 + binop_y_43689; - int64_t gtid_43691 = iii_43436 + i_43690; - int64_t A_col_idx_43692 = kk_43663 + k_43688; - bool binop_x_43693 = slt64(gtid_43691, m_28478); - bool binop_y_43694 = slt64(A_col_idx_43692, i32_res_28493); - bool cond_43695 = binop_x_43693 && binop_y_43694; - float A_elem_43696; - - if (cond_43695) { - float A_elem_43698 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_43691 * - i32_res_28493 + - A_col_idx_43692]; - - A_elem_43696 = A_elem_43698; - } else { - A_elem_43696 = 0.0F; - } - - bool cond_43700 = slt64(k_43688, Tk_43420); - int64_t a_loc_ind_43701; - - if (cond_43700) { - int64_t binop_y_43702 = Tk_43420 * i_43690; - int64_t loc_fi_43703 = k_43688 + binop_y_43702; - - a_loc_ind_43701 = loc_fi_43703; - } else { - a_loc_ind_43701 = (int64_t) -1; - } - if (sle64((int64_t) 0, a_loc_ind_43701) && slt64(a_loc_ind_43701, - a_loc_szz_43426)) { - ((__local float *) mem_44956)[a_loc_ind_43701] = A_elem_43696; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - for (int64_t i_43708 = 0; i_43708 < tk_div_ty_43422; i_43708++) { - int64_t binop_y_43731 = Ty_43416 * i_43708; - - for (int64_t i_43710 = 0; i_43710 < Rx_43419; i_43710++) { - int64_t binop_y_43733 = Tx_43418 * i_43710; - int64_t ltid_x_43712 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_y_43713 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43714 = local_tid_46118; - int64_t k_43732 = ltid_x_43712 + binop_y_43731; - int64_t j_43734 = ltid_y_43713 + binop_y_43733; - 
int64_t gtid_43735 = jjj_43437 + j_43734; - int64_t B_row_idx_43736 = kk_43663 + k_43732; - bool binop_x_43737 = slt64(gtid_43735, N_28477); - bool binop_y_43738 = slt64(B_row_idx_43736, i32_res_28493); - bool cond_43739 = binop_x_43737 && binop_y_43738; - float B_elem_43740; - - if (cond_43739) { - float B_elem_43742 = ((__global - float *) mem_44940)[B_row_idx_43736 * - N_28477 + - gtid_43735]; - - B_elem_43740 = B_elem_43742; - } else { - B_elem_43740 = 0.0F; - } - - bool cond_43744 = slt64(k_43732, Tk_43420); - int64_t b_loc_ind_43745; - - if (cond_43744) { - int64_t binop_y_43746 = TxRx_43423 * k_43732; - int64_t loc_fi_43747 = j_43734 + binop_y_43746; - - b_loc_ind_43745 = loc_fi_43747; - } else { - b_loc_ind_43745 = (int64_t) -1; - } - if (sle64((int64_t) 0, b_loc_ind_43745) && slt64(b_loc_ind_43745, - b_loc_szz_43428)) { - ((__local float *) mem_44958)[b_loc_ind_43745] = B_elem_43740; - } - barrier(CLK_LOCAL_MEM_FENCE); - } - } - - float mem_45111[Ry_43417]; - float mem_45115[Rx_43419]; - float mem_45125[Ry_43417 * Rx_43419]; - float loop_mem_45127[Ry_43417 * Rx_43419]; - float mem_param_45098[Ry_43417 * Rx_43419]; - - for (int32_t i_10 = 0; i_10 < Ry_43417 * Rx_43419; i_10++) - mem_param_45098[i_10] = loop_mem_45045[i_10]; - for (int64_t i_43752 = 0; i_43752 < Tk_43420; i_43752++) { - int64_t cmpop_x_43754 = kk_43663 + i_43752; - bool cond_43755 = slt64(cmpop_x_43754, i32_res_28493); - float mem_45498[Ry_43417 * Rx_43419]; - - if (cond_43755) { - int64_t binop_y_43793 = TxRx_43423 * i_43752; - int64_t bytes_45100 = (int64_t) 4 * Ry_43417; - int64_t bytes_45102 = (int64_t) 4 * Rx_43419; - int64_t ltid_y_43758 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_x_43756 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43757 = local_tid_46118; - float mem_45101[Ry_43417]; - float mem_45103[Rx_43419]; - int64_t binop_x_43784 = Ry_43417 * ltid_y_43758; - - for (int64_t i_43782 = 0; i_43782 < Ry_43417; i_43782++) { - int64_t binop_x_43785 = i_43782 + binop_x_43784; 
- int64_t binop_y_43786 = Tk_43420 * binop_x_43785; - int64_t a_loc_ind_43787 = i_43752 + binop_y_43786; - - for (int64_t i_46155 = 0; i_46155 < (int64_t) 1; i_46155++) { - mem_45101[i_43782 + i_46155] = ((__local - float *) mem_44956)[a_loc_ind_43787 + - i_46155]; - } - } - - int64_t binop_y_43795 = Rx_43419 * ltid_x_43756; - - for (int64_t i_43791 = 0; i_43791 < Rx_43419; i_43791++) { - int64_t binop_x_43794 = i_43791 + binop_y_43793; - int64_t b_loc_ind_43796 = binop_x_43794 + binop_y_43795; - - for (int64_t i_46157 = 0; i_46157 < (int64_t) 1; i_46157++) { - mem_45103[i_43791 + i_46157] = ((__local - float *) mem_44958)[b_loc_ind_43796 + - i_46157]; - } - } - for (int64_t i_46158 = 0; i_46158 < Ry_43417; i_46158++) { - mem_45111[i_46158] = mem_45101[i_46158]; - } - for (int64_t i_46159 = 0; i_46159 < Rx_43419; i_46159++) { - mem_45115[i_46159] = mem_45103[i_46159]; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_y_43803 = sext_i32_i64(ltid_pre_46122); - int64_t ltid_x_43801 = sext_i32_i64(ltid_pre_46123); - int32_t ltid_flat_43802 = local_tid_46118; - int64_t binop_y_43844 = Ry_43417 * ltid_y_43803; - int64_t binop_y_43848 = Rx_43419 * ltid_x_43801; - - for (int64_t i_43838 = 0; i_43838 < Ry_43417; i_43838++) { - int64_t binop_x_43843 = iii_43436 + i_43838; - int64_t cmpop_x_43845 = binop_x_43843 + binop_y_43844; - bool binop_x_43846 = slt64(cmpop_x_43845, m_28478); - - for (int64_t i_43841 = 0; i_43841 < Rx_43419; i_43841++) { - int64_t binop_x_43847 = jjj_43437 + i_43841; - int64_t cmpop_x_43849 = binop_x_43847 + binop_y_43848; - bool binop_y_43850 = slt64(cmpop_x_43849, N_28477); - bool cond_43851 = binop_x_43846 && binop_y_43850; + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127353; - if (cond_43851) { - float a_43853 = mem_45111[i_43838]; - float b_43854 = mem_45115[i_43841]; - float c_43855 = mem_param_45098[i_43838 * Rx_43419 + - i_43841]; - float 
defunc_1_f_res_43858 = a_43853 * b_43854; - float defunc_1_op_res_43862 = c_43855 + - defunc_1_f_res_43858; - - mem_param_45098[i_43838 * Rx_43419 + i_43841] = - defunc_1_op_res_43862; + // read input for in-block scan + { + if (squot32(local_tid_127337, 32) == 0 && + ltid_in_bounds_127350) { + x_127348 = ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)]; + if ((local_tid_127337 - squot32(local_tid_127337, + 32) * 32) == 0) { + x_127347 = x_127348; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127353 = 1; + while (slt32(skip_threads_127353, 32)) { + if (sle32(skip_threads_127353, local_tid_127337 - + squot32(local_tid_127337, 32) * 32) && + (squot32(local_tid_127337, 32) == 0 && + ltid_in_bounds_127350)) { + // read operands + { + x_127347 = ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337) - + sext_i32_i64(skip_threads_127353)]; + } + // perform operation + { + bool inactive_127354 = + slt64(srem64(sext_i32_i64(local_tid_127337 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_127337 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127337 - + skip_threads_127353) * + 32 + 32 - 1)); + + if (inactive_127354) { + x_127347 = x_127348; + } + if (!inactive_127354) { + double defunc_1_op_res_127349 = + x_127347 + x_127348; + + x_127347 = defunc_1_op_res_127349; + } + } + } + if (sle32(wave_sizze_127339, skip_threads_127353)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127353, local_tid_127337 - + squot32(local_tid_127337, 32) * 32) && + (squot32(local_tid_127337, 32) == 0 && + ltid_in_bounds_127350)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + x_127347; + x_127348 = x_127347; + } + } + if (sle32(wave_sizze_127339, skip_threads_127353)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127353 *= 2; + } } } - } - for (int64_t i_46162 = 0; i_46162 < Ry_43417; i_46162++) { - for 
(int64_t i_46163 = 0; i_46163 < Rx_43419; i_46163++) { - mem_45125[i_46162 * Rx_43419 + i_46163] = - mem_param_45098[i_46162 * Rx_43419 + i_46163]; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - for (int64_t i_46164 = 0; i_46164 < Ry_43417; i_46164++) { - for (int64_t i_46165 = 0; i_46165 < Rx_43419; i_46165++) { - mem_45498[i_46164 * Rx_43419 + i_46165] = - mem_45125[i_46164 * Rx_43419 + i_46165]; + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127337, 32) == 0 || + !ltid_in_bounds_127350)) { + // read operands + { + x_109871 = x_109870; + x_109870 = ((__local + double *) red_arr_mem_127341)[sext_i32_i64(squot32(local_tid_127337, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127355 = + slt64(srem64(sext_i32_i64(local_tid_127337), + k2p2zq_75151), + sext_i32_i64(local_tid_127337) - + sext_i32_i64(squot32(local_tid_127337, + 32) * 32 - 1)); + + if (inactive_127355) { + x_109870 = x_109871; + } + if (!inactive_127355) { + double defunc_1_op_res_109872 = x_109870 + + x_109871; + + x_109870 = defunc_1_op_res_109872; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + x_109870; + } + } } - } - } else { - for (int64_t i_46166 = 0; i_46166 < Ry_43417; i_46166++) { - for (int64_t i_46167 = 0; i_46167 < Rx_43419; i_46167++) { - mem_45498[i_46166 * Rx_43419 + i_46167] = - mem_param_45098[i_46166 * Rx_43419 + i_46167]; + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127337, 32) == 0) { + ((__local + double *) red_arr_mem_127341)[sext_i32_i64(local_tid_127337)] = + x_109871; + } } + barrier(CLK_LOCAL_MEM_FENCE); } } - - float mem_param_tmp_46152[Ry_43417 * Rx_43419]; - - for (int32_t i_11 = 0; i_11 < Ry_43417 * Rx_43419; i_11++) - mem_param_tmp_46152[i_11] = mem_45498[i_11]; - for (int32_t i_12 = 0; i_12 < Ry_43417 * Rx_43419; i_12++) - mem_param_45098[i_12] = 
mem_param_tmp_46152[i_12]; - } - for (int32_t i_13 = 0; i_13 < Ry_43417 * Rx_43419; i_13++) - loop_mem_45127[i_13] = mem_param_45098[i_13]; - - int64_t reg_tile_i_46168 = squot64(sext_i32_i64(local_tid_46118), Tx_43418); - int64_t reg_tile_i_46169 = sext_i32_i64(local_tid_46118) - - squot64(sext_i32_i64(local_tid_46118), Tx_43418) * Tx_43418; - int64_t tile_dim_start_46170 = Ry_43417 * (Ty_43416 * gid_y_43434 + - reg_tile_i_46168); - int64_t tile_dim_start_46171 = Rx_43419 * (Tx_43418 * gid_x_43433 + - reg_tile_i_46169); - - for (int64_t nest_i_46172 = 0; nest_i_46172 < Ry_43417; nest_i_46172++) { - for (int64_t nest_i_46173 = 0; nest_i_46173 < Rx_43419; - nest_i_46173++) { - if (slt64(tile_dim_start_46170 + nest_i_46172, m_28478) && - slt64(tile_dim_start_46171 + nest_i_46173, N_28477)) { - ((__global float *) mem_45130)[(tile_dim_start_46170 + - nest_i_46172) * N_28477 + - (tile_dim_start_46171 + - nest_i_46173)] = - loop_mem_45127[nest_i_46172 * Rx_43419 + nest_i_46173]; + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127345) * + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334) + + sext_i32_i64(local_tid_127337), m_75136) && + slt64(sext_i32_i64(local_tid_127337), + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334))) { + ((__global + double *) mem_122668)[sext_i32_i64(virt_group_id_127345) * + squot64(segred_group_sizze_109865, + segment_sizze_nonzzero_127334) + + sext_i32_i64(local_tid_127337)] = + ((__local + double *) red_arr_mem_127341)[(sext_i32_i64(local_tid_127337) + + (int64_t) 1) * + segment_sizze_nonzzero_127334 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_9: + error_1: return; - #undef Ty_43416 - #undef Ry_43417 - #undef Tx_43418 - #undef Rx_43419 - #undef Tk_43420 - #undef tk_div_tx_43421 - #undef tk_div_ty_43422 - #undef TxRx_43423 - #undef TyRy_43424 - #undef a_loc_szz_43426 - 
#undef b_loc_szz_43428 -} -__kernel void mainMagnitudezisegmap_intragroup_43869(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45203_backing_aligned_0, - __local volatile - int64_t *mem_45196_backing_aligned_1, - int64_t N_28477, - int64_t m_28478, - int32_t n_28481, - float hfrac_28483, - int64_t i32_res_28487, - int32_t k2p2_28489, - int64_t num_whole_tiles_43891, - int64_t residual_input_43992, - unsigned char cond_43993, - __global - unsigned char *mem_45182, - __global - unsigned char *mem_45185, - __global - unsigned char *mem_45216, - __global - unsigned char *mem_45218, - __global - unsigned char *mem_45220) + #undef segred_group_sizze_109865 +} +__kernel void mainzisegred_small_111144(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128217_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111352, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_128210, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global unsigned char *mem_123907, + __global unsigned char *mem_123910, + __global unsigned char *mem_123944, + __global unsigned char *mem_123948) { - #define segmap_group_sizze_36893 (mainMagnitudezisegmap_group_sizze_36858) + #define segred_group_sizze_111351 (mainzisegred_group_sizze_111138) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict mem_45203_backing_5 = (__local volatile - char *) mem_45203_backing_aligned_0; - __local volatile char *restrict mem_45196_backing_0 = (__local volatile - char *) mem_45196_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t 
global_tid_46341; - int32_t local_tid_46342; - int64_t group_sizze_46345; - int32_t wave_sizze_46344; - int32_t group_tid_46343; - - global_tid_46341 = get_global_id(0); - local_tid_46342 = get_local_id(0); - group_sizze_46345 = get_local_size(0); - wave_sizze_46344 = LOCKSTEP_WIDTH; - group_tid_46343 = get_group_id(0); - - int32_t gid_flat_43869; - - gid_flat_43869 = group_tid_46343; - - int32_t ltid_pre_46346; - - ltid_pre_46346 = local_tid_46342; - - int64_t gid_43868; - - gid_43868 = sext_i32_i64(group_tid_46343); - - int64_t binop_x_43876; - - binop_x_43876 = segmap_group_sizze_36893 * gid_43868; + __local volatile char *restrict red_arr_mem_128217_backing_0 = + (__local volatile + char *) red_arr_mem_128217_backing_aligned_0; - int32_t mem_45189[1]; - int64_t ltid_43870 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_43871 = local_tid_46342; - int64_t gtid_43877 = ltid_43870 + binop_x_43876; - bool cond_43878 = slt64(gtid_43877, m_28478); - int32_t pre_43879; + if (*global_failure >= 0) + return; - if (cond_43878) { - int32_t defunc_0_f_res_43881; - int32_t redout_44325 = 0; + int32_t global_tid_128212; + int32_t local_tid_128213; + int64_t group_sizze_128216; + int32_t wave_sizze_128215; + int32_t group_tid_128214; + + global_tid_128212 = get_global_id(0); + local_tid_128213 = get_local_id(0); + group_sizze_128216 = get_local_size(0); + wave_sizze_128215 = LOCKSTEP_WIDTH; + group_tid_128214 = get_group_id(0); + + int32_t phys_tid_111144; + + phys_tid_111144 = global_tid_128212; + + __local char *red_arr_mem_128217; + + red_arr_mem_128217 = (__local char *) red_arr_mem_128217_backing_0; + + int32_t phys_group_id_128219; + + phys_group_id_128219 = get_group_id(0); + for (int32_t i_128220 = 0; i_128220 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210))) - + phys_group_id_128219, sext_i64_i32(num_groups_111352)); + i_128220++) { + int32_t virt_group_id_128221 = 
phys_group_id_128219 + i_128220 * + sext_i64_i32(num_groups_111352); + int64_t gtid_111133 = squot64(squot64(sext_i32_i64(local_tid_128213), + segment_sizze_nonzzero_128210) + + sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210), + k2p2zq_75151); + int64_t gtid_111134 = squot64(sext_i32_i64(local_tid_128213), + segment_sizze_nonzzero_128210) + + sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210) - + squot64(squot64(sext_i32_i64(local_tid_128213), + segment_sizze_nonzzero_128210) + + sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_111143 = srem64(sext_i32_i64(local_tid_128213), + k2p2zq_75151); - for (int32_t i_44370 = 0; i_44370 < n_28481; i_44370++) { - int64_t i_44326 = sext_i32_i64(i_44370); - float x_43885 = ((__global float *) mem_45182)[i_44326 * m_28478 + - gtid_43877]; - bool isnan_res_43886; - - isnan_res_43886 = futrts_isnan32(x_43885); - - bool cond_43887 = !isnan_res_43886; - int32_t defunc_0_f_res_43888 = btoi_bool_i32(cond_43887); - int32_t defunc_1_op_res_43884 = add32(defunc_0_f_res_43888, - redout_44325); - int32_t redout_tmp_46347 = defunc_1_op_res_43884; - - redout_44325 = redout_tmp_46347; + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_75151) && ((slt64(gtid_111133, + m_75136) && + slt64(gtid_111134, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_128213), + k2p2zq_75151 * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210)))) { + double fr_111362 = ((__global + double *) mem_123910)[gtid_111133]; + double x_111363 = ((__global double *) mem_123907)[gtid_111133 * + k2p2zq_75151 + + gtid_111134]; + double x_111365 = ((__global double *) mem_123907)[gtid_111133 * + k2p2zq_75151 + + gtid_111143]; + double x_111366 = ((__global + double *) mem_param_123778)[gtid_111133 * + binop_x_120251 
+ + gtid_111134 * + k2p2zq_75151 + + gtid_111143]; + double x_111367 = ((__global double *) mem_120246)[gtid_111143 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111133 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111368 = x_111363 * x_111365; + double y_111369 = x_111368 / fr_111362; + double defunc_1_f_res_111370 = x_111366 - y_111369; + double defunc_1_f_res_111371 = x_111367 * defunc_1_f_res_111370; + + // save map-out results + { + ((__global double *) mem_123948)[gtid_111133 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_111134 * + k2p2zq_75151 + + gtid_111143] = + defunc_1_f_res_111370; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + defunc_1_f_res_111371; + } + } else { + ((__local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + 0.0; + } } - defunc_0_f_res_43881 = redout_44325; - pre_43879 = defunc_0_f_res_43881; - } else { - pre_43879 = 0; - } - mem_45189[(int64_t) 0] = pre_43879; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45192[1]; - int64_t ltid_43892 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_43893 = local_tid_46342; - - mem_45192[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45196; - - mem_45196 = (__local char *) mem_45196_backing_0; - - float accs_mem_45200[1]; - float mem_param_45193[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45193[i_1] = mem_45192[i_1]; - for (int64_t tile_id_43899 = 0; tile_id_43899 < num_whole_tiles_43891; - tile_id_43899++) { - int64_t binop_x_43948 = segmap_group_sizze_36893 * tile_id_43899; - int64_t ltid_43900 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_43901 = local_tid_46342; - int64_t j_43949 = ltid_43900 + binop_x_43948; - bool cond_43953 = slt64(j_43949, i32_res_28487); - int32_t pre_43954; - - if (cond_43953) { - int32_t index_primexp_44283 = sext_i64_i32(j_43949); - - pre_43954 = index_primexp_44283; - } else { - pre_43954 = 0; - } - 
((__local int32_t *) mem_45196)[ltid_43900] = pre_43954; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45199[1]; - int64_t ltid_43919 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_43920 = local_tid_46342; - int64_t gtid_43959 = binop_x_43876 + ltid_43919; - float acc_43961 = mem_param_45193[(int64_t) 0]; - bool cond_43962 = slt64(gtid_43959, m_28478); - float acc_43963; - - if (cond_43962) { - int32_t defunc_0_f_res_43960 = mem_45189[(int64_t) 0]; - float x_43964; - float redout_44327 = acc_43961; - - for (int64_t i_44328 = 0; i_44328 < segmap_group_sizze_36893; - i_44328++) { - int32_t x_43968 = ((__local int32_t *) mem_45196)[i_44328]; - bool cond_43969 = slt32(x_43968, defunc_0_f_res_43960); - float defunc_0_f_res_43970; - - if (cond_43969) { - int64_t i_43971 = sext_i32_i64(x_43968); - bool x_43972 = sle64((int64_t) 0, i_43971); - bool y_43973 = slt64(i_43971, N_28477); - bool bounds_check_43974 = x_43972 && y_43973; - bool index_certs_43975; - - if (!bounds_check_43974) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 94) == -1) { - global_failure_args[0] = i_43971; - global_failure_args[1] = N_28477; - ; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction + { + double x_111356; + double x_111357; + double x_128222; + double x_128223; + bool ltid_in_bounds_128225; + + ltid_in_bounds_128225 = slt64(sext_i32_i64(local_tid_128213), + k2p2zq_75151 * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210)); + + int32_t skip_threads_128226; + + // read input for in-block scan + { + if (ltid_in_bounds_128225) { + x_111357 = ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)]; + if ((local_tid_128213 - squot32(local_tid_128213, 32) * + 32) == 0) { + x_111356 = x_111357; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128226 = 1; + while (slt32(skip_threads_128226, 32)) { + if (sle32(skip_threads_128226, 
local_tid_128213 - + squot32(local_tid_128213, 32) * 32) && + ltid_in_bounds_128225) { + // read operands + { + x_111356 = ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213) - + sext_i32_i64(skip_threads_128226)]; } - local_failure = true; - goto error_3; + // perform operation + { + bool inactive_128227 = + slt64(srem64(sext_i32_i64(local_tid_128213), + k2p2zq_75151), + sext_i32_i64(local_tid_128213) - + sext_i32_i64(local_tid_128213 - + skip_threads_128226)); + + if (inactive_128227) { + x_111356 = x_111357; + } + if (!inactive_128227) { + double defunc_1_op_res_111358 = x_111356 + + x_111357; + + x_111356 = defunc_1_op_res_111358; + } + } + } + if (sle32(wave_sizze_128215, skip_threads_128226)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128226, local_tid_128213 - + squot32(local_tid_128213, 32) * 32) && + ltid_in_bounds_128225) { + // write result + { + ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + x_111356; + x_111357 = x_111356; + } + } + if (sle32(wave_sizze_128215, skip_threads_128226)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128226 *= 2; } - - float defunc_0_f_res_t_res_43976 = ((__global - float *) mem_45185)[i_43971 * - m_28478 + - gtid_43959]; - - defunc_0_f_res_43970 = defunc_0_f_res_t_res_43976; - } else { - defunc_0_f_res_43970 = 0.0F; } - - float defunc_0_f_res_43977 = defunc_0_f_res_43970 * - defunc_0_f_res_43970; - float defunc_1_op_res_43967 = defunc_0_f_res_43977 + - redout_44327; - float redout_tmp_46350 = defunc_1_op_res_43967; - - redout_44327 = redout_tmp_46350; - } - x_43964 = redout_44327; - acc_43963 = x_43964; - } else { - acc_43963 = acc_43961; - } - mem_45199[(int64_t) 0] = acc_43963; - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46348[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46348[i_2] = mem_45199[i_2]; - for (int32_t i_3 = 0; 
i_3 < 1; i_3++) - mem_param_45193[i_3] = mem_param_tmp_46348[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_45200[i_4] = mem_param_45193[i_4]; - - __local char *mem_45203; - - mem_45203 = (__local char *) mem_45203_backing_5; - - float mem_45206[1]; - float mem_45511[1]; - - if (cond_43993) { - mem_45511[(int64_t) 0] = accs_mem_45200[(int64_t) 0]; - } else { - int64_t binop_x_44003 = segmap_group_sizze_36893 * - num_whole_tiles_43891; - int64_t ltid_43994 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_43995 = local_tid_46342; - int64_t j_44004 = ltid_43994 + binop_x_44003; - bool cond_44008 = slt64(j_44004, i32_res_28487); - int32_t pre_44009; - - if (cond_44008) { - int32_t index_primexp_44284 = sext_i64_i32(j_44004); - - pre_44009 = index_primexp_44284; - } else { - pre_44009 = 0; - } - ((__local int32_t *) mem_45203)[ltid_43994] = pre_44009; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_44014 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_44015 = local_tid_46342; - int64_t gtid_44022 = binop_x_43876 + ltid_44014; - float acc_44024 = accs_mem_45200[(int64_t) 0]; - bool cond_44025 = slt64(gtid_44022, m_28478); - float acc_44026; - - if (cond_44025) { - int32_t defunc_0_f_res_44023 = mem_45189[(int64_t) 0]; - float x_44027; - float redout_44329 = acc_44024; - - for (int64_t i_44330 = 0; i_44330 < residual_input_43992; - i_44330++) { - int32_t x_44031 = ((__local int32_t *) mem_45203)[i_44330]; - bool cond_44032 = slt32(x_44031, defunc_0_f_res_44023); - float defunc_0_f_res_44033; - - if (cond_44032) { - int64_t i_44034 = sext_i32_i64(x_44031); - bool x_44035 = sle64((int64_t) 0, i_44034); - bool y_44036 = slt64(i_44034, N_28477); - bool bounds_check_44037 = x_44035 && y_44036; - bool index_certs_44038; + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128213 - squot32(local_tid_128213, 32) * + 32) == 31 && ltid_in_bounds_128225) { + ((volatile __local + double *) 
red_arr_mem_128217)[sext_i32_i64(squot32(local_tid_128213, + 32))] = + x_111356; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128228; - if (!bounds_check_44037) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 95) == -1) { - global_failure_args[0] = i_44034; - global_failure_args[1] = N_28477; - ; + // read input for in-block scan + { + if (squot32(local_tid_128213, 32) == 0 && + ltid_in_bounds_128225) { + x_128223 = ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)]; + if ((local_tid_128213 - squot32(local_tid_128213, + 32) * 32) == 0) { + x_128222 = x_128223; } - local_failure = true; - goto error_5; } } - - float defunc_0_f_res_t_res_44039 = ((__global - float *) mem_45185)[i_44034 * - m_28478 + - gtid_44022]; - - defunc_0_f_res_44033 = defunc_0_f_res_t_res_44039; - } else { - defunc_0_f_res_44033 = 0.0F; - } - - float defunc_0_f_res_44040 = defunc_0_f_res_44033 * - defunc_0_f_res_44033; - float defunc_1_op_res_44030 = defunc_0_f_res_44040 + - redout_44329; - float redout_tmp_46351 = defunc_1_op_res_44030; - - redout_44329 = redout_tmp_46351; - } - x_44027 = redout_44329; - acc_44026 = x_44027; - } else { - acc_44026 = acc_44024; - } - mem_45206[(int64_t) 0] = acc_44026; - - error_5: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - mem_45511[(int64_t) 0] = mem_45206[(int64_t) 0]; - } - - int32_t mem_45210[1]; - int32_t mem_45212[1]; - float mem_45214[1]; - int64_t ltid_44043 = sext_i32_i64(ltid_pre_46346); - int32_t ltid_flat_44044 = local_tid_46342; - int64_t gtid_44054 = binop_x_43876 + ltid_44043; - bool cond_44056 = slt64(gtid_44054, m_28478); - int32_t postlude_44057; - int32_t postlude_44058; - float postlude_44059; - - if (cond_44056) { - float defunc_0_f_res_44055 = mem_45511[(int64_t) 0]; - int32_t defunc_0_f_res_44060 = mem_45189[(int64_t) 0]; - int32_t 
r32_arg_44061 = sub32(defunc_0_f_res_44060, k2p2_28489); - float i32_res_44062 = sitofp_i32_f32(r32_arg_44061); - float sqrt_arg_44063 = defunc_0_f_res_44055 / i32_res_44062; - float sqrt_res_44064; - - sqrt_res_44064 = futrts_sqrt32(sqrt_arg_44063); - - float i32_res_44065 = sitofp_i32_f32(defunc_0_f_res_44060); - float t32_arg_44066 = hfrac_28483 * i32_res_44065; - int32_t f32_res_44067 = fptosi_f32_i32(t32_arg_44066); - - postlude_44057 = f32_res_44067; - postlude_44058 = defunc_0_f_res_44060; - postlude_44059 = sqrt_res_44064; - } else { - postlude_44057 = 0; - postlude_44058 = 0; - postlude_44059 = 0.0F; - } - mem_45210[(int64_t) 0] = postlude_44057; - mem_45212[(int64_t) 0] = postlude_44058; - mem_45214[(int64_t) 0] = postlude_44059; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64(sext_i32_i64(local_tid_46342) + segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343), m_28478)) { - ((__global int32_t *) mem_45216)[sext_i32_i64(local_tid_46342) + - segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343)] = - mem_45210[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46342) + segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343), m_28478)) { - ((__global int32_t *) mem_45218)[sext_i32_i64(local_tid_46342) + - segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343)] = - mem_45212[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46342) + segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343), m_28478)) { - ((__global float *) mem_45220)[sext_i32_i64(local_tid_46342) + - segmap_group_sizze_36893 * - sext_i32_i64(group_tid_46343)] = - mem_45214[(int64_t) 0]; - } - - error_7: - return; - #undef segmap_group_sizze_36893 -} -__kernel void mainMagnitudezisegmap_intragroup_44075(__global - int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *mem_45269_backing_aligned_0, - __local volatile - int64_t *mem_45262_backing_aligned_1, - int64_t N_28477, - int64_t m_28478, - int64_t 
i32_res_28880, - int64_t num_whole_tiles_44095, - int64_t residual_input_44206, - unsigned char cond_44207, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45275) -{ - #define segmap_group_sizze_37150 (mainMagnitudezisegmap_group_sizze_37127) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict mem_45269_backing_5 = (__local volatile - char *) mem_45269_backing_aligned_0; - __local volatile char *restrict mem_45262_backing_0 = (__local volatile - char *) mem_45262_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46525; - int32_t local_tid_46526; - int64_t group_sizze_46529; - int32_t wave_sizze_46528; - int32_t group_tid_46527; - - global_tid_46525 = get_global_id(0); - local_tid_46526 = get_local_id(0); - group_sizze_46529 = get_local_size(0); - wave_sizze_46528 = LOCKSTEP_WIDTH; - group_tid_46527 = get_group_id(0); - - int32_t gid_flat_44075; - - gid_flat_44075 = group_tid_46527; - - int32_t ltid_pre_46530; - - ltid_pre_46530 = local_tid_46526; - - int64_t gid_44074; - - gid_44074 = sext_i32_i64(group_tid_46527); - - int64_t binop_x_44084; - - binop_x_44084 = segmap_group_sizze_37150 * gid_44074; - - int32_t mem_45253[1]; - int32_t mem_45255[1]; - int64_t ltid_44076 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44077 = local_tid_46526; - int64_t gtid_44085 = ltid_44076 + binop_x_44084; - bool cond_44086 = slt64(gtid_44085, m_28478); - int32_t pre_44087; - int32_t pre_44088; - - if (cond_44086) { - int32_t x_44089 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_44085]; - int32_t x_44090 = ((__global - int32_t *) 
defunc_3_map_res_mem_45244)[gtid_44085]; - - pre_44087 = x_44089; - pre_44088 = x_44090; - } else { - pre_44087 = 0; - pre_44088 = 0; - } - mem_45253[(int64_t) 0] = pre_44087; - mem_45255[(int64_t) 0] = pre_44088; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45258[1]; - int64_t ltid_44096 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44097 = local_tid_46526; - - mem_45258[(int64_t) 0] = 0.0F; - barrier(CLK_LOCAL_MEM_FENCE); - - __local char *mem_45262; - - mem_45262 = (__local char *) mem_45262_backing_0; - - float accs_mem_45266[1]; - float mem_param_45259[1]; - - for (int32_t i_1 = 0; i_1 < 1; i_1++) - mem_param_45259[i_1] = mem_45258[i_1]; - for (int64_t tile_id_44103 = 0; tile_id_44103 < num_whole_tiles_44095; - tile_id_44103++) { - int64_t binop_x_44156 = segmap_group_sizze_37150 * tile_id_44103; - int64_t ltid_44104 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44105 = local_tid_46526; - int64_t j_44157 = ltid_44104 + binop_x_44156; - bool cond_44162 = slt64(j_44157, i32_res_28880); - int32_t pre_44163; - - if (cond_44162) { - int32_t index_primexp_44285 = sext_i64_i32(j_44157); - int32_t tile_elem_44164 = index_primexp_44285; - - pre_44163 = tile_elem_44164; - } else { - pre_44163 = 0; - } - ((__local int32_t *) mem_45262)[ltid_44104] = pre_44163; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_45265[1]; - int64_t ltid_44124 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44125 = local_tid_46526; - int64_t gtid_44168 = binop_x_44084 + ltid_44124; - float acc_44171 = mem_param_45259[(int64_t) 0]; - bool cond_44172 = slt64(gtid_44168, m_28478); - float acc_44173; - - if (cond_44172) { - int32_t x_44169 = mem_45253[(int64_t) 0]; - int32_t x_44170 = mem_45255[(int64_t) 0]; - float x_44174; - float redout_44331 = acc_44171; - - for (int64_t i_44332 = 0; i_44332 < segmap_group_sizze_37150; - i_44332++) { - int32_t x_44178 = ((__local int32_t *) mem_45262)[i_44332]; - bool cond_44179 = slt32(x_44178, x_44170); - float defunc_0_f_res_44180; - - if 
(cond_44179) { - int32_t x_44181 = add32(x_44169, x_44178); - int32_t x_44182 = sub32(x_44181, x_44170); - int32_t i_44183 = add32(1, x_44182); - int64_t i_44184 = sext_i32_i64(i_44183); - bool x_44185 = sle64((int64_t) 0, i_44184); - bool y_44186 = slt64(i_44184, N_28477); - bool bounds_check_44187 = x_44185 && y_44186; - bool index_certs_44188; - - if (!bounds_check_44187) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 99) == -1) { - global_failure_args[0] = i_44184; - global_failure_args[1] = N_28477; - ; + // in-block scan (hopefully no barriers needed) + { + skip_threads_128228 = 1; + while (slt32(skip_threads_128228, 32)) { + if (sle32(skip_threads_128228, local_tid_128213 - + squot32(local_tid_128213, 32) * 32) && + (squot32(local_tid_128213, 32) == 0 && + ltid_in_bounds_128225)) { + // read operands + { + x_128222 = ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213) - + sext_i32_i64(skip_threads_128228)]; + } + // perform operation + { + bool inactive_128229 = + slt64(srem64(sext_i32_i64(local_tid_128213 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_128213 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128213 - + skip_threads_128228) * + 32 + 32 - 1)); + + if (inactive_128229) { + x_128222 = x_128223; + } + if (!inactive_128229) { + double defunc_1_op_res_128224 = + x_128222 + x_128223; + + x_128222 = defunc_1_op_res_128224; + } + } } - local_failure = true; - goto error_3; + if (sle32(wave_sizze_128215, skip_threads_128228)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128228, local_tid_128213 - + squot32(local_tid_128213, 32) * 32) && + (squot32(local_tid_128213, 32) == 0 && + ltid_in_bounds_128225)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + x_128222; + x_128223 = x_128222; + } + } + if (sle32(wave_sizze_128215, skip_threads_128228)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128228 *= 2; } } - - float 
defunc_0_f_res_t_res_44189 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44168 * - N_28477 + - i_44184]; - - defunc_0_f_res_44180 = defunc_0_f_res_t_res_44189; - } else { - defunc_0_f_res_44180 = 0.0F; } - - float defunc_1_op_res_44177 = defunc_0_f_res_44180 + - redout_44331; - float redout_tmp_46533 = defunc_1_op_res_44177; - - redout_44331 = redout_tmp_46533; - } - x_44174 = redout_44331; - acc_44173 = x_44174; - } else { - acc_44173 = acc_44171; - } - mem_45265[(int64_t) 0] = acc_44173; - - error_3: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - float mem_param_tmp_46531[1]; - - for (int32_t i_2 = 0; i_2 < 1; i_2++) - mem_param_tmp_46531[i_2] = mem_45265[i_2]; - for (int32_t i_3 = 0; i_3 < 1; i_3++) - mem_param_45259[i_3] = mem_param_tmp_46531[i_3]; - } - for (int32_t i_4 = 0; i_4 < 1; i_4++) - accs_mem_45266[i_4] = mem_param_45259[i_4]; - - __local char *mem_45269; - - mem_45269 = (__local char *) mem_45269_backing_5; - - float mem_45272[1]; - float mem_45520[1]; - - if (cond_44207) { - mem_45520[(int64_t) 0] = accs_mem_45266[(int64_t) 0]; - } else { - int64_t binop_x_44217 = segmap_group_sizze_37150 * - num_whole_tiles_44095; - int64_t ltid_44208 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44209 = local_tid_46526; - int64_t j_44218 = ltid_44208 + binop_x_44217; - bool cond_44223 = slt64(j_44218, i32_res_28880); - int32_t pre_44224; - - if (cond_44223) { - int32_t index_primexp_44286 = sext_i64_i32(j_44218); - int32_t tile_elem_44225 = index_primexp_44286; - - pre_44224 = tile_elem_44225; - } else { - pre_44224 = 0; - } - ((__local int32_t *) mem_45269)[ltid_44208] = pre_44224; - barrier(CLK_LOCAL_MEM_FENCE); - - int64_t ltid_44229 = sext_i32_i64(ltid_pre_46530); - int32_t ltid_flat_44230 = local_tid_46526; - int64_t gtid_44237 = binop_x_44084 + ltid_44229; - float acc_44240 = accs_mem_45266[(int64_t) 0]; - bool cond_44241 = slt64(gtid_44237, m_28478); - float acc_44242; - - if (cond_44241) 
{ - int32_t x_44238 = mem_45253[(int64_t) 0]; - int32_t x_44239 = mem_45255[(int64_t) 0]; - float x_44243; - float redout_44333 = acc_44240; - - for (int64_t i_44334 = 0; i_44334 < residual_input_44206; - i_44334++) { - int32_t x_44247 = ((__local int32_t *) mem_45269)[i_44334]; - bool cond_44248 = slt32(x_44247, x_44239); - float defunc_0_f_res_44249; - - if (cond_44248) { - int32_t x_44250 = add32(x_44238, x_44247); - int32_t x_44251 = sub32(x_44250, x_44239); - int32_t i_44252 = add32(1, x_44251); - int64_t i_44253 = sext_i32_i64(i_44252); - bool x_44254 = sle64((int64_t) 0, i_44253); - bool y_44255 = slt64(i_44253, N_28477); - bool bounds_check_44256 = x_44254 && y_44255; - bool index_certs_44257; - - if (!bounds_check_44256) { + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128213, 32) == 0 || + !ltid_in_bounds_128225)) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 100) == -1) { - global_failure_args[0] = i_44253; - global_failure_args[1] = N_28477; - ; + x_111357 = x_111356; + x_111356 = ((__local + double *) red_arr_mem_128217)[sext_i32_i64(squot32(local_tid_128213, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128230 = + slt64(srem64(sext_i32_i64(local_tid_128213), + k2p2zq_75151), + sext_i32_i64(local_tid_128213) - + sext_i32_i64(squot32(local_tid_128213, + 32) * 32 - 1)); + + if (inactive_128230) { + x_111356 = x_111357; + } + if (!inactive_128230) { + double defunc_1_op_res_111358 = x_111356 + + x_111357; + + x_111356 = defunc_1_op_res_111358; } - local_failure = true; - goto error_5; + } + // write final result + { + ((__local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + x_111356; } } - - float defunc_0_f_res_t_res_44258 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_44237 * - N_28477 + - i_44253]; - - defunc_0_f_res_44249 = defunc_0_f_res_t_res_44258; - } else { - defunc_0_f_res_44249 = 0.0F; } - - float 
defunc_1_op_res_44246 = defunc_0_f_res_44249 + - redout_44333; - float redout_tmp_46534 = defunc_1_op_res_44246; - - redout_44333 = redout_tmp_46534; + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128213, 32) == 0) { + ((__local + double *) red_arr_mem_128217)[sext_i32_i64(local_tid_128213)] = + x_111357; + } + } + barrier(CLK_LOCAL_MEM_FENCE); } - x_44243 = redout_44333; - acc_44242 = x_44243; - } else { - acc_44242 = acc_44240; } - mem_45272[(int64_t) 0] = acc_44242; - - error_5: barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210) + + sext_i32_i64(local_tid_128213), m_75136 * k2p2zq_75151) && + slt64(sext_i32_i64(local_tid_128213), + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210))) { + ((__global + double *) mem_123944)[squot64(sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210) + + sext_i32_i64(local_tid_128213), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210) + + sext_i32_i64(local_tid_128213) - + squot64(sext_i32_i64(virt_group_id_128221) * + squot64(segred_group_sizze_111351, + segment_sizze_nonzzero_128210) + + sext_i32_i64(local_tid_128213), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_128217)[(sext_i32_i64(local_tid_128213) + + (int64_t) 1) * + segment_sizze_nonzzero_128210 - + (int64_t) 1]; + } + } barrier(CLK_LOCAL_MEM_FENCE); - mem_45520[(int64_t) 0] = mem_45272[(int64_t) 0]; - } - if (slt64(sext_i32_i64(local_tid_46526) + segmap_group_sizze_37150 * - sext_i32_i64(group_tid_46527), m_28478)) { - ((__global float *) mem_45275)[sext_i32_i64(local_tid_46526) + - segmap_group_sizze_37150 * - sext_i32_i64(group_tid_46527)] = - 
mem_45520[(int64_t) 0]; + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } - error_6: + error_1: return; - #undef segmap_group_sizze_37150 + #undef segred_group_sizze_111351 } -__kernel void mainMagnitudezisegred_large_34797(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_45794_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_45792_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28487, - int64_t i32_res_28493, - int64_t num_groups_34936, - int64_t groups_per_segment_45778, - int64_t elements_per_thread_45779, - int64_t virt_num_groups_45780, - int64_t threads_per_segment_45782, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44531, - __global - unsigned char *mem_44536, - __global - unsigned char *group_res_arr_mem_45783, - __global - unsigned char *mainMagnitudezicounter_mem_45785) +__kernel void mainzisegred_small_111219(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128141_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111293, + int64_t segment_sizze_nonzzero_128134, + __global unsigned char *mem_120246, + __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123913) { - #define segred_group_sizze_34935 (mainMagnitudezisegred_group_sizze_34791) + #define segred_group_sizze_111292 (mainzisegred_group_sizze_111213) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_45794_backing_1 = - (__local volatile - char *) sync_arr_mem_45794_backing_aligned_0; - __local volatile char *restrict red_arr_mem_45792_backing_0 = + __local volatile char *restrict red_arr_mem_128141_backing_0 = (__local volatile - char *) red_arr_mem_45792_backing_aligned_1; + char *) red_arr_mem_128141_backing_aligned_0; if (*global_failure >= 0) return; - 
int32_t global_tid_45787; - int32_t local_tid_45788; - int64_t group_sizze_45791; - int32_t wave_sizze_45790; - int32_t group_tid_45789; - - global_tid_45787 = get_global_id(0); - local_tid_45788 = get_local_id(0); - group_sizze_45791 = get_local_size(0); - wave_sizze_45790 = LOCKSTEP_WIDTH; - group_tid_45789 = get_group_id(0); - - int32_t phys_tid_34797; - - phys_tid_34797 = global_tid_45787; - - __local char *red_arr_mem_45792; - - red_arr_mem_45792 = (__local char *) red_arr_mem_45792_backing_0; - - __local char *sync_arr_mem_45794; - - sync_arr_mem_45794 = (__local char *) sync_arr_mem_45794_backing_1; - - int32_t phys_group_id_45796; - - phys_group_id_45796 = get_group_id(0); - for (int32_t i_45797 = 0; i_45797 < - sdiv_up32(sext_i64_i32(virt_num_groups_45780) - phys_group_id_45796, - sext_i64_i32(num_groups_34936)); i_45797++) { - int32_t virt_group_id_45798 = phys_group_id_45796 + i_45797 * - sext_i64_i32(num_groups_34936); - int32_t flat_segment_id_45799 = squot32(virt_group_id_45798, - sext_i64_i32(groups_per_segment_45778)); - int64_t global_tid_45800 = srem64(sext_i32_i64(virt_group_id_45798) * - segred_group_sizze_34935 + - sext_i32_i64(local_tid_45788), - segred_group_sizze_34935 * - groups_per_segment_45778); - int64_t gtid_34784 = squot64(sext_i32_i64(flat_segment_id_45799), - i32_res_28493 * i32_res_28493); - int64_t gtid_34785 = squot64(sext_i32_i64(flat_segment_id_45799) - - squot64(sext_i32_i64(flat_segment_id_45799), - i32_res_28493 * i32_res_28493) * - (i32_res_28493 * i32_res_28493), - i32_res_28493); - int64_t gtid_34786 = sext_i32_i64(flat_segment_id_45799) - - squot64(sext_i32_i64(flat_segment_id_45799), i32_res_28493 * - i32_res_28493) * (i32_res_28493 * i32_res_28493) - - squot64(sext_i32_i64(flat_segment_id_45799) - - squot64(sext_i32_i64(flat_segment_id_45799), - i32_res_28493 * i32_res_28493) * - (i32_res_28493 * i32_res_28493), i32_res_28493) * - i32_res_28493; - int64_t gtid_34796; - float x_acc_45801; - int64_t chunk_sizze_45802; - 
- chunk_sizze_45802 = smin64(elements_per_thread_45779, - sdiv_up64(i32_res_28487 - - sext_i32_i64(sext_i64_i32(global_tid_45800)), - threads_per_segment_45782)); + int32_t global_tid_128136; + int32_t local_tid_128137; + int64_t group_sizze_128140; + int32_t wave_sizze_128139; + int32_t group_tid_128138; + + global_tid_128136 = get_global_id(0); + local_tid_128137 = get_local_id(0); + group_sizze_128140 = get_local_size(0); + wave_sizze_128139 = LOCKSTEP_WIDTH; + group_tid_128138 = get_group_id(0); + + int32_t phys_tid_111219; + + phys_tid_111219 = global_tid_128136; + + __local char *red_arr_mem_128141; + + red_arr_mem_128141 = (__local char *) red_arr_mem_128141_backing_0; + + int32_t phys_group_id_128143; + + phys_group_id_128143 = get_group_id(0); + for (int32_t i_128144 = 0; i_128144 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134))) - + phys_group_id_128143, sext_i64_i32(num_groups_111293)); + i_128144++) { + int32_t virt_group_id_128145 = phys_group_id_128143 + i_128144 * + sext_i64_i32(num_groups_111293); + int64_t gtid_111210 = squot64(sext_i32_i64(local_tid_128137), + segment_sizze_nonzzero_128134) + + sext_i32_i64(virt_group_id_128145) * + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134); + int64_t gtid_111218 = srem64(sext_i32_i64(local_tid_128137), + k2p2zq_75151); - float x_34939; - float x_34940; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_45801 = 0.0F; - } - for (int64_t i_45806 = 0; i_45806 < chunk_sizze_45802; i_45806++) { - gtid_34796 = sext_i32_i64(sext_i64_i32(global_tid_45800)) + - threads_per_segment_45782 * i_45806; - // apply map function - { - float x_34945 = ((__global - float *) images_mem_44381)[gtid_34784 * - N_28477 + - gtid_34796]; - float x_34946 = ((__global - float *) binop_p_mem_44390)[gtid_34785 * - N_28477 + - gtid_34796]; - float x_34947 = ((__global float *) mem_44531)[gtid_34786 * - N_28477 + - 
gtid_34796]; - float x_34948 = x_34946 * x_34947; - bool isnan_res_34949; - - isnan_res_34949 = futrts_isnan32(x_34945); - - float y_34950; - - if (isnan_res_34949) { - y_34950 = 0.0F; - } else { - y_34950 = 1.0F; - } - - float defunc_2_f_res_34951 = x_34948 * y_34950; + if (slt64((int64_t) 0, k2p2zq_75151) && (slt64(gtid_111210, + m_75136) && + slt64(sext_i32_i64(local_tid_128137), + k2p2zq_75151 * + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134)))) { + double x_111301 = ((__global double *) mem_120246)[gtid_111218 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111210 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111302 = ((__global + double *) mem_param_123786)[gtid_111210 * + k2p2zq_75151 + + gtid_111218]; + double defunc_1_f_res_111303 = x_111301 * x_111302; // save map-out results { } - // load accumulator - { - x_34939 = x_acc_45801; - } - // load new values - { - x_34940 = defunc_2_f_res_34951; - } - // apply reduction operator - { - float defunc_1_op_res_34941 = x_34939 + x_34940; - - // store in accumulator - { - x_acc_45801 = defunc_1_op_res_34941; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_34939 = x_acc_45801; - ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_34939; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_45807; - int32_t skip_waves_45808; - - skip_waves_45808 = 1; - - float x_45803; - float x_45804; - - offset_45807 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45788, - sext_i64_i32(segred_group_sizze_34935))) { - x_45803 = ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45807)]; - } - } - offset_45807 = 1; - while (slt32(offset_45807, wave_sizze_45790)) { - if (slt32(local_tid_45788 + offset_45807, - sext_i64_i32(segred_group_sizze_34935)) && - ((local_tid_45788 - squot32(local_tid_45788, wave_sizze_45790) * - wave_sizze_45790) & (2 * offset_45807 - 
1)) == 0) { - // read array element - { - x_45804 = ((volatile __local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45807)]; - } - // apply reduction operation - { - float defunc_1_op_res_45805 = x_45803 + x_45804; - - x_45803 = defunc_1_op_res_45805; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_45803; - } - } - offset_45807 *= 2; - } - while (slt32(skip_waves_45808, - squot32(sext_i64_i32(segred_group_sizze_34935) + - wave_sizze_45790 - 1, wave_sizze_45790))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_45807 = skip_waves_45808 * wave_sizze_45790; - if (slt32(local_tid_45788 + offset_45807, - sext_i64_i32(segred_group_sizze_34935)) && - ((local_tid_45788 - squot32(local_tid_45788, wave_sizze_45790) * - wave_sizze_45790) == 0 && (squot32(local_tid_45788, - wave_sizze_45790) & (2 * - skip_waves_45808 - - 1)) == - 0)) { - // read array element - { - x_45804 = ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45807)]; - } - // apply reduction operation - { - float defunc_1_op_res_45805 = x_45803 + x_45804; - - x_45803 = defunc_1_op_res_45805; - } - // write result of operation + // save results to be reduced { ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_45803; + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + defunc_1_f_res_111303; } + } else { + ((__local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + 0.0; } - skip_waves_45808 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_45788) == (int64_t) 0) { - x_acc_45801 = x_45803; - } - } - if (groups_per_segment_45778 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_45788 == 0) { - ((__global float *) mem_44536)[gtid_34784 * (i32_res_28493 * - i32_res_28493) + - gtid_34785 * i32_res_28493 + - gtid_34786] 
= x_acc_45801; - } - } - } else { - int32_t old_counter_45809; - - // first thread in group saves group result to global memory + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction { - if (local_tid_45788 == 0) { - ((__global - float *) group_res_arr_mem_45783)[sext_i32_i64(virt_group_id_45798) * - segred_group_sizze_34935] = - x_acc_45801; - mem_fence_global(); - old_counter_45809 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_45785)[sext_i32_i64(srem32(flat_segment_id_45799, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_45794)[(int64_t) 0] = - old_counter_45809 == groups_per_segment_45778 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_45810; - - is_last_group_45810 = ((__local - bool *) sync_arr_mem_45794)[(int64_t) 0]; - if (is_last_group_45810) { - if (local_tid_45788 == 0) { - old_counter_45809 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_45785)[sext_i32_i64(srem32(flat_segment_id_45799, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_45778)); - } - // read in the per-group-results + double x_111296; + double x_111297; + double x_128146; + double x_128147; + bool ltid_in_bounds_128149; + + ltid_in_bounds_128149 = slt64(sext_i32_i64(local_tid_128137), + k2p2zq_75151 * + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134)); + + int32_t skip_threads_128150; + + // read input for in-block scan { - int64_t read_per_thread_45811 = - sdiv_up64(groups_per_segment_45778, - segred_group_sizze_34935); - - x_34939 = 0.0F; - for (int64_t i_45812 = 0; i_45812 < read_per_thread_45811; - i_45812++) { - int64_t group_res_id_45813 = - sext_i32_i64(local_tid_45788) * - read_per_thread_45811 + i_45812; - int64_t index_of_group_res_45814 = - sext_i32_i64(flat_segment_id_45799) * - groups_per_segment_45778 + group_res_id_45813; - - if (slt64(group_res_id_45813, - 
groups_per_segment_45778)) { - x_34940 = ((__global - float *) group_res_arr_mem_45783)[index_of_group_res_45814 * - segred_group_sizze_34935]; - - float defunc_1_op_res_34941; - - defunc_1_op_res_34941 = x_34939 + x_34940; - x_34939 = defunc_1_op_res_34941; + if (ltid_in_bounds_128149) { + x_111297 = ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)]; + if ((local_tid_128137 - squot32(local_tid_128137, 32) * + 32) == 0) { + x_111296 = x_111297; } } } - ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_34939; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // in-block scan (hopefully no barriers needed) { - int32_t offset_45815; - int32_t skip_waves_45816; - - skip_waves_45816 = 1; - - float x_45803; - float x_45804; - - offset_45815 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45788, - sext_i64_i32(segred_group_sizze_34935))) { - x_45803 = ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45815)]; - } - } - offset_45815 = 1; - while (slt32(offset_45815, wave_sizze_45790)) { - if (slt32(local_tid_45788 + offset_45815, - sext_i64_i32(segred_group_sizze_34935)) && - ((local_tid_45788 - squot32(local_tid_45788, - wave_sizze_45790) * - wave_sizze_45790) & (2 * offset_45815 - 1)) == - 0) { - // read array element + skip_threads_128150 = 1; + while (slt32(skip_threads_128150, 32)) { + if (sle32(skip_threads_128150, local_tid_128137 - + squot32(local_tid_128137, 32) * 32) && + ltid_in_bounds_128149) { + // read operands { - x_45804 = ((volatile __local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45815)]; + x_111296 = ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137) - + sext_i32_i64(skip_threads_128150)]; } - // apply reduction operation + // perform operation { - float defunc_1_op_res_45805 = x_45803 + x_45804; + bool inactive_128151 = + 
slt64(srem64(sext_i32_i64(local_tid_128137), + k2p2zq_75151), + sext_i32_i64(local_tid_128137) - + sext_i32_i64(local_tid_128137 - + skip_threads_128150)); - x_45803 = defunc_1_op_res_45805; + if (inactive_128151) { + x_111296 = x_111297; + } + if (!inactive_128151) { + double defunc_1_op_res_111298 = x_111296 + + x_111297; + + x_111296 = defunc_1_op_res_111298; + } } - // write result of operation + } + if (sle32(wave_sizze_128139, skip_threads_128150)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128150, local_tid_128137 - + squot32(local_tid_128137, 32) * 32) && + ltid_in_bounds_128149) { + // write result { ((volatile __local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_45803; + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + x_111296; + x_111297 = x_111296; } } - offset_45815 *= 2; + if (sle32(wave_sizze_128139, skip_threads_128150)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128150 *= 2; } - while (slt32(skip_waves_45816, - squot32(sext_i64_i32(segred_group_sizze_34935) + - wave_sizze_45790 - 1, - wave_sizze_45790))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_45815 = skip_waves_45816 * wave_sizze_45790; - if (slt32(local_tid_45788 + offset_45815, - sext_i64_i32(segred_group_sizze_34935)) && - ((local_tid_45788 - squot32(local_tid_45788, - wave_sizze_45790) * - wave_sizze_45790) == 0 && - (squot32(local_tid_45788, wave_sizze_45790) & (2 * - skip_waves_45816 - - 1)) == - 0)) { - // read array element - { - x_45804 = ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788 + - offset_45815)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128137 - squot32(local_tid_128137, 32) * + 32) == 31 && ltid_in_bounds_128149) { + ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(squot32(local_tid_128137, + 32))] = + x_111296; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which 
offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128152; + + // read input for in-block scan + { + if (squot32(local_tid_128137, 32) == 0 && + ltid_in_bounds_128149) { + x_128147 = ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)]; + if ((local_tid_128137 - squot32(local_tid_128137, + 32) * 32) == 0) { + x_128146 = x_128147; } - // apply reduction operation - { - float defunc_1_op_res_45805 = x_45803 + x_45804; - - x_45803 = defunc_1_op_res_45805; + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128152 = 1; + while (slt32(skip_threads_128152, 32)) { + if (sle32(skip_threads_128152, local_tid_128137 - + squot32(local_tid_128137, 32) * 32) && + (squot32(local_tid_128137, 32) == 0 && + ltid_in_bounds_128149)) { + // read operands + { + x_128146 = ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137) - + sext_i32_i64(skip_threads_128152)]; + } + // perform operation + { + bool inactive_128153 = + slt64(srem64(sext_i32_i64(local_tid_128137 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_128137 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128137 - + skip_threads_128152) * + 32 + 32 - 1)); + + if (inactive_128153) { + x_128146 = x_128147; + } + if (!inactive_128153) { + double defunc_1_op_res_128148 = + x_128146 + x_128147; + + x_128146 = defunc_1_op_res_128148; + } + } } - // write result of operation - { - ((__local - float *) red_arr_mem_45792)[sext_i32_i64(local_tid_45788)] = - x_45803; + if (sle32(wave_sizze_128139, skip_threads_128152)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128152, local_tid_128137 - + squot32(local_tid_128137, 32) * 32) && + (squot32(local_tid_128137, 32) == 0 && + ltid_in_bounds_128149)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + x_128146; + x_128147 = x_128146; + } + } + if (sle32(wave_sizze_128139, skip_threads_128152)) { + 
barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128152 *= 2; } - skip_waves_45816 *= 2; } - // and back to memory with the final result - { - if (local_tid_45788 == 0) { - ((__global float *) mem_44536)[gtid_34784 * - (i32_res_28493 * - i32_res_28493) + - gtid_34785 * - i32_res_28493 + - gtid_34786] = - x_45803; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128137, 32) == 0 || + !ltid_in_bounds_128149)) { + // read operands + { + x_111297 = x_111296; + x_111296 = ((__local + double *) red_arr_mem_128141)[sext_i32_i64(squot32(local_tid_128137, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128154 = + slt64(srem64(sext_i32_i64(local_tid_128137), + k2p2zq_75151), + sext_i32_i64(local_tid_128137) - + sext_i32_i64(squot32(local_tid_128137, + 32) * 32 - 1)); + + if (inactive_128154) { + x_111296 = x_111297; + } + if (!inactive_128154) { + double defunc_1_op_res_111298 = x_111296 + + x_111297; + + x_111296 = defunc_1_op_res_111298; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + x_111296; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128137, 32) == 0) { + ((__local + double *) red_arr_mem_128141)[sext_i32_i64(local_tid_128137)] = + x_111297; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128145) * + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134) + + sext_i32_i64(local_tid_128137), m_75136) && + slt64(sext_i32_i64(local_tid_128137), + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134))) { + ((__global + double *) mem_123913)[sext_i32_i64(virt_group_id_128145) * + squot64(segred_group_sizze_111292, + segment_sizze_nonzzero_128134) + + sext_i32_i64(local_tid_128137)] = + ((__local + double *) 
red_arr_mem_128141)[(sext_i32_i64(local_tid_128137) + + (int64_t) 1) * + segment_sizze_nonzzero_128134 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_34935 + #undef segred_group_sizze_111292 } -__kernel void mainMagnitudezisegred_large_36148(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_45991_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_45989_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28487, - int64_t i32_res_28493, - int64_t num_groups_36201, - int64_t groups_per_segment_45975, - int64_t elements_per_thread_45976, - int64_t virt_num_groups_45977, - int64_t threads_per_segment_45979, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44844, - __global - unsigned char *group_res_arr_mem_45980, - __global - unsigned char *mainMagnitudezicounter_mem_45982) +__kernel void mainzisegred_small_111246(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128074_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t defunc_2_reduce_res_75260, + int64_t index_primexp_76437, + int64_t num_groups_111265, + int64_t segment_sizze_nonzzero_128067, + __global unsigned char *mem_120246, + __global unsigned char *mem_123901, + __global unsigned char *mem_123904, + __global unsigned char *mem_123907) { - #define segred_group_sizze_36200 (mainMagnitudezisegred_group_sizze_36142) + #define segred_group_sizze_111264 (mainzisegred_group_sizze_111240) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_45991_backing_1 = + __local volatile char *restrict red_arr_mem_128074_backing_0 = (__local volatile - char *) sync_arr_mem_45991_backing_aligned_0; - __local volatile char *restrict red_arr_mem_45989_backing_0 = - (__local volatile - char *) 
red_arr_mem_45989_backing_aligned_1; + char *) red_arr_mem_128074_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45984; - int32_t local_tid_45985; - int64_t group_sizze_45988; - int32_t wave_sizze_45987; - int32_t group_tid_45986; - - global_tid_45984 = get_global_id(0); - local_tid_45985 = get_local_id(0); - group_sizze_45988 = get_local_size(0); - wave_sizze_45987 = LOCKSTEP_WIDTH; - group_tid_45986 = get_group_id(0); - - int32_t phys_tid_36148; - - phys_tid_36148 = global_tid_45984; - - __local char *red_arr_mem_45989; - - red_arr_mem_45989 = (__local char *) red_arr_mem_45989_backing_0; - - __local char *sync_arr_mem_45991; - - sync_arr_mem_45991 = (__local char *) sync_arr_mem_45991_backing_1; - - int32_t phys_group_id_45993; - - phys_group_id_45993 = get_group_id(0); - for (int32_t i_45994 = 0; i_45994 < - sdiv_up32(sext_i64_i32(virt_num_groups_45977) - phys_group_id_45993, - sext_i64_i32(num_groups_36201)); i_45994++) { - int32_t virt_group_id_45995 = phys_group_id_45993 + i_45994 * - sext_i64_i32(num_groups_36201); - int32_t flat_segment_id_45996 = squot32(virt_group_id_45995, - sext_i64_i32(groups_per_segment_45975)); - int64_t global_tid_45997 = srem64(sext_i32_i64(virt_group_id_45995) * - segred_group_sizze_36200 + - sext_i32_i64(local_tid_45985), - segred_group_sizze_36200 * - groups_per_segment_45975); - int64_t gtid_36137 = squot64(sext_i32_i64(flat_segment_id_45996), - i32_res_28493); - int64_t gtid_36138 = sext_i32_i64(flat_segment_id_45996) - - squot64(sext_i32_i64(flat_segment_id_45996), i32_res_28493) * - i32_res_28493; - int64_t gtid_36147; - float x_acc_45998; - int64_t chunk_sizze_45999; + int32_t global_tid_128069; + int32_t local_tid_128070; + int64_t group_sizze_128073; + int32_t wave_sizze_128072; + int32_t group_tid_128071; + + global_tid_128069 = get_global_id(0); + local_tid_128070 = get_local_id(0); + group_sizze_128073 = get_local_size(0); + wave_sizze_128072 = LOCKSTEP_WIDTH; + group_tid_128071 = 
get_group_id(0); + + int32_t phys_tid_111246; + + phys_tid_111246 = global_tid_128069; + + __local char *red_arr_mem_128074; + + red_arr_mem_128074 = (__local char *) red_arr_mem_128074_backing_0; + + int32_t phys_group_id_128076; + + phys_group_id_128076 = get_group_id(0); + for (int32_t i_128077 = 0; i_128077 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067))) - + phys_group_id_128076, sext_i64_i32(num_groups_111265)); + i_128077++) { + int32_t virt_group_id_128078 = phys_group_id_128076 + i_128077 * + sext_i64_i32(num_groups_111265); + int64_t gtid_111237 = squot64(sext_i32_i64(local_tid_128070), + segment_sizze_nonzzero_128067) + + sext_i32_i64(virt_group_id_128078) * + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067); + int64_t gtid_111245 = srem64(sext_i32_i64(local_tid_128070), + k2p2zq_75151); - chunk_sizze_45999 = smin64(elements_per_thread_45976, - sdiv_up64(i32_res_28487 - - sext_i32_i64(sext_i64_i32(global_tid_45997)), - threads_per_segment_45979)); - - float x_36204; - float x_36205; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_45998 = 0.0F; - } - for (int64_t i_46003 = 0; i_46003 < chunk_sizze_45999; i_46003++) { - gtid_36147 = sext_i32_i64(sext_i64_i32(global_tid_45997)) + - threads_per_segment_45979 * i_46003; - // apply map function - { - float x_36210 = ((__global - float *) images_mem_44381)[gtid_36137 * - N_28477 + - gtid_36147]; - bool isnan_res_36211; - - isnan_res_36211 = futrts_isnan32(x_36210); - - float defunc_1_f_res_36212; - - if (isnan_res_36211) { - defunc_1_f_res_36212 = 0.0F; - } else { - float x_36209 = ((__global - float *) binop_p_mem_44390)[gtid_36138 * - N_28477 + - gtid_36147]; - float defunc_1_f_res_f_res_36213 = x_36209 * x_36210; + if (slt64((int64_t) 0, k2p2zq_75151) && (slt64(gtid_111237, + m_75136) && + slt64(sext_i32_i64(local_tid_128070), + k2p2zq_75151 * + squot64(segred_group_sizze_111264, 
+ segment_sizze_nonzzero_128067)))) { + double x_111275 = ((__global double *) mem_120246)[gtid_111245 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111237 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double defunc_0_f_res_111276; + double redout_119889 = 0.0; + + for (int64_t i_119890 = 0; i_119890 < k2p2zq_75151; + i_119890++) { + double x_111280 = ((__global + double *) mem_120246)[i_119890 * + (defunc_2_reduce_res_75260 * + m_75136) + + gtid_111237 * + defunc_2_reduce_res_75260 + + index_primexp_76437]; + double x_111281 = ((__global + double *) mem_123901)[i_119890 * + (k2p2zq_75151 * + m_75136) + + gtid_111237 * + k2p2zq_75151 + + gtid_111245]; + double defunc_1_f_res_111282 = x_111280 * x_111281; + double defunc_1_op_res_111279 = defunc_1_f_res_111282 + + redout_119889; + double redout_tmp_128079 = defunc_1_op_res_111279; - defunc_1_f_res_36212 = defunc_1_f_res_f_res_36213; + redout_119889 = redout_tmp_128079; } + defunc_0_f_res_111276 = redout_119889; + + double defunc_1_f_res_111283 = x_111275 * defunc_0_f_res_111276; + // save map-out results - { } - // load accumulator - { - x_36204 = x_acc_45998; - } - // load new values - { - x_36205 = defunc_1_f_res_36212; - } - // apply reduction operator - { - float defunc_1_op_res_36206 = x_36204 + x_36205; - - // store in accumulator - { - x_acc_45998 = defunc_1_op_res_36206; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_36204 = x_acc_45998; - ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_36204; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46004; - int32_t skip_waves_46005; - - skip_waves_46005 = 1; - - float x_46000; - float x_46001; - - offset_46004 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45985, - sext_i64_i32(segred_group_sizze_36200))) { - x_46000 = ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46004)]; - } - } - offset_46004 = 1; - 
while (slt32(offset_46004, wave_sizze_45987)) { - if (slt32(local_tid_45985 + offset_46004, - sext_i64_i32(segred_group_sizze_36200)) && - ((local_tid_45985 - squot32(local_tid_45985, wave_sizze_45987) * - wave_sizze_45987) & (2 * offset_46004 - 1)) == 0) { - // read array element - { - x_46001 = ((volatile __local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46004)]; - } - // apply reduction operation - { - float defunc_1_op_res_46002 = x_46000 + x_46001; - - x_46000 = defunc_1_op_res_46002; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_46000; - } - } - offset_46004 *= 2; - } - while (slt32(skip_waves_46005, - squot32(sext_i64_i32(segred_group_sizze_36200) + - wave_sizze_45987 - 1, wave_sizze_45987))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46004 = skip_waves_46005 * wave_sizze_45987; - if (slt32(local_tid_45985 + offset_46004, - sext_i64_i32(segred_group_sizze_36200)) && - ((local_tid_45985 - squot32(local_tid_45985, wave_sizze_45987) * - wave_sizze_45987) == 0 && (squot32(local_tid_45985, - wave_sizze_45987) & (2 * - skip_waves_46005 - - 1)) == - 0)) { - // read array element - { - x_46001 = ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46004)]; - } - // apply reduction operation { - float defunc_1_op_res_46002 = x_46000 + x_46001; - - x_46000 = defunc_1_op_res_46002; + ((__global double *) mem_123907)[gtid_111237 * + k2p2zq_75151 + + gtid_111245] = + defunc_0_f_res_111276; } - // write result of operation + // save results to be reduced { ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_46000; + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + defunc_1_f_res_111283; } + } else { + ((__local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + 0.0; } - skip_waves_46005 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - 
if (sext_i32_i64(local_tid_45985) == (int64_t) 0) { - x_acc_45998 = x_46000; - } - } - if (groups_per_segment_45975 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_45985 == 0) { - ((__global float *) mem_44844)[gtid_36137 * i32_res_28493 + - gtid_36138] = x_acc_45998; - } - } - } else { - int32_t old_counter_46006; - - // first thread in group saves group result to global memory + if (slt64((int64_t) 0, k2p2zq_75151)) { + // perform segmented scan to imitate reduction { - if (local_tid_45985 == 0) { - ((__global - float *) group_res_arr_mem_45980)[sext_i32_i64(virt_group_id_45995) * - segred_group_sizze_36200] = - x_acc_45998; - mem_fence_global(); - old_counter_46006 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_45982)[sext_i32_i64(srem32(flat_segment_id_45996, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_45991)[(int64_t) 0] = - old_counter_46006 == groups_per_segment_45975 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46007; - - is_last_group_46007 = ((__local - bool *) sync_arr_mem_45991)[(int64_t) 0]; - if (is_last_group_46007) { - if (local_tid_45985 == 0) { - old_counter_46006 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_45982)[sext_i32_i64(srem32(flat_segment_id_45996, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_45975)); - } - // read in the per-group-results + double x_111269; + double x_111270; + double x_128080; + double x_128081; + bool ltid_in_bounds_128083; + + ltid_in_bounds_128083 = slt64(sext_i32_i64(local_tid_128070), + k2p2zq_75151 * + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067)); + + int32_t skip_threads_128084; + + // read input for in-block scan { - int64_t read_per_thread_46008 = - sdiv_up64(groups_per_segment_45975, - segred_group_sizze_36200); - - x_36204 = 0.0F; - for (int64_t i_46009 = 0; i_46009 < 
read_per_thread_46008; - i_46009++) { - int64_t group_res_id_46010 = - sext_i32_i64(local_tid_45985) * - read_per_thread_46008 + i_46009; - int64_t index_of_group_res_46011 = - sext_i32_i64(flat_segment_id_45996) * - groups_per_segment_45975 + group_res_id_46010; - - if (slt64(group_res_id_46010, - groups_per_segment_45975)) { - x_36205 = ((__global - float *) group_res_arr_mem_45980)[index_of_group_res_46011 * - segred_group_sizze_36200]; - - float defunc_1_op_res_36206; - - defunc_1_op_res_36206 = x_36204 + x_36205; - x_36204 = defunc_1_op_res_36206; + if (ltid_in_bounds_128083) { + x_111270 = ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)]; + if ((local_tid_128070 - squot32(local_tid_128070, 32) * + 32) == 0) { + x_111269 = x_111270; } } } - ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_36204; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // in-block scan (hopefully no barriers needed) { - int32_t offset_46012; - int32_t skip_waves_46013; - - skip_waves_46013 = 1; - - float x_46000; - float x_46001; - - offset_46012 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_45985, - sext_i64_i32(segred_group_sizze_36200))) { - x_46000 = ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46012)]; - } - } - offset_46012 = 1; - while (slt32(offset_46012, wave_sizze_45987)) { - if (slt32(local_tid_45985 + offset_46012, - sext_i64_i32(segred_group_sizze_36200)) && - ((local_tid_45985 - squot32(local_tid_45985, - wave_sizze_45987) * - wave_sizze_45987) & (2 * offset_46012 - 1)) == - 0) { - // read array element + skip_threads_128084 = 1; + while (slt32(skip_threads_128084, 32)) { + if (sle32(skip_threads_128084, local_tid_128070 - + squot32(local_tid_128070, 32) * 32) && + ltid_in_bounds_128083) { + // read operands { - x_46001 = ((volatile __local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46012)]; + 
x_111269 = ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070) - + sext_i32_i64(skip_threads_128084)]; } - // apply reduction operation + // perform operation { - float defunc_1_op_res_46002 = x_46000 + x_46001; + bool inactive_128085 = + slt64(srem64(sext_i32_i64(local_tid_128070), + k2p2zq_75151), + sext_i32_i64(local_tid_128070) - + sext_i32_i64(local_tid_128070 - + skip_threads_128084)); - x_46000 = defunc_1_op_res_46002; + if (inactive_128085) { + x_111269 = x_111270; + } + if (!inactive_128085) { + double defunc_1_op_res_111271 = x_111269 + + x_111270; + + x_111269 = defunc_1_op_res_111271; + } } - // write result of operation + } + if (sle32(wave_sizze_128072, skip_threads_128084)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128084, local_tid_128070 - + squot32(local_tid_128070, 32) * 32) && + ltid_in_bounds_128083) { + // write result { ((volatile __local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_46000; + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + x_111269; + x_111270 = x_111269; } } - offset_46012 *= 2; + if (sle32(wave_sizze_128072, skip_threads_128084)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128084 *= 2; } - while (slt32(skip_waves_46013, - squot32(sext_i64_i32(segred_group_sizze_36200) + - wave_sizze_45987 - 1, - wave_sizze_45987))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46012 = skip_waves_46013 * wave_sizze_45987; - if (slt32(local_tid_45985 + offset_46012, - sext_i64_i32(segred_group_sizze_36200)) && - ((local_tid_45985 - squot32(local_tid_45985, - wave_sizze_45987) * - wave_sizze_45987) == 0 && - (squot32(local_tid_45985, wave_sizze_45987) & (2 * - skip_waves_46013 - - 1)) == - 0)) { - // read array element - { - x_46001 = ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985 + - offset_46012)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128070 - 
squot32(local_tid_128070, 32) * + 32) == 31 && ltid_in_bounds_128083) { + ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(squot32(local_tid_128070, + 32))] = + x_111269; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128086; + + // read input for in-block scan + { + if (squot32(local_tid_128070, 32) == 0 && + ltid_in_bounds_128083) { + x_128081 = ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)]; + if ((local_tid_128070 - squot32(local_tid_128070, + 32) * 32) == 0) { + x_128080 = x_128081; } - // apply reduction operation - { - float defunc_1_op_res_46002 = x_46000 + x_46001; - - x_46000 = defunc_1_op_res_46002; + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128086 = 1; + while (slt32(skip_threads_128086, 32)) { + if (sle32(skip_threads_128086, local_tid_128070 - + squot32(local_tid_128070, 32) * 32) && + (squot32(local_tid_128070, 32) == 0 && + ltid_in_bounds_128083)) { + // read operands + { + x_128080 = ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070) - + sext_i32_i64(skip_threads_128086)]; + } + // perform operation + { + bool inactive_128087 = + slt64(srem64(sext_i32_i64(local_tid_128070 * + 32 + 32 - 1), + k2p2zq_75151), + sext_i32_i64(local_tid_128070 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128070 - + skip_threads_128086) * + 32 + 32 - 1)); + + if (inactive_128087) { + x_128080 = x_128081; + } + if (!inactive_128087) { + double defunc_1_op_res_128082 = + x_128080 + x_128081; + + x_128080 = defunc_1_op_res_128082; + } + } } - // write result of operation - { - ((__local - float *) red_arr_mem_45989)[sext_i32_i64(local_tid_45985)] = - x_46000; + if (sle32(wave_sizze_128072, skip_threads_128086)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128086, local_tid_128070 - + squot32(local_tid_128070, 32) * 32) && + 
(squot32(local_tid_128070, 32) == 0 && + ltid_in_bounds_128083)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + x_128080; + x_128081 = x_128080; + } } + if (sle32(wave_sizze_128072, skip_threads_128086)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128086 *= 2; } - skip_waves_46013 *= 2; } - // and back to memory with the final result - { - if (local_tid_45985 == 0) { - ((__global float *) mem_44844)[gtid_36137 * - i32_res_28493 + - gtid_36138] = - x_46000; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128070, 32) == 0 || + !ltid_in_bounds_128083)) { + // read operands + { + x_111270 = x_111269; + x_111269 = ((__local + double *) red_arr_mem_128074)[sext_i32_i64(squot32(local_tid_128070, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128088 = + slt64(srem64(sext_i32_i64(local_tid_128070), + k2p2zq_75151), + sext_i32_i64(local_tid_128070) - + sext_i32_i64(squot32(local_tid_128070, + 32) * 32 - 1)); + + if (inactive_128088) { + x_111269 = x_111270; + } + if (!inactive_128088) { + double defunc_1_op_res_111271 = x_111269 + + x_111270; + + x_111269 = defunc_1_op_res_111271; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + x_111269; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128070, 32) == 0) { + ((__local + double *) red_arr_mem_128074)[sext_i32_i64(local_tid_128070)] = + x_111270; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128078) * + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067) + + sext_i32_i64(local_tid_128070), m_75136) && + slt64(sext_i32_i64(local_tid_128070), + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067))) { 
+ ((__global + double *) mem_123904)[sext_i32_i64(virt_group_id_128078) * + squot64(segred_group_sizze_111264, + segment_sizze_nonzzero_128067) + + sext_i32_i64(local_tid_128070)] = + ((__local + double *) red_arr_mem_128074)[(sext_i32_i64(local_tid_128070) + + (int64_t) 1) * + segment_sizze_nonzzero_128067 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_36200 + #undef segred_group_sizze_111264 } -__kernel void mainMagnitudezisegred_large_36285(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46079_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46077_backing_aligned_1, - int64_t i32_res_28493, - int64_t num_groups_36334, - int64_t groups_per_segment_46063, - int64_t elements_per_thread_46064, - int64_t virt_num_groups_46065, - int64_t threads_per_segment_46067, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44910, - __global - unsigned char *group_res_arr_mem_46068, - __global - unsigned char *mainMagnitudezicounter_mem_46070) +__kernel void mainzisegred_small_111603(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128409_backing_aligned_0, + int64_t m_75136, + int64_t num_recresids_padded_75809, + int64_t num_groups_111685, + int64_t segment_sizze_nonzzero_128402, + __global unsigned char *mem_124045, + __global unsigned char *mem_124051, + __global unsigned char *mem_124054) { - #define segred_group_sizze_36333 (mainMagnitudezisegred_group_sizze_36279) + #define segred_group_sizze_111684 (mainzisegred_group_sizze_111597) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46079_backing_1 = - (__local volatile - char *) sync_arr_mem_46079_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46077_backing_0 = + __local 
volatile char *restrict red_arr_mem_128409_backing_0 = (__local volatile - char *) red_arr_mem_46077_backing_aligned_1; + char *) red_arr_mem_128409_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46072; - int32_t local_tid_46073; - int64_t group_sizze_46076; - int32_t wave_sizze_46075; - int32_t group_tid_46074; - - global_tid_46072 = get_global_id(0); - local_tid_46073 = get_local_id(0); - group_sizze_46076 = get_local_size(0); - wave_sizze_46075 = LOCKSTEP_WIDTH; - group_tid_46074 = get_group_id(0); - - int32_t phys_tid_36285; - - phys_tid_36285 = global_tid_46072; - - __local char *red_arr_mem_46077; - - red_arr_mem_46077 = (__local char *) red_arr_mem_46077_backing_0; - - __local char *sync_arr_mem_46079; - - sync_arr_mem_46079 = (__local char *) sync_arr_mem_46079_backing_1; - - int32_t phys_group_id_46081; - - phys_group_id_46081 = get_group_id(0); - for (int32_t i_46082 = 0; i_46082 < - sdiv_up32(sext_i64_i32(virt_num_groups_46065) - phys_group_id_46081, - sext_i64_i32(num_groups_36334)); i_46082++) { - int32_t virt_group_id_46083 = phys_group_id_46081 + i_46082 * - sext_i64_i32(num_groups_36334); - int32_t flat_segment_id_46084 = squot32(virt_group_id_46083, - sext_i64_i32(groups_per_segment_46063)); - int64_t global_tid_46085 = srem64(sext_i32_i64(virt_group_id_46083) * - segred_group_sizze_36333 + - sext_i32_i64(local_tid_46073), - segred_group_sizze_36333 * - groups_per_segment_46063); - int64_t gtid_36274 = squot64(sext_i32_i64(flat_segment_id_46084), - i32_res_28493); - int64_t gtid_36275 = sext_i32_i64(flat_segment_id_46084) - - squot64(sext_i32_i64(flat_segment_id_46084), i32_res_28493) * - i32_res_28493; - int64_t gtid_36284; - float x_acc_46086; - int64_t chunk_sizze_46087; - - chunk_sizze_46087 = smin64(elements_per_thread_46064, - sdiv_up64(i32_res_28493 - - sext_i32_i64(sext_i64_i32(global_tid_46085)), - threads_per_segment_46067)); - - float x_36337; - float x_36338; - - // neutral-initialise the accumulators - { - 
x_acc_46086 = 0.0F; - } - for (int64_t i_46091 = 0; i_46091 < chunk_sizze_46087; i_46091++) { - gtid_36284 = sext_i32_i64(sext_i64_i32(global_tid_46085)) + - threads_per_segment_46067 * i_46091; - // apply map function - { - float x_36343 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_36274 * - i32_res_28493 + - gtid_36284]; - float x_36344 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_36274 * - (i32_res_28493 * - i32_res_28493) + - gtid_36275 * - i32_res_28493 + - gtid_36284]; - float defunc_1_f_res_36345 = x_36343 * x_36344; - - // save map-out results - { } - // load accumulator - { - x_36337 = x_acc_46086; - } - // load new values - { - x_36338 = defunc_1_f_res_36345; - } - // apply reduction operator - { - float defunc_1_op_res_36339 = x_36337 + x_36338; - - // store in accumulator - { - x_acc_46086 = defunc_1_op_res_36339; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_36337 = x_acc_46086; - ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_36337; - } - barrier(CLK_LOCAL_MEM_FENCE); + int32_t global_tid_128404; + int32_t local_tid_128405; + int64_t group_sizze_128408; + int32_t wave_sizze_128407; + int32_t group_tid_128406; + + global_tid_128404 = get_global_id(0); + local_tid_128405 = get_local_id(0); + group_sizze_128408 = get_local_size(0); + wave_sizze_128407 = LOCKSTEP_WIDTH; + group_tid_128406 = get_group_id(0); + + int32_t phys_tid_111603; + + phys_tid_111603 = global_tid_128404; + + __local char *red_arr_mem_128409; + + red_arr_mem_128409 = (__local char *) red_arr_mem_128409_backing_0; + + int32_t phys_group_id_128411; + + phys_group_id_128411 = get_group_id(0); + for (int32_t i_128412 = 0; i_128412 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402))) - + phys_group_id_128411, sext_i64_i32(num_groups_111685)); + i_128412++) { + int32_t virt_group_id_128413 = phys_group_id_128411 + i_128412 * + 
sext_i64_i32(num_groups_111685); + int64_t gtid_111594 = squot64(sext_i32_i64(local_tid_128405), + segment_sizze_nonzzero_128402) + + sext_i32_i64(virt_group_id_128413) * + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402); + int64_t gtid_111602 = srem64(sext_i32_i64(local_tid_128405), + num_recresids_padded_75809); - int32_t offset_46092; - int32_t skip_waves_46093; - - skip_waves_46093 = 1; - - float x_46088; - float x_46089; - - offset_46092 = 0; - // participating threads read initial accumulator + // apply map function if in bounds { - if (slt32(local_tid_46073, - sext_i64_i32(segred_group_sizze_36333))) { - x_46088 = ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46092)]; - } - } - offset_46092 = 1; - while (slt32(offset_46092, wave_sizze_46075)) { - if (slt32(local_tid_46073 + offset_46092, - sext_i64_i32(segred_group_sizze_36333)) && - ((local_tid_46073 - squot32(local_tid_46073, wave_sizze_46075) * - wave_sizze_46075) & (2 * offset_46092 - 1)) == 0) { - // read array element - { - x_46089 = ((volatile __local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46092)]; - } - // apply reduction operation - { - float defunc_1_op_res_46090 = x_46088 + x_46089; - - x_46088 = defunc_1_op_res_46090; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_46088; - } - } - offset_46092 *= 2; - } - while (slt32(skip_waves_46093, - squot32(sext_i64_i32(segred_group_sizze_36333) + - wave_sizze_46075 - 1, wave_sizze_46075))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46092 = skip_waves_46093 * wave_sizze_46075; - if (slt32(local_tid_46073 + offset_46092, - sext_i64_i32(segred_group_sizze_36333)) && - ((local_tid_46073 - squot32(local_tid_46073, wave_sizze_46075) * - wave_sizze_46075) == 0 && (squot32(local_tid_46073, - wave_sizze_46075) & (2 * - skip_waves_46093 - - 1)) == - 0)) { - // read array element - { - x_46089 = ((__local 
- float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46092)]; - } - // apply reduction operation - { - float defunc_1_op_res_46090 = x_46088 + x_46089; + if (slt64((int64_t) 0, num_recresids_padded_75809) && + (slt64(gtid_111594, m_75136) && + slt64(sext_i32_i64(local_tid_128405), + num_recresids_padded_75809 * + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402)))) { + double x_111693 = ((__global double *) mem_124045)[gtid_111594 * + num_recresids_padded_75809 + + gtid_111602]; + bool isnan_res_111694; + + isnan_res_111694 = futrts_isnan64(x_111693); + + double defunc_0_f_res_111695; + + if (isnan_res_111694) { + defunc_0_f_res_111695 = 0.0; + } else { + double x_mean_111692 = ((__global + double *) mem_124051)[gtid_111594]; + double x_111696 = x_111693 - x_mean_111692; + double defunc_0_f_res_f_res_111697 = fpow64(x_111696, 2.0); - x_46088 = defunc_1_op_res_46090; + defunc_0_f_res_111695 = defunc_0_f_res_f_res_111697; } - // write result of operation + // save map-out results + { } + // save results to be reduced { ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_46088; + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + defunc_0_f_res_111695; } + } else { + ((__local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + 0.0; } - skip_waves_46093 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46073) == (int64_t) 0) { - x_acc_46086 = x_46088; - } - } - if (groups_per_segment_46063 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46073 == 0) { - ((__global float *) mem_44910)[gtid_36274 * i32_res_28493 + - gtid_36275] = x_acc_46086; - } - } - } else { - int32_t old_counter_46094; - - // first thread in group saves group result to global memory + if (slt64((int64_t) 0, num_recresids_padded_75809)) { + // perform segmented scan to imitate reduction { - if 
(local_tid_46073 == 0) { - ((__global - float *) group_res_arr_mem_46068)[sext_i32_i64(virt_group_id_46083) * - segred_group_sizze_36333] = - x_acc_46086; - mem_fence_global(); - old_counter_46094 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46070)[sext_i32_i64(srem32(flat_segment_id_46084, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46079)[(int64_t) 0] = - old_counter_46094 == groups_per_segment_46063 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46095; - - is_last_group_46095 = ((__local - bool *) sync_arr_mem_46079)[(int64_t) 0]; - if (is_last_group_46095) { - if (local_tid_46073 == 0) { - old_counter_46094 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46070)[sext_i32_i64(srem32(flat_segment_id_46084, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46063)); - } - // read in the per-group-results + double x_111688; + double x_111689; + double x_128414; + double x_128415; + bool ltid_in_bounds_128417; + + ltid_in_bounds_128417 = slt64(sext_i32_i64(local_tid_128405), + num_recresids_padded_75809 * + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402)); + + int32_t skip_threads_128418; + + // read input for in-block scan { - int64_t read_per_thread_46096 = - sdiv_up64(groups_per_segment_46063, - segred_group_sizze_36333); - - x_36337 = 0.0F; - for (int64_t i_46097 = 0; i_46097 < read_per_thread_46096; - i_46097++) { - int64_t group_res_id_46098 = - sext_i32_i64(local_tid_46073) * - read_per_thread_46096 + i_46097; - int64_t index_of_group_res_46099 = - sext_i32_i64(flat_segment_id_46084) * - groups_per_segment_46063 + group_res_id_46098; - - if (slt64(group_res_id_46098, - groups_per_segment_46063)) { - x_36338 = ((__global - float *) group_res_arr_mem_46068)[index_of_group_res_46099 * - segred_group_sizze_36333]; - - float defunc_1_op_res_36339; - - defunc_1_op_res_36339 = x_36337 + x_36338; - 
x_36337 = defunc_1_op_res_36339; + if (ltid_in_bounds_128417) { + x_111689 = ((volatile __local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)]; + if ((local_tid_128405 - squot32(local_tid_128405, 32) * + 32) == 0) { + x_111688 = x_111689; } } } - ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_36337; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // in-block scan (hopefully no barriers needed) { - int32_t offset_46100; - int32_t skip_waves_46101; - - skip_waves_46101 = 1; - - float x_46088; - float x_46089; - - offset_46100 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46073, - sext_i64_i32(segred_group_sizze_36333))) { - x_46088 = ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46100)]; - } - } - offset_46100 = 1; - while (slt32(offset_46100, wave_sizze_46075)) { - if (slt32(local_tid_46073 + offset_46100, - sext_i64_i32(segred_group_sizze_36333)) && - ((local_tid_46073 - squot32(local_tid_46073, - wave_sizze_46075) * - wave_sizze_46075) & (2 * offset_46100 - 1)) == - 0) { - // read array element + skip_threads_128418 = 1; + while (slt32(skip_threads_128418, 32)) { + if (sle32(skip_threads_128418, local_tid_128405 - + squot32(local_tid_128405, 32) * 32) && + ltid_in_bounds_128417) { + // read operands { - x_46089 = ((volatile __local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46100)]; + x_111688 = ((volatile __local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405) - + sext_i32_i64(skip_threads_128418)]; } - // apply reduction operation + // perform operation { - float defunc_1_op_res_46090 = x_46088 + x_46089; + bool inactive_128419 = + slt64(srem64(sext_i32_i64(local_tid_128405), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128405) - + sext_i32_i64(local_tid_128405 - + skip_threads_128418)); - x_46088 = defunc_1_op_res_46090; + if (inactive_128419) { + x_111688 = x_111689; + } + 
if (!inactive_128419) { + double defunc_1_op_res_111690 = x_111688 + + x_111689; + + x_111688 = defunc_1_op_res_111690; + } } - // write result of operation + } + if (sle32(wave_sizze_128407, skip_threads_128418)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128418, local_tid_128405 - + squot32(local_tid_128405, 32) * 32) && + ltid_in_bounds_128417) { + // write result { ((volatile __local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_46088; + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + x_111688; + x_111689 = x_111688; } } - offset_46100 *= 2; + if (sle32(wave_sizze_128407, skip_threads_128418)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128418 *= 2; } - while (slt32(skip_waves_46101, - squot32(sext_i64_i32(segred_group_sizze_36333) + - wave_sizze_46075 - 1, - wave_sizze_46075))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46100 = skip_waves_46101 * wave_sizze_46075; - if (slt32(local_tid_46073 + offset_46100, - sext_i64_i32(segred_group_sizze_36333)) && - ((local_tid_46073 - squot32(local_tid_46073, - wave_sizze_46075) * - wave_sizze_46075) == 0 && - (squot32(local_tid_46073, wave_sizze_46075) & (2 * - skip_waves_46101 - - 1)) == - 0)) { - // read array element - { - x_46089 = ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073 + - offset_46100)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128405 - squot32(local_tid_128405, 32) * + 32) == 31 && ltid_in_bounds_128417) { + ((volatile __local + double *) red_arr_mem_128409)[sext_i32_i64(squot32(local_tid_128405, + 32))] = + x_111688; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128420; + + // read input for in-block scan + { + if (squot32(local_tid_128405, 32) == 0 && + ltid_in_bounds_128417) { + x_128415 = ((volatile __local + double *) 
red_arr_mem_128409)[sext_i32_i64(local_tid_128405)]; + if ((local_tid_128405 - squot32(local_tid_128405, + 32) * 32) == 0) { + x_128414 = x_128415; } - // apply reduction operation - { - float defunc_1_op_res_46090 = x_46088 + x_46089; - - x_46088 = defunc_1_op_res_46090; + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128420 = 1; + while (slt32(skip_threads_128420, 32)) { + if (sle32(skip_threads_128420, local_tid_128405 - + squot32(local_tid_128405, 32) * 32) && + (squot32(local_tid_128405, 32) == 0 && + ltid_in_bounds_128417)) { + // read operands + { + x_128414 = ((volatile __local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405) - + sext_i32_i64(skip_threads_128420)]; + } + // perform operation + { + bool inactive_128421 = + slt64(srem64(sext_i32_i64(local_tid_128405 * + 32 + 32 - 1), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128405 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128405 - + skip_threads_128420) * + 32 + 32 - 1)); + + if (inactive_128421) { + x_128414 = x_128415; + } + if (!inactive_128421) { + double defunc_1_op_res_128416 = + x_128414 + x_128415; + + x_128414 = defunc_1_op_res_128416; + } + } } - // write result of operation - { - ((__local - float *) red_arr_mem_46077)[sext_i32_i64(local_tid_46073)] = - x_46088; + if (sle32(wave_sizze_128407, skip_threads_128420)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128420, local_tid_128405 - + squot32(local_tid_128405, 32) * 32) && + (squot32(local_tid_128405, 32) == 0 && + ltid_in_bounds_128417)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + x_128414; + x_128415 = x_128414; + } } + if (sle32(wave_sizze_128407, skip_threads_128420)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128420 *= 2; } - skip_waves_46101 *= 2; } - // and back to memory with the final result - { - if (local_tid_46073 == 0) { - ((__global float *) mem_44910)[gtid_36274 * - 
i32_res_28493 + - gtid_36275] = - x_46088; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128405, 32) == 0 || + !ltid_in_bounds_128417)) { + // read operands + { + x_111689 = x_111688; + x_111688 = ((__local + double *) red_arr_mem_128409)[sext_i32_i64(squot32(local_tid_128405, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128422 = + slt64(srem64(sext_i32_i64(local_tid_128405), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128405) - + sext_i32_i64(squot32(local_tid_128405, + 32) * 32 - 1)); + + if (inactive_128422) { + x_111688 = x_111689; + } + if (!inactive_128422) { + double defunc_1_op_res_111690 = x_111688 + + x_111689; + + x_111688 = defunc_1_op_res_111690; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + x_111688; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128405, 32) == 0) { + ((__local + double *) red_arr_mem_128409)[sext_i32_i64(local_tid_128405)] = + x_111689; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128413) * + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402) + + sext_i32_i64(local_tid_128405), m_75136) && + slt64(sext_i32_i64(local_tid_128405), + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402))) { + ((__global + double *) mem_124054)[sext_i32_i64(virt_group_id_128413) * + squot64(segred_group_sizze_111684, + segment_sizze_nonzzero_128402) + + sext_i32_i64(local_tid_128405)] = + ((__local + double *) red_arr_mem_128409)[(sext_i32_i64(local_tid_128405) + + (int64_t) 1) * + segment_sizze_nonzzero_128402 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_36333 + #undef 
segred_group_sizze_111684 } -__kernel void mainMagnitudezisegred_large_36415(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46211_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46209_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28493, - int64_t num_groups_36462, - int64_t groups_per_segment_46195, - int64_t elements_per_thread_46196, - int64_t virt_num_groups_46197, - int64_t threads_per_segment_46199, - __global - unsigned char *mem_44397, - __global - unsigned char *defunc_4_map_res_mem_44916, - __global - unsigned char *mem_45134, - __global - unsigned char *group_res_arr_mem_46200, - __global - unsigned char *mainMagnitudezicounter_mem_46202) +__kernel void mainzisegred_small_111633(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128334_backing_aligned_0, + int64_t m_75136, + int64_t num_recresids_padded_75809, + int64_t num_groups_111663, + int64_t segment_sizze_nonzzero_128327, + __global unsigned char *mem_124045, + __global unsigned char *mem_124048) { - #define segred_group_sizze_36461 (mainMagnitudezisegred_group_sizze_36409) + #define segred_group_sizze_111662 (mainzisegred_group_sizze_111627) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46211_backing_1 = - (__local volatile - char *) sync_arr_mem_46211_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46209_backing_0 = + __local volatile char *restrict red_arr_mem_128334_backing_0 = (__local volatile - char *) red_arr_mem_46209_backing_aligned_1; + char *) red_arr_mem_128334_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46204; - int32_t local_tid_46205; - int64_t group_sizze_46208; - int32_t wave_sizze_46207; - int32_t group_tid_46206; - - global_tid_46204 = get_global_id(0); - local_tid_46205 = get_local_id(0); - group_sizze_46208 = get_local_size(0); - wave_sizze_46207 = LOCKSTEP_WIDTH; - group_tid_46206 = 
get_group_id(0); - - int32_t phys_tid_36415; - - phys_tid_36415 = global_tid_46204; - - __local char *red_arr_mem_46209; - - red_arr_mem_46209 = (__local char *) red_arr_mem_46209_backing_0; - - __local char *sync_arr_mem_46211; - - sync_arr_mem_46211 = (__local char *) sync_arr_mem_46211_backing_1; - - int32_t phys_group_id_46213; - - phys_group_id_46213 = get_group_id(0); - for (int32_t i_46214 = 0; i_46214 < - sdiv_up32(sext_i64_i32(virt_num_groups_46197) - phys_group_id_46213, - sext_i64_i32(num_groups_36462)); i_46214++) { - int32_t virt_group_id_46215 = phys_group_id_46213 + i_46214 * - sext_i64_i32(num_groups_36462); - int32_t flat_segment_id_46216 = squot32(virt_group_id_46215, - sext_i64_i32(groups_per_segment_46195)); - int64_t global_tid_46217 = srem64(sext_i32_i64(virt_group_id_46215) * - segred_group_sizze_36461 + - sext_i32_i64(local_tid_46205), - segred_group_sizze_36461 * - groups_per_segment_46195); - int64_t gtid_36404 = squot64(sext_i32_i64(flat_segment_id_46216), - N_28477); - int64_t gtid_36405 = sext_i32_i64(flat_segment_id_46216) - - squot64(sext_i32_i64(flat_segment_id_46216), N_28477) * N_28477; - int64_t gtid_36414; - float x_acc_46218; - int64_t chunk_sizze_46219; - - chunk_sizze_46219 = smin64(elements_per_thread_46196, - sdiv_up64(i32_res_28493 - - sext_i32_i64(sext_i64_i32(global_tid_46217)), - threads_per_segment_46199)); + int32_t global_tid_128329; + int32_t local_tid_128330; + int64_t group_sizze_128333; + int32_t wave_sizze_128332; + int32_t group_tid_128331; + + global_tid_128329 = get_global_id(0); + local_tid_128330 = get_local_id(0); + group_sizze_128333 = get_local_size(0); + wave_sizze_128332 = LOCKSTEP_WIDTH; + group_tid_128331 = get_group_id(0); + + int32_t phys_tid_111633; + + phys_tid_111633 = global_tid_128329; + + __local char *red_arr_mem_128334; + + red_arr_mem_128334 = (__local char *) red_arr_mem_128334_backing_0; + + int32_t phys_group_id_128336; + + phys_group_id_128336 = get_group_id(0); + for (int32_t i_128337 
= 0; i_128337 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327))) - + phys_group_id_128336, sext_i64_i32(num_groups_111663)); + i_128337++) { + int32_t virt_group_id_128338 = phys_group_id_128336 + i_128337 * + sext_i64_i32(num_groups_111663); + int64_t gtid_111624 = squot64(sext_i32_i64(local_tid_128330), + segment_sizze_nonzzero_128327) + + sext_i32_i64(virt_group_id_128338) * + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327); + int64_t gtid_111632 = srem64(sext_i32_i64(local_tid_128330), + num_recresids_padded_75809); - float x_36465; - float x_36466; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_46218 = 0.0F; - } - for (int64_t i_46223 = 0; i_46223 < chunk_sizze_46219; i_46223++) { - gtid_36414 = sext_i32_i64(sext_i64_i32(global_tid_46217)) + - threads_per_segment_46199 * i_46223; - // apply map function - { - float x_36470 = ((__global - float *) defunc_4_map_res_mem_44916)[gtid_36404 * - i32_res_28493 + - gtid_36414]; - float x_36471 = ((__global float *) mem_44397)[gtid_36405 * - i32_res_28493 + - gtid_36414]; - float defunc_1_f_res_36472 = x_36470 * x_36471; + if (slt64((int64_t) 0, num_recresids_padded_75809) && + (slt64(gtid_111624, m_75136) && + slt64(sext_i32_i64(local_tid_128330), + num_recresids_padded_75809 * + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327)))) { + double x_111674 = ((__global double *) mem_124045)[gtid_111624 * + num_recresids_padded_75809 + + gtid_111632]; // save map-out results { } - // load accumulator - { - x_36465 = x_acc_46218; - } - // load new values - { - x_36466 = defunc_1_f_res_36472; - } - // apply reduction operator + // save results to be reduced { - float defunc_1_op_res_36467 = x_36465 + x_36466; - - // store in accumulator - { - x_acc_46218 = defunc_1_op_res_36467; - } + ((__local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + x_111674; } + } else 
{ + ((__local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + 0.0; } } - // to reduce current chunk, first store our result in memory - { - x_36465 = x_acc_46218; - ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_36465; - } barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46224; - int32_t skip_waves_46225; - - skip_waves_46225 = 1; - - float x_46220; - float x_46221; - - offset_46224 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46205, - sext_i64_i32(segred_group_sizze_36461))) { - x_46220 = ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46224)]; - } - } - offset_46224 = 1; - while (slt32(offset_46224, wave_sizze_46207)) { - if (slt32(local_tid_46205 + offset_46224, - sext_i64_i32(segred_group_sizze_36461)) && - ((local_tid_46205 - squot32(local_tid_46205, wave_sizze_46207) * - wave_sizze_46207) & (2 * offset_46224 - 1)) == 0) { - // read array element - { - x_46221 = ((volatile __local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46224)]; - } - // apply reduction operation + if (slt64((int64_t) 0, num_recresids_padded_75809)) { + // perform segmented scan to imitate reduction + { + double x_111666; + double x_111667; + double x_128339; + double x_128340; + bool ltid_in_bounds_128346; + + ltid_in_bounds_128346 = slt64(sext_i32_i64(local_tid_128330), + num_recresids_padded_75809 * + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327)); + + int32_t skip_threads_128347; + + // read input for in-block scan { - float defunc_1_op_res_46222 = x_46220 + x_46221; - - x_46220 = defunc_1_op_res_46222; + if (ltid_in_bounds_128346) { + x_111667 = ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)]; + if ((local_tid_128330 - squot32(local_tid_128330, 32) * + 32) == 0) { + x_111666 = x_111667; + } + } } - // write result of operation + // in-block scan (hopefully no barriers needed) { - 
((volatile __local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_46220; - } - } - offset_46224 *= 2; - } - while (slt32(skip_waves_46225, - squot32(sext_i64_i32(segred_group_sizze_36461) + - wave_sizze_46207 - 1, wave_sizze_46207))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46224 = skip_waves_46225 * wave_sizze_46207; - if (slt32(local_tid_46205 + offset_46224, - sext_i64_i32(segred_group_sizze_36461)) && - ((local_tid_46205 - squot32(local_tid_46205, wave_sizze_46207) * - wave_sizze_46207) == 0 && (squot32(local_tid_46205, - wave_sizze_46207) & (2 * - skip_waves_46225 - - 1)) == - 0)) { - // read array element - { - x_46221 = ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46224)]; - } - // apply reduction operation - { - float defunc_1_op_res_46222 = x_46220 + x_46221; - - x_46220 = defunc_1_op_res_46222; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_46220; - } - } - skip_waves_46225 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46205) == (int64_t) 0) { - x_acc_46218 = x_46220; - } - } - if (groups_per_segment_46195 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46205 == 0) { - ((__global float *) mem_45134)[gtid_36404 * N_28477 + - gtid_36405] = x_acc_46218; - } - } - } else { - int32_t old_counter_46226; - - // first thread in group saves group result to global memory - { - if (local_tid_46205 == 0) { - ((__global - float *) group_res_arr_mem_46200)[sext_i32_i64(virt_group_id_46215) * - segred_group_sizze_36461] = - x_acc_46218; - mem_fence_global(); - old_counter_46226 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46202)[sext_i32_i64(srem32(flat_segment_id_46216, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46211)[(int64_t) 0] = - old_counter_46226 == 
groups_per_segment_46195 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46227; - - is_last_group_46227 = ((__local - bool *) sync_arr_mem_46211)[(int64_t) 0]; - if (is_last_group_46227) { - if (local_tid_46205 == 0) { - old_counter_46226 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46202)[sext_i32_i64(srem32(flat_segment_id_46216, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46195)); + skip_threads_128347 = 1; + while (slt32(skip_threads_128347, 32)) { + if (sle32(skip_threads_128347, local_tid_128330 - + squot32(local_tid_128330, 32) * 32) && + ltid_in_bounds_128346) { + // read operands + { + x_111666 = ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330) - + sext_i32_i64(skip_threads_128347)]; + } + // perform operation + { + bool inactive_128348 = + slt64(srem64(sext_i32_i64(local_tid_128330), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128330) - + sext_i32_i64(local_tid_128330 - + skip_threads_128347)); + + if (inactive_128348) { + x_111666 = x_111667; + } + if (!inactive_128348) { + bool isnan_res_111668; + + isnan_res_111668 = futrts_isnan64(x_111666); + + double defunc_1_op_res_111669; + + if (isnan_res_111668) { + defunc_1_op_res_111669 = x_111667; + } else { + bool isnan_res_111670; + + isnan_res_111670 = + futrts_isnan64(x_111667); + + double defunc_1_op_res_f_res_111671; + + if (isnan_res_111670) { + defunc_1_op_res_f_res_111671 = + x_111666; + } else { + double + defunc_1_op_res_f_res_f_res_111672 = + x_111666 + x_111667; + + defunc_1_op_res_f_res_111671 = + defunc_1_op_res_f_res_f_res_111672; + } + defunc_1_op_res_111669 = + defunc_1_op_res_f_res_111671; + } + x_111666 = defunc_1_op_res_111669; + } + } + } + if (sle32(wave_sizze_128332, skip_threads_128347)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128347, local_tid_128330 - + squot32(local_tid_128330, 32) * 32) && + 
ltid_in_bounds_128346) { + // write result + { + ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + x_111666; + x_111667 = x_111666; + } + } + if (sle32(wave_sizze_128332, skip_threads_128347)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128347 *= 2; + } } - // read in the per-group-results + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' { - int64_t read_per_thread_46228 = - sdiv_up64(groups_per_segment_46195, - segred_group_sizze_36461); - - x_36465 = 0.0F; - for (int64_t i_46229 = 0; i_46229 < read_per_thread_46228; - i_46229++) { - int64_t group_res_id_46230 = - sext_i32_i64(local_tid_46205) * - read_per_thread_46228 + i_46229; - int64_t index_of_group_res_46231 = - sext_i32_i64(flat_segment_id_46216) * - groups_per_segment_46195 + group_res_id_46230; - - if (slt64(group_res_id_46230, - groups_per_segment_46195)) { - x_36466 = ((__global - float *) group_res_arr_mem_46200)[index_of_group_res_46231 * - segred_group_sizze_36461]; - - float defunc_1_op_res_36467; - - defunc_1_op_res_36467 = x_36465 + x_36466; - x_36465 = defunc_1_op_res_36467; - } + if ((local_tid_128330 - squot32(local_tid_128330, 32) * + 32) == 31 && ltid_in_bounds_128346) { + ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(squot32(local_tid_128330, + 32))] = + x_111666; } } - ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_36465; barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t offset_46232; - int32_t skip_waves_46233; - - skip_waves_46233 = 1; + int32_t skip_threads_128349; - float x_46220; - float x_46221; - - offset_46232 = 0; - // participating threads read initial accumulator + // read input for in-block scan { - if (slt32(local_tid_46205, - sext_i64_i32(segred_group_sizze_36461))) { - x_46220 = ((__local - float *) 
red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46232)]; + if (squot32(local_tid_128330, 32) == 0 && + ltid_in_bounds_128346) { + x_128340 = ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)]; + if ((local_tid_128330 - squot32(local_tid_128330, + 32) * 32) == 0) { + x_128339 = x_128340; + } } } - offset_46232 = 1; - while (slt32(offset_46232, wave_sizze_46207)) { - if (slt32(local_tid_46205 + offset_46232, - sext_i64_i32(segred_group_sizze_36461)) && - ((local_tid_46205 - squot32(local_tid_46205, - wave_sizze_46207) * - wave_sizze_46207) & (2 * offset_46232 - 1)) == - 0) { - // read array element - { - x_46221 = ((volatile __local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46232)]; + // in-block scan (hopefully no barriers needed) + { + skip_threads_128349 = 1; + while (slt32(skip_threads_128349, 32)) { + if (sle32(skip_threads_128349, local_tid_128330 - + squot32(local_tid_128330, 32) * 32) && + (squot32(local_tid_128330, 32) == 0 && + ltid_in_bounds_128346)) { + // read operands + { + x_128339 = ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330) - + sext_i32_i64(skip_threads_128349)]; + } + // perform operation + { + bool inactive_128350 = + slt64(srem64(sext_i32_i64(local_tid_128330 * + 32 + 32 - 1), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128330 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128330 - + skip_threads_128349) * + 32 + 32 - 1)); + + if (inactive_128350) { + x_128339 = x_128340; + } + if (!inactive_128350) { + bool isnan_res_128341; + + isnan_res_128341 = + futrts_isnan64(x_128339); + + double defunc_1_op_res_128342; + + if (isnan_res_128341) { + defunc_1_op_res_128342 = x_128340; + } else { + bool isnan_res_128343; + + isnan_res_128343 = + futrts_isnan64(x_128340); + + double defunc_1_op_res_f_res_128344; + + if (isnan_res_128343) { + defunc_1_op_res_f_res_128344 = + x_128339; + } else { + double + defunc_1_op_res_f_res_f_res_128345 + 
= x_128339 + x_128340; + + defunc_1_op_res_f_res_128344 = + defunc_1_op_res_f_res_f_res_128345; + } + defunc_1_op_res_128342 = + defunc_1_op_res_f_res_128344; + } + x_128339 = defunc_1_op_res_128342; + } + } } - // apply reduction operation - { - float defunc_1_op_res_46222 = x_46220 + x_46221; - - x_46220 = defunc_1_op_res_46222; + if (sle32(wave_sizze_128332, skip_threads_128349)) { + barrier(CLK_LOCAL_MEM_FENCE); } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_46220; + if (sle32(skip_threads_128349, local_tid_128330 - + squot32(local_tid_128330, 32) * 32) && + (squot32(local_tid_128330, 32) == 0 && + ltid_in_bounds_128346)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + x_128339; + x_128340 = x_128339; + } } + if (sle32(wave_sizze_128332, skip_threads_128349)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128349 *= 2; } - offset_46232 *= 2; } - while (slt32(skip_waves_46233, - squot32(sext_i64_i32(segred_group_sizze_36461) + - wave_sizze_46207 - 1, - wave_sizze_46207))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46232 = skip_waves_46233 * wave_sizze_46207; - if (slt32(local_tid_46205 + offset_46232, - sext_i64_i32(segred_group_sizze_36461)) && - ((local_tid_46205 - squot32(local_tid_46205, - wave_sizze_46207) * - wave_sizze_46207) == 0 && - (squot32(local_tid_46205, wave_sizze_46207) & (2 * - skip_waves_46233 - - 1)) == - 0)) { - // read array element - { - x_46221 = ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205 + - offset_46232)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128330, 32) == 0 || + !ltid_in_bounds_128346)) { + // read operands + { + x_111667 = x_111666; + x_111666 = ((__local + double *) red_arr_mem_128334)[sext_i32_i64(squot32(local_tid_128330, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool 
inactive_128351 = + slt64(srem64(sext_i32_i64(local_tid_128330), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128330) - + sext_i32_i64(squot32(local_tid_128330, + 32) * 32 - 1)); + + if (inactive_128351) { + x_111666 = x_111667; } - // apply reduction operation - { - float defunc_1_op_res_46222 = x_46220 + x_46221; + if (!inactive_128351) { + bool isnan_res_111668; - x_46220 = defunc_1_op_res_46222; - } - // write result of operation - { - ((__local - float *) red_arr_mem_46209)[sext_i32_i64(local_tid_46205)] = - x_46220; + isnan_res_111668 = futrts_isnan64(x_111666); + + double defunc_1_op_res_111669; + + if (isnan_res_111668) { + defunc_1_op_res_111669 = x_111667; + } else { + bool isnan_res_111670; + + isnan_res_111670 = futrts_isnan64(x_111667); + + double defunc_1_op_res_f_res_111671; + + if (isnan_res_111670) { + defunc_1_op_res_f_res_111671 = x_111666; + } else { + double + defunc_1_op_res_f_res_f_res_111672 = + x_111666 + x_111667; + + defunc_1_op_res_f_res_111671 = + defunc_1_op_res_f_res_f_res_111672; + } + defunc_1_op_res_111669 = + defunc_1_op_res_f_res_111671; + } + x_111666 = defunc_1_op_res_111669; } } - skip_waves_46233 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46205 == 0) { - ((__global float *) mem_45134)[gtid_36404 * - N_28477 + - gtid_36405] = - x_46220; + // write final result + { + ((__local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + x_111666; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128330, 32) == 0) { + ((__local + double *) red_arr_mem_128334)[sext_i32_i64(local_tid_128330)] = + x_111667; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128338) * + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327) + + sext_i32_i64(local_tid_128330), m_75136) && + 
slt64(sext_i32_i64(local_tid_128330), + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327))) { + ((__global + double *) mem_124048)[sext_i32_i64(virt_group_id_128338) * + squot64(segred_group_sizze_111662, + segment_sizze_nonzzero_128327) + + sext_i32_i64(local_tid_128330)] = + ((__local + double *) red_arr_mem_128334)[(sext_i32_i64(local_tid_128330) + + (int64_t) 1) * + segment_sizze_nonzzero_128327 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_36461 + #undef segred_group_sizze_111662 } -__kernel void mainMagnitudezisegred_large_36993(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46463_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46461_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28487, - int64_t num_groups_37044, - int64_t groups_per_segment_46447, - int64_t elements_per_thread_46448, - int64_t virt_num_groups_46449, - int64_t threads_per_segment_46451, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45232, - __global - unsigned char *mem_45235, - __global - unsigned char *group_res_arr_mem_46452, - __global - unsigned char *mainMagnitudezicounter_mem_46454) +__kernel void mainzisegred_small_112268(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128639_backing_aligned_0, + int64_t m_75136, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int64_t num_groups_112545, + int64_t segment_sizze_nonzzero_128632, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global unsigned char *mem_124078, + __global unsigned char *mem_124130) { - #define segred_group_sizze_37043 (mainMagnitudezisegred_group_sizze_36987) + #define segred_group_sizze_112544 (mainzisegred_group_sizze_112262) const int block_dim0 = 0; const int block_dim1 = 1; const int 
block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46463_backing_1 = + __local volatile char *restrict red_arr_mem_128639_backing_0 = (__local volatile - char *) sync_arr_mem_46463_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46461_backing_0 = - (__local volatile - char *) red_arr_mem_46461_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46456; - int32_t local_tid_46457; - int64_t group_sizze_46460; - int32_t wave_sizze_46459; - int32_t group_tid_46458; + char *) red_arr_mem_128639_backing_aligned_0; - global_tid_46456 = get_global_id(0); - local_tid_46457 = get_local_id(0); - group_sizze_46460 = get_local_size(0); - wave_sizze_46459 = LOCKSTEP_WIDTH; - group_tid_46458 = get_group_id(0); - - int32_t phys_tid_36993; - - phys_tid_36993 = global_tid_46456; - - __local char *red_arr_mem_46461; - - red_arr_mem_46461 = (__local char *) red_arr_mem_46461_backing_0; - - __local char *sync_arr_mem_46463; - - sync_arr_mem_46463 = (__local char *) sync_arr_mem_46463_backing_1; - - int32_t phys_group_id_46465; + if (*global_failure >= 0) + return; - phys_group_id_46465 = get_group_id(0); - for (int32_t i_46466 = 0; i_46466 < - sdiv_up32(sext_i64_i32(virt_num_groups_46449) - phys_group_id_46465, - sext_i64_i32(num_groups_37044)); i_46466++) { - int32_t virt_group_id_46467 = phys_group_id_46465 + i_46466 * - sext_i64_i32(num_groups_37044); - int32_t flat_segment_id_46468 = squot32(virt_group_id_46467, - sext_i64_i32(groups_per_segment_46447)); - int64_t global_tid_46469 = srem64(sext_i32_i64(virt_group_id_46467) * - segred_group_sizze_37043 + - sext_i32_i64(local_tid_46457), - segred_group_sizze_37043 * - groups_per_segment_46447); - int64_t gtid_36984 = sext_i32_i64(flat_segment_id_46468); - int64_t gtid_36992; - float x_acc_46470; - int64_t chunk_sizze_46471; 
- - chunk_sizze_46471 = smin64(elements_per_thread_46448, - sdiv_up64(i32_res_28487 - - sext_i32_i64(sext_i64_i32(global_tid_46469)), - threads_per_segment_46451)); + int32_t global_tid_128634; + int32_t local_tid_128635; + int64_t group_sizze_128638; + int32_t wave_sizze_128637; + int32_t group_tid_128636; + + global_tid_128634 = get_global_id(0); + local_tid_128635 = get_local_id(0); + group_sizze_128638 = get_local_size(0); + wave_sizze_128637 = LOCKSTEP_WIDTH; + group_tid_128636 = get_group_id(0); + + int32_t phys_tid_112268; + + phys_tid_112268 = global_tid_128634; + + __local char *red_arr_mem_128639; + + red_arr_mem_128639 = (__local char *) red_arr_mem_128639_backing_0; + + int32_t phys_group_id_128641; + + phys_group_id_128641 = get_group_id(0); + for (int32_t i_128642 = 0; i_128642 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632))) - + phys_group_id_128641, sext_i64_i32(num_groups_112545)); + i_128642++) { + int32_t virt_group_id_128643 = phys_group_id_128641 + i_128642 * + sext_i64_i32(num_groups_112545); + int64_t gtid_112259 = squot64(sext_i32_i64(local_tid_128635), + segment_sizze_nonzzero_128632) + + sext_i32_i64(virt_group_id_128643) * + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632); + int64_t gtid_112267 = srem64(sext_i32_i64(local_tid_128635), + num_recresids_padded_75809); - float x_37047; - float x_37048; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_46470 = 0.0F; - } - for (int64_t i_46475 = 0; i_46475 < chunk_sizze_46471; i_46475++) { - gtid_36992 = sext_i32_i64(sext_i64_i32(global_tid_46469)) + - threads_per_segment_46451 * i_46475; - // apply map function - { - int32_t defunc_0_f_res_37051 = ((__global - int32_t *) mem_45232)[gtid_36984]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_36992); - bool cond_37053 = slt32(index_primexp_42385, - defunc_0_f_res_37051); - float defunc_0_f_res_37054; - - if 
(cond_37053) { - int64_t i_37055 = sext_i32_i64(index_primexp_42385); - bool x_37056 = sle64((int64_t) 0, i_37055); - bool y_37057 = slt64(i_37055, N_28477); - bool bounds_check_37058 = x_37056 && y_37057; - bool index_certs_37059; - - if (!bounds_check_37058) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 98) == -1) { - global_failure_args[0] = i_37055; - global_failure_args[1] = N_28477; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_37060 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_36984 * - N_28477 + - i_37055]; - - defunc_0_f_res_37054 = defunc_0_f_res_t_res_37060; + if (slt64((int64_t) 0, num_recresids_padded_75809) && + (slt64(gtid_112259, m_75136) && + slt64(sext_i32_i64(local_tid_128635), + num_recresids_padded_75809 * + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632)))) { + int64_t slice_115288 = (int64_t) 1 + gtid_112267; + double x_112554 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_112259 * + Nmk_76536 + + slice_115288]; + double x_112555 = ((__global double *) mem_124078)[gtid_112259 * + Nmk_76536 + + slice_115288]; + double abs_res_112556 = fabs(x_112554); + bool cond_112557 = x_112555 < abs_res_112556; + int64_t defunc_2_f_res_112558; + + if (cond_112557) { + defunc_2_f_res_112558 = gtid_112267; } else { - defunc_0_f_res_37054 = 0.0F; + defunc_2_f_res_112558 = (int64_t) 9223372036854775807; } - - float defunc_0_f_res_37061 = defunc_0_f_res_37054 * - defunc_0_f_res_37054; - // save map-out results { } - // load accumulator - { - x_37047 = x_acc_46470; - } - // load new values - { - x_37048 = defunc_0_f_res_37061; - } - // apply reduction operator - { - float defunc_1_op_res_37049 = x_37047 + x_37048; - - // store in accumulator - { - x_acc_46470 = defunc_1_op_res_37049; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_37047 = x_acc_46470; - ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - 
x_37047; - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46476; - int32_t skip_waves_46477; - - skip_waves_46477 = 1; - - float x_46472; - float x_46473; - - offset_46476 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46457, - sext_i64_i32(segred_group_sizze_37043))) { - x_46472 = ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46476)]; - } - } - offset_46476 = 1; - while (slt32(offset_46476, wave_sizze_46459)) { - if (slt32(local_tid_46457 + offset_46476, - sext_i64_i32(segred_group_sizze_37043)) && - ((local_tid_46457 - squot32(local_tid_46457, wave_sizze_46459) * - wave_sizze_46459) & (2 * offset_46476 - 1)) == 0) { - // read array element - { - x_46473 = ((volatile __local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46476)]; - } - // apply reduction operation - { - float defunc_1_op_res_46474 = x_46472 + x_46473; - - x_46472 = defunc_1_op_res_46474; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - x_46472; - } - } - offset_46476 *= 2; - } - while (slt32(skip_waves_46477, - squot32(sext_i64_i32(segred_group_sizze_37043) + - wave_sizze_46459 - 1, wave_sizze_46459))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46476 = skip_waves_46477 * wave_sizze_46459; - if (slt32(local_tid_46457 + offset_46476, - sext_i64_i32(segred_group_sizze_37043)) && - ((local_tid_46457 - squot32(local_tid_46457, wave_sizze_46459) * - wave_sizze_46459) == 0 && (squot32(local_tid_46457, - wave_sizze_46459) & (2 * - skip_waves_46477 - - 1)) == - 0)) { - // read array element - { - x_46473 = ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46476)]; - } - // apply reduction operation - { - float defunc_1_op_res_46474 = x_46472 + x_46473; - - x_46472 = defunc_1_op_res_46474; - } - // write result of operation + // 
save results to be reduced { ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - x_46472; + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + defunc_2_f_res_112558; } + } else { + ((__local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + (int64_t) 9223372036854775807; } - skip_waves_46477 *= 2; } barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46457) == (int64_t) 0) { - x_acc_46470 = x_46472; - } - } - if (groups_per_segment_46447 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46457 == 0) { - ((__global float *) mem_45235)[gtid_36984] = x_acc_46470; - } - } - } else { - int32_t old_counter_46478; - - // first thread in group saves group result to global memory + if (slt64((int64_t) 0, num_recresids_padded_75809)) { + // perform segmented scan to imitate reduction { - if (local_tid_46457 == 0) { - ((__global - float *) group_res_arr_mem_46452)[sext_i32_i64(virt_group_id_46467) * - segred_group_sizze_37043] = - x_acc_46470; - mem_fence_global(); - old_counter_46478 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46454)[sext_i32_i64(srem32(flat_segment_id_46468, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46463)[(int64_t) 0] = - old_counter_46478 == groups_per_segment_46447 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46479; - - is_last_group_46479 = ((__local - bool *) sync_arr_mem_46463)[(int64_t) 0]; - if (is_last_group_46479) { - if (local_tid_46457 == 0) { - old_counter_46478 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46454)[sext_i32_i64(srem32(flat_segment_id_46468, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46447)); - } - // read in the per-group-results + int64_t x_112548; + int64_t x_112549; + int64_t x_128644; + int64_t x_128645; + 
bool ltid_in_bounds_128647; + + ltid_in_bounds_128647 = slt64(sext_i32_i64(local_tid_128635), + num_recresids_padded_75809 * + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632)); + + int32_t skip_threads_128648; + + // read input for in-block scan { - int64_t read_per_thread_46480 = - sdiv_up64(groups_per_segment_46447, - segred_group_sizze_37043); - - x_37047 = 0.0F; - for (int64_t i_46481 = 0; i_46481 < read_per_thread_46480; - i_46481++) { - int64_t group_res_id_46482 = - sext_i32_i64(local_tid_46457) * - read_per_thread_46480 + i_46481; - int64_t index_of_group_res_46483 = - sext_i32_i64(flat_segment_id_46468) * - groups_per_segment_46447 + group_res_id_46482; - - if (slt64(group_res_id_46482, - groups_per_segment_46447)) { - x_37048 = ((__global - float *) group_res_arr_mem_46452)[index_of_group_res_46483 * - segred_group_sizze_37043]; - - float defunc_1_op_res_37049; - - defunc_1_op_res_37049 = x_37047 + x_37048; - x_37047 = defunc_1_op_res_37049; + if (ltid_in_bounds_128647) { + x_112549 = ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)]; + if ((local_tid_128635 - squot32(local_tid_128635, 32) * + 32) == 0) { + x_112548 = x_112549; } } } - ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - x_37047; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // in-block scan (hopefully no barriers needed) { - int32_t offset_46484; - int32_t skip_waves_46485; - - skip_waves_46485 = 1; - - float x_46472; - float x_46473; - - offset_46484 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46457, - sext_i64_i32(segred_group_sizze_37043))) { - x_46472 = ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46484)]; - } - } - offset_46484 = 1; - while (slt32(offset_46484, wave_sizze_46459)) { - if (slt32(local_tid_46457 + offset_46484, - sext_i64_i32(segred_group_sizze_37043)) && - ((local_tid_46457 - 
squot32(local_tid_46457, - wave_sizze_46459) * - wave_sizze_46459) & (2 * offset_46484 - 1)) == - 0) { - // read array element + skip_threads_128648 = 1; + while (slt32(skip_threads_128648, 32)) { + if (sle32(skip_threads_128648, local_tid_128635 - + squot32(local_tid_128635, 32) * 32) && + ltid_in_bounds_128647) { + // read operands { - x_46473 = ((volatile __local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46484)]; + x_112548 = ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635) - + sext_i32_i64(skip_threads_128648)]; } - // apply reduction operation + // perform operation { - float defunc_1_op_res_46474 = x_46472 + x_46473; + bool inactive_128649 = + slt64(srem64(sext_i32_i64(local_tid_128635), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128635) - + sext_i32_i64(local_tid_128635 - + skip_threads_128648)); - x_46472 = defunc_1_op_res_46474; + if (inactive_128649) { + x_112548 = x_112549; + } + if (!inactive_128649) { + int64_t defunc_1_op_res_112550 = + smin64(x_112548, x_112549); + + x_112548 = defunc_1_op_res_112550; + } } - // write result of operation + } + if (sle32(wave_sizze_128637, skip_threads_128648)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128648, local_tid_128635 - + squot32(local_tid_128635, 32) * 32) && + ltid_in_bounds_128647) { + // write result { ((volatile __local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - x_46472; + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + x_112548; + x_112549 = x_112548; } } - offset_46484 *= 2; + if (sle32(wave_sizze_128637, skip_threads_128648)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128648 *= 2; } - while (slt32(skip_waves_46485, - squot32(sext_i64_i32(segred_group_sizze_37043) + - wave_sizze_46459 - 1, - wave_sizze_46459))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46484 = skip_waves_46485 * wave_sizze_46459; - if (slt32(local_tid_46457 + offset_46484, - 
sext_i64_i32(segred_group_sizze_37043)) && - ((local_tid_46457 - squot32(local_tid_46457, - wave_sizze_46459) * - wave_sizze_46459) == 0 && - (squot32(local_tid_46457, wave_sizze_46459) & (2 * - skip_waves_46485 - - 1)) == - 0)) { - // read array element - { - x_46473 = ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457 + - offset_46484)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128635 - squot32(local_tid_128635, 32) * + 32) == 31 && ltid_in_bounds_128647) { + ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(squot32(local_tid_128635, + 32))] = + x_112548; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128650; + + // read input for in-block scan + { + if (squot32(local_tid_128635, 32) == 0 && + ltid_in_bounds_128647) { + x_128645 = ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)]; + if ((local_tid_128635 - squot32(local_tid_128635, + 32) * 32) == 0) { + x_128644 = x_128645; } - // apply reduction operation - { - float defunc_1_op_res_46474 = x_46472 + x_46473; - - x_46472 = defunc_1_op_res_46474; + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128650 = 1; + while (slt32(skip_threads_128650, 32)) { + if (sle32(skip_threads_128650, local_tid_128635 - + squot32(local_tid_128635, 32) * 32) && + (squot32(local_tid_128635, 32) == 0 && + ltid_in_bounds_128647)) { + // read operands + { + x_128644 = ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635) - + sext_i32_i64(skip_threads_128650)]; + } + // perform operation + { + bool inactive_128651 = + slt64(srem64(sext_i32_i64(local_tid_128635 * + 32 + 32 - 1), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128635 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128635 - + skip_threads_128650) * + 32 + 32 - 1)); + + 
if (inactive_128651) { + x_128644 = x_128645; + } + if (!inactive_128651) { + int64_t defunc_1_op_res_128646 = + smin64(x_128644, x_128645); + + x_128644 = defunc_1_op_res_128646; + } + } } - // write result of operation - { - ((__local - float *) red_arr_mem_46461)[sext_i32_i64(local_tid_46457)] = - x_46472; + if (sle32(wave_sizze_128637, skip_threads_128650)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128650, local_tid_128635 - + squot32(local_tid_128635, 32) * 32) && + (squot32(local_tid_128635, 32) == 0 && + ltid_in_bounds_128647)) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + x_128644; + x_128645 = x_128644; + } + } + if (sle32(wave_sizze_128637, skip_threads_128650)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128650 *= 2; } - skip_waves_46485 *= 2; } - // and back to memory with the final result - { - if (local_tid_46457 == 0) { - ((__global float *) mem_45235)[gtid_36984] = - x_46472; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128635, 32) == 0 || + !ltid_in_bounds_128647)) { + // read operands + { + x_112549 = x_112548; + x_112548 = ((__local + int64_t *) red_arr_mem_128639)[sext_i32_i64(squot32(local_tid_128635, + 32)) - + (int64_t) 1]; } + // perform operation + { + bool inactive_128652 = + slt64(srem64(sext_i32_i64(local_tid_128635), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128635) - + sext_i32_i64(squot32(local_tid_128635, + 32) * 32 - 1)); + + if (inactive_128652) { + x_112548 = x_112549; + } + if (!inactive_128652) { + int64_t defunc_1_op_res_112550 = + smin64(x_112548, x_112549); + + x_112548 = defunc_1_op_res_112550; + } + } + // write final result + { + ((__local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + x_112548; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128635, 32) == 0) 
{ + ((__local + int64_t *) red_arr_mem_128639)[sext_i32_i64(local_tid_128635)] = + x_112549; } } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128643) * + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632) + + sext_i32_i64(local_tid_128635), m_75136) && + slt64(sext_i32_i64(local_tid_128635), + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632))) { + ((__global + int64_t *) mem_124130)[sext_i32_i64(virt_group_id_128643) * + squot64(segred_group_sizze_112544, + segment_sizze_nonzzero_128632) + + sext_i32_i64(local_tid_128635)] = + ((__local + int64_t *) red_arr_mem_128639)[(sext_i32_i64(local_tid_128635) + + (int64_t) 1) * + segment_sizze_nonzzero_128632 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_37043 + #undef segred_group_sizze_112544 } -__kernel void mainMagnitudezisegred_large_37018(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46403_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46401_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28487, - int64_t num_groups_37030, - int64_t groups_per_segment_46387, - int64_t elements_per_thread_46388, - int64_t virt_num_groups_46389, - int64_t threads_per_segment_46391, - __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_45232, - __global - unsigned char *group_res_arr_mem_46392, - __global - unsigned char *mainMagnitudezicounter_mem_46394) +__kernel void mainzisegred_small_112393(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128574_backing_aligned_0, + int64_t m_75136, + int64_t num_recresids_padded_75809, + int64_t Nmk_76536, + int64_t num_groups_112425, + int64_t segment_sizze_nonzzero_128567, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global unsigned char 
*mem_124121, + __global unsigned char *mem_124124) { - #define segred_group_sizze_37029 (mainMagnitudezisegred_group_sizze_37012) + #define segred_group_sizze_112424 (mainzisegred_group_sizze_112387) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46403_backing_1 = - (__local volatile - char *) sync_arr_mem_46403_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46401_backing_0 = + __local volatile char *restrict red_arr_mem_128574_backing_0 = (__local volatile - char *) red_arr_mem_46401_backing_aligned_1; + char *) red_arr_mem_128574_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46396; - int32_t local_tid_46397; - int64_t group_sizze_46400; - int32_t wave_sizze_46399; - int32_t group_tid_46398; - - global_tid_46396 = get_global_id(0); - local_tid_46397 = get_local_id(0); - group_sizze_46400 = get_local_size(0); - wave_sizze_46399 = LOCKSTEP_WIDTH; - group_tid_46398 = get_group_id(0); - - int32_t phys_tid_37018; - - phys_tid_37018 = global_tid_46396; - - __local char *red_arr_mem_46401; - - red_arr_mem_46401 = (__local char *) red_arr_mem_46401_backing_0; - - __local char *sync_arr_mem_46403; - - sync_arr_mem_46403 = (__local char *) sync_arr_mem_46403_backing_1; - - int32_t phys_group_id_46405; - - phys_group_id_46405 = get_group_id(0); - for (int32_t i_46406 = 0; i_46406 < - sdiv_up32(sext_i64_i32(virt_num_groups_46389) - phys_group_id_46405, - sext_i64_i32(num_groups_37030)); i_46406++) { - int32_t virt_group_id_46407 = phys_group_id_46405 + i_46406 * - sext_i64_i32(num_groups_37030); - int32_t flat_segment_id_46408 = squot32(virt_group_id_46407, - sext_i64_i32(groups_per_segment_46387)); - int64_t global_tid_46409 = srem64(sext_i32_i64(virt_group_id_46407) * - segred_group_sizze_37029 + - sext_i32_i64(local_tid_46397), - segred_group_sizze_37029 * - groups_per_segment_46387); - int64_t gtid_37009 = sext_i32_i64(flat_segment_id_46408); - 
int64_t gtid_37017; - int32_t x_acc_46410; - int64_t chunk_sizze_46411; - - chunk_sizze_46411 = smin64(elements_per_thread_46388, - sdiv_up64(i32_res_28487 - - sext_i32_i64(sext_i64_i32(global_tid_46409)), - threads_per_segment_46391)); + int32_t global_tid_128569; + int32_t local_tid_128570; + int64_t group_sizze_128573; + int32_t wave_sizze_128572; + int32_t group_tid_128571; + + global_tid_128569 = get_global_id(0); + local_tid_128570 = get_local_id(0); + group_sizze_128573 = get_local_size(0); + wave_sizze_128572 = LOCKSTEP_WIDTH; + group_tid_128571 = get_group_id(0); + + int32_t phys_tid_112393; + + phys_tid_112393 = global_tid_128569; + + __local char *red_arr_mem_128574; + + red_arr_mem_128574 = (__local char *) red_arr_mem_128574_backing_0; + + int32_t phys_group_id_128576; + + phys_group_id_128576 = get_group_id(0); + for (int32_t i_128577 = 0; i_128577 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567))) - + phys_group_id_128576, sext_i64_i32(num_groups_112425)); + i_128577++) { + int32_t virt_group_id_128578 = phys_group_id_128576 + i_128577 * + sext_i64_i32(num_groups_112425); + int64_t gtid_112384 = squot64(sext_i32_i64(local_tid_128570), + segment_sizze_nonzzero_128567) + + sext_i32_i64(virt_group_id_128578) * + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567); + int64_t gtid_112392 = srem64(sext_i32_i64(local_tid_128570), + num_recresids_padded_75809); - int32_t x_37033; - int32_t x_37034; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_46410 = 0; - } - for (int64_t i_46415 = 0; i_46415 < chunk_sizze_46411; i_46415++) { - gtid_37017 = sext_i32_i64(sext_i64_i32(global_tid_46409)) + - threads_per_segment_46391 * i_46415; - // apply map function - { - float x_37037 = ((__global - float *) images_mem_44381)[gtid_37009 * - N_28477 + - gtid_37017]; - bool isnan_res_37038; - - isnan_res_37038 = futrts_isnan32(x_37037); - - bool 
cond_37039 = !isnan_res_37038; - int32_t defunc_0_f_res_37040 = btoi_bool_i32(cond_37039); + if (slt64((int64_t) 0, num_recresids_padded_75809) && + (slt64(gtid_112384, m_75136) && + slt64(sext_i32_i64(local_tid_128570), + num_recresids_padded_75809 * + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567)))) { + double i64_res_112432 = ((__global + double *) mem_124121)[gtid_112384]; + int64_t slice_115286 = (int64_t) 1 + gtid_112392; + double x_112433 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_112384 * + Nmk_76536 + + slice_115286]; + int64_t x_112435 = mul64((int64_t) 2, gtid_112392); + int64_t i64_arg_112436 = add64((int64_t) 2, x_112435); + double i64_res_112437 = sitofp_i64_f64(i64_arg_112436); + double y_112438 = i64_res_112437 / i64_res_112432; + double lifted_div_res_112439 = 1.0 + y_112438; + double abs_arg_112440 = x_112433 / lifted_div_res_112439; + double abs_res_112441 = fabs(abs_arg_112440); // save map-out results { } - // load accumulator - { - x_37033 = x_acc_46410; - } - // load new values - { - x_37034 = defunc_0_f_res_37040; - } - // apply reduction operator + // save results to be reduced { - int32_t defunc_1_op_res_37035 = add32(x_37033, x_37034); - - // store in accumulator - { - x_acc_46410 = defunc_1_op_res_37035; - } + ((__local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + abs_res_112441; } + } else { + ((__local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + -INFINITY; } } - // to reduce current chunk, first store our result in memory - { - x_37033 = x_acc_46410; - ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_37033; - } barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46416; - int32_t skip_waves_46417; - - skip_waves_46417 = 1; - - int32_t x_46412; - int32_t x_46413; - - offset_46416 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46397, - sext_i64_i32(segred_group_sizze_37029))) { - x_46412 = 
((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46416)]; - } - } - offset_46416 = 1; - while (slt32(offset_46416, wave_sizze_46399)) { - if (slt32(local_tid_46397 + offset_46416, - sext_i64_i32(segred_group_sizze_37029)) && - ((local_tid_46397 - squot32(local_tid_46397, wave_sizze_46399) * - wave_sizze_46399) & (2 * offset_46416 - 1)) == 0) { - // read array element + if (slt64((int64_t) 0, num_recresids_padded_75809)) { + // perform segmented scan to imitate reduction + { + double x_112428; + double x_112429; + double x_128579; + double x_128580; + bool ltid_in_bounds_128582; + + ltid_in_bounds_128582 = slt64(sext_i32_i64(local_tid_128570), + num_recresids_padded_75809 * + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567)); + + int32_t skip_threads_128583; + + // read input for in-block scan { - x_46413 = ((volatile __local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46416)]; + if (ltid_in_bounds_128582) { + x_112429 = ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)]; + if ((local_tid_128570 - squot32(local_tid_128570, 32) * + 32) == 0) { + x_112428 = x_112429; + } + } } - // apply reduction operation + // in-block scan (hopefully no barriers needed) { - int32_t defunc_1_op_res_46414 = add32(x_46412, x_46413); - - x_46412 = defunc_1_op_res_46414; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_46412; - } - } - offset_46416 *= 2; - } - while (slt32(skip_waves_46417, - squot32(sext_i64_i32(segred_group_sizze_37029) + - wave_sizze_46399 - 1, wave_sizze_46399))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46416 = skip_waves_46417 * wave_sizze_46399; - if (slt32(local_tid_46397 + offset_46416, - sext_i64_i32(segred_group_sizze_37029)) && - ((local_tid_46397 - squot32(local_tid_46397, wave_sizze_46399) * - wave_sizze_46399) == 0 && (squot32(local_tid_46397, - wave_sizze_46399) & 
(2 * - skip_waves_46417 - - 1)) == - 0)) { - // read array element - { - x_46413 = ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46416)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46414 = add32(x_46412, x_46413); - - x_46412 = defunc_1_op_res_46414; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_46412; - } - } - skip_waves_46417 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46397) == (int64_t) 0) { - x_acc_46410 = x_46412; - } - } - if (groups_per_segment_46387 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46397 == 0) { - ((__global int32_t *) mem_45232)[gtid_37009] = x_acc_46410; - } - } - } else { - int32_t old_counter_46418; - - // first thread in group saves group result to global memory - { - if (local_tid_46397 == 0) { - ((__global - int32_t *) group_res_arr_mem_46392)[sext_i32_i64(virt_group_id_46407) * - segred_group_sizze_37029] = - x_acc_46410; - mem_fence_global(); - old_counter_46418 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46394)[sext_i32_i64(srem32(flat_segment_id_46408, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46403)[(int64_t) 0] = - old_counter_46418 == groups_per_segment_46387 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46419; - - is_last_group_46419 = ((__local - bool *) sync_arr_mem_46403)[(int64_t) 0]; - if (is_last_group_46419) { - if (local_tid_46397 == 0) { - old_counter_46418 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46394)[sext_i32_i64(srem32(flat_segment_id_46408, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46387)); + skip_threads_128583 = 1; + while (slt32(skip_threads_128583, 32)) { + if (sle32(skip_threads_128583, 
local_tid_128570 - + squot32(local_tid_128570, 32) * 32) && + ltid_in_bounds_128582) { + // read operands + { + x_112428 = ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570) - + sext_i32_i64(skip_threads_128583)]; + } + // perform operation + { + bool inactive_128584 = + slt64(srem64(sext_i32_i64(local_tid_128570), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128570) - + sext_i32_i64(local_tid_128570 - + skip_threads_128583)); + + if (inactive_128584) { + x_112428 = x_112429; + } + if (!inactive_128584) { + double defunc_1_op_res_112430 = + fmax64(x_112428, x_112429); + + x_112428 = defunc_1_op_res_112430; + } + } + } + if (sle32(wave_sizze_128572, skip_threads_128583)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128583, local_tid_128570 - + squot32(local_tid_128570, 32) * 32) && + ltid_in_bounds_128582) { + // write result + { + ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + x_112428; + x_112429 = x_112428; + } + } + if (sle32(wave_sizze_128572, skip_threads_128583)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128583 *= 2; + } } - // read in the per-group-results + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' { - int64_t read_per_thread_46420 = - sdiv_up64(groups_per_segment_46387, - segred_group_sizze_37029); - - x_37033 = 0; - for (int64_t i_46421 = 0; i_46421 < read_per_thread_46420; - i_46421++) { - int64_t group_res_id_46422 = - sext_i32_i64(local_tid_46397) * - read_per_thread_46420 + i_46421; - int64_t index_of_group_res_46423 = - sext_i32_i64(flat_segment_id_46408) * - groups_per_segment_46387 + group_res_id_46422; - - if (slt64(group_res_id_46422, - groups_per_segment_46387)) { - x_37034 = ((__global - int32_t *) group_res_arr_mem_46392)[index_of_group_res_46423 * - segred_group_sizze_37029]; - - int32_t defunc_1_op_res_37035; - - defunc_1_op_res_37035 = add32(x_37033, x_37034); - x_37033 = 
defunc_1_op_res_37035; - } + if ((local_tid_128570 - squot32(local_tid_128570, 32) * + 32) == 31 && ltid_in_bounds_128582) { + ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(squot32(local_tid_128570, + 32))] = + x_112428; } } - ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_37033; barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t offset_46424; - int32_t skip_waves_46425; - - skip_waves_46425 = 1; + int32_t skip_threads_128585; - int32_t x_46412; - int32_t x_46413; - - offset_46424 = 0; - // participating threads read initial accumulator + // read input for in-block scan { - if (slt32(local_tid_46397, - sext_i64_i32(segred_group_sizze_37029))) { - x_46412 = ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46424)]; + if (squot32(local_tid_128570, 32) == 0 && + ltid_in_bounds_128582) { + x_128580 = ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)]; + if ((local_tid_128570 - squot32(local_tid_128570, + 32) * 32) == 0) { + x_128579 = x_128580; + } } } - offset_46424 = 1; - while (slt32(offset_46424, wave_sizze_46399)) { - if (slt32(local_tid_46397 + offset_46424, - sext_i64_i32(segred_group_sizze_37029)) && - ((local_tid_46397 - squot32(local_tid_46397, - wave_sizze_46399) * - wave_sizze_46399) & (2 * offset_46424 - 1)) == - 0) { - // read array element - { - x_46413 = ((volatile __local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46424)]; + // in-block scan (hopefully no barriers needed) + { + skip_threads_128585 = 1; + while (slt32(skip_threads_128585, 32)) { + if (sle32(skip_threads_128585, local_tid_128570 - + squot32(local_tid_128570, 32) * 32) && + (squot32(local_tid_128570, 32) == 0 && + ltid_in_bounds_128582)) { + // read operands + { + x_128579 = ((volatile __local + double *) 
red_arr_mem_128574)[sext_i32_i64(local_tid_128570) - + sext_i32_i64(skip_threads_128585)]; + } + // perform operation + { + bool inactive_128586 = + slt64(srem64(sext_i32_i64(local_tid_128570 * + 32 + 32 - 1), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128570 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128570 - + skip_threads_128585) * + 32 + 32 - 1)); + + if (inactive_128586) { + x_128579 = x_128580; + } + if (!inactive_128586) { + double defunc_1_op_res_128581 = + fmax64(x_128579, x_128580); + + x_128579 = defunc_1_op_res_128581; + } + } } - // apply reduction operation - { - int32_t defunc_1_op_res_46414 = add32(x_46412, - x_46413); - - x_46412 = defunc_1_op_res_46414; + if (sle32(wave_sizze_128572, skip_threads_128585)) { + barrier(CLK_LOCAL_MEM_FENCE); } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_46412; + if (sle32(skip_threads_128585, local_tid_128570 - + squot32(local_tid_128570, 32) * 32) && + (squot32(local_tid_128570, 32) == 0 && + ltid_in_bounds_128582)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + x_128579; + x_128580 = x_128579; + } + } + if (sle32(wave_sizze_128572, skip_threads_128585)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128585 *= 2; } - offset_46424 *= 2; } - while (slt32(skip_waves_46425, - squot32(sext_i64_i32(segred_group_sizze_37029) + - wave_sizze_46399 - 1, - wave_sizze_46399))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46424 = skip_waves_46425 * wave_sizze_46399; - if (slt32(local_tid_46397 + offset_46424, - sext_i64_i32(segred_group_sizze_37029)) && - ((local_tid_46397 - squot32(local_tid_46397, - wave_sizze_46399) * - wave_sizze_46399) == 0 && - (squot32(local_tid_46397, wave_sizze_46399) & (2 * - skip_waves_46425 - - 1)) == - 0)) { - // read array element - { - x_46413 = ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397 + - offset_46424)]; + 
} + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128570, 32) == 0 || + !ltid_in_bounds_128582)) { + // read operands + { + x_112429 = x_112428; + x_112428 = ((__local + double *) red_arr_mem_128574)[sext_i32_i64(squot32(local_tid_128570, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128587 = + slt64(srem64(sext_i32_i64(local_tid_128570), + num_recresids_padded_75809), + sext_i32_i64(local_tid_128570) - + sext_i32_i64(squot32(local_tid_128570, + 32) * 32 - 1)); + + if (inactive_128587) { + x_112428 = x_112429; } - // apply reduction operation - { - int32_t defunc_1_op_res_46414 = add32(x_46412, - x_46413); + if (!inactive_128587) { + double defunc_1_op_res_112430 = fmax64(x_112428, + x_112429); - x_46412 = defunc_1_op_res_46414; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46401)[sext_i32_i64(local_tid_46397)] = - x_46412; + x_112428 = defunc_1_op_res_112430; } } - skip_waves_46425 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46397 == 0) { - ((__global int32_t *) mem_45232)[gtid_37009] = - x_46412; + // write final result + { + ((__local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + x_112428; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128570, 32) == 0) { + ((__local + double *) red_arr_mem_128574)[sext_i32_i64(local_tid_128570)] = + x_112429; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128578) * + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567) + + sext_i32_i64(local_tid_128570), m_75136) && + slt64(sext_i32_i64(local_tid_128570), + squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567))) { + ((__global + double *) mem_124124)[sext_i32_i64(virt_group_id_128578) * + 
squot64(segred_group_sizze_112424, + segment_sizze_nonzzero_128567) + + sext_i32_i64(local_tid_128570)] = + ((__local + double *) red_arr_mem_128574)[(sext_i32_i64(local_tid_128570) + + (int64_t) 1) * + segment_sizze_nonzzero_128567 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_37029 + #undef segred_group_sizze_112424 } -__kernel void mainMagnitudezisegred_large_37181(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *sync_arr_mem_46572_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46570_backing_aligned_1, - int64_t N_28477, - int64_t i32_res_28880, - int64_t num_groups_37203, - int64_t groups_per_segment_46556, - int64_t elements_per_thread_46557, - int64_t virt_num_groups_46558, - int64_t threads_per_segment_46560, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45278, - __global - unsigned char *group_res_arr_mem_46561, - __global - unsigned char *mainMagnitudezicounter_mem_46563) +__kernel void mainzisegred_small_112741(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128773_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, int64_t k2p2zq_75151, + int64_t num_groups_112880, + int64_t segment_sizze_nonzzero_128766, + __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124142, + __global unsigned char *mem_124276, + __global unsigned char *mem_124281) { - #define segred_group_sizze_37202 (mainMagnitudezisegred_group_sizze_37175) + #define segred_group_sizze_112879 (mainzisegred_group_sizze_112735) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46572_backing_1 = + 
__local volatile char *restrict red_arr_mem_128773_backing_0 = (__local volatile - char *) sync_arr_mem_46572_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46570_backing_0 = - (__local volatile - char *) red_arr_mem_46570_backing_aligned_1; - volatile __local bool local_failure; - - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; - } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t global_tid_46565; - int32_t local_tid_46566; - int64_t group_sizze_46569; - int32_t wave_sizze_46568; - int32_t group_tid_46567; - - global_tid_46565 = get_global_id(0); - local_tid_46566 = get_local_id(0); - group_sizze_46569 = get_local_size(0); - wave_sizze_46568 = LOCKSTEP_WIDTH; - group_tid_46567 = get_group_id(0); - - int32_t phys_tid_37181; - - phys_tid_37181 = global_tid_46565; - - __local char *red_arr_mem_46570; - - red_arr_mem_46570 = (__local char *) red_arr_mem_46570_backing_0; - - __local char *sync_arr_mem_46572; + char *) red_arr_mem_128773_backing_aligned_0; - sync_arr_mem_46572 = (__local char *) sync_arr_mem_46572_backing_1; - - int32_t phys_group_id_46574; + if (*global_failure >= 0) + return; - phys_group_id_46574 = get_group_id(0); - for (int32_t i_46575 = 0; i_46575 < - sdiv_up32(sext_i64_i32(virt_num_groups_46558) - phys_group_id_46574, - sext_i64_i32(num_groups_37203)); i_46575++) { - int32_t virt_group_id_46576 = phys_group_id_46574 + i_46575 * - sext_i64_i32(num_groups_37203); - int32_t flat_segment_id_46577 = squot32(virt_group_id_46576, - sext_i64_i32(groups_per_segment_46556)); - int64_t global_tid_46578 = srem64(sext_i32_i64(virt_group_id_46576) * - segred_group_sizze_37202 + - sext_i32_i64(local_tid_46566), - segred_group_sizze_37202 * - groups_per_segment_46556); - int64_t gtid_37172 = sext_i32_i64(flat_segment_id_46577); - int64_t gtid_37180; - float x_acc_46579; - int64_t chunk_sizze_46580; - - chunk_sizze_46580 = smin64(elements_per_thread_46557, - sdiv_up64(i32_res_28880 
- - sext_i32_i64(sext_i64_i32(global_tid_46578)), - threads_per_segment_46560)); + int32_t global_tid_128768; + int32_t local_tid_128769; + int64_t group_sizze_128772; + int32_t wave_sizze_128771; + int32_t group_tid_128770; + + global_tid_128768 = get_global_id(0); + local_tid_128769 = get_local_id(0); + group_sizze_128772 = get_local_size(0); + wave_sizze_128771 = LOCKSTEP_WIDTH; + group_tid_128770 = get_group_id(0); + + int32_t phys_tid_112741; + + phys_tid_112741 = global_tid_128768; + + __local char *red_arr_mem_128773; + + red_arr_mem_128773 = (__local char *) red_arr_mem_128773_backing_0; + + int32_t phys_group_id_128775; + + phys_group_id_128775 = get_group_id(0); + for (int32_t i_128776 = 0; i_128776 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151 * k2p2zq_75151, + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766))) - + phys_group_id_128775, sext_i64_i32(num_groups_112880)); + i_128776++) { + int32_t virt_group_id_128777 = phys_group_id_128775 + i_128776 * + sext_i64_i32(num_groups_112880); + int64_t gtid_112728 = squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766), + k2p2zq_75151 * k2p2zq_75151); + int64_t gtid_112729 = squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) - + squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151); + int64_t gtid_112730 = squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) - 
+ squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766), k2p2zq_75151 * + k2p2zq_75151) * (k2p2zq_75151 * k2p2zq_75151) - + squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) - + squot64(squot64(sext_i32_i64(local_tid_128769), + segment_sizze_nonzzero_128766) + + sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766), + k2p2zq_75151 * k2p2zq_75151) * (k2p2zq_75151 * + k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151; + int64_t gtid_112740 = srem64(sext_i32_i64(local_tid_128769), n_75139); - float x_37206; - float x_37207; - - // neutral-initialise the accumulators + // apply map function if in bounds { - x_acc_46579 = 0.0F; - } - for (int64_t i_46584 = 0; i_46584 < chunk_sizze_46580; i_46584++) { - gtid_37180 = sext_i32_i64(sext_i64_i32(global_tid_46578)) + - threads_per_segment_46560 * i_46584; - // apply map function - { - int32_t x_37211 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_37172]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_37180); - bool cond_37213 = slt32(index_primexp_42390, x_37211); - float defunc_0_f_res_37214; - - if (cond_37213) { - int32_t x_37210 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37172]; - int32_t x_37215 = add32(x_37210, index_primexp_42390); - int32_t x_37216 = sub32(x_37215, x_37211); - int32_t i_37217 = add32(1, x_37216); - int64_t i_37218 = sext_i32_i64(i_37217); - bool x_37219 = sle64((int64_t) 0, i_37218); - bool y_37220 = slt64(i_37218, N_28477); - bool bounds_check_37221 = x_37219 && y_37220; - bool index_certs_37222; - - if (!bounds_check_37221) { - { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 102) == -1) { - global_failure_args[0] = i_37218; - 
global_failure_args[1] = N_28477; - ; - } - local_failure = true; - goto error_0; - } - } - - float defunc_0_f_res_t_res_37223 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37172 * - N_28477 + - i_37218]; - - defunc_0_f_res_37214 = defunc_0_f_res_t_res_37223; + if (slt64((int64_t) 0, n_75139) && (((slt64(gtid_112728, m_75136) && + slt64(gtid_112729, + k2p2zq_75151)) && + slt64(gtid_112730, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_128769), + n_75139 * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766)))) { + double x_112889 = ((__global double *) mem_124142)[gtid_112728 * + N_75135 + + gtid_112740]; + double x_112890 = ((__global + double *) binop_p_mem_120117)[gtid_112729 * + N_75135 + + gtid_112740]; + double x_112891 = ((__global double *) mem_124276)[gtid_112730 * + N_75135 + + gtid_112740]; + double x_112892 = x_112890 * x_112891; + bool isnan_res_112893; + + isnan_res_112893 = futrts_isnan64(x_112889); + + double y_112894; + + if (isnan_res_112893) { + y_112894 = 0.0; } else { - defunc_0_f_res_37214 = 0.0F; + y_112894 = 1.0; } + + double defunc_2_f_res_112895 = x_112892 * y_112894; + // save map-out results { } - // load accumulator - { - x_37206 = x_acc_46579; - } - // load new values - { - x_37207 = defunc_0_f_res_37214; - } - // apply reduction operator - { - float defunc_1_op_res_37208 = x_37206 + x_37207; - - // store in accumulator - { - x_acc_46579 = defunc_1_op_res_37208; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_37206 = x_acc_46579; - ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_37206; - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46585; - int32_t skip_waves_46586; - - skip_waves_46586 = 1; - - float x_46581; - float x_46582; - - offset_46585 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46566, - 
sext_i64_i32(segred_group_sizze_37202))) { - x_46581 = ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46585)]; - } - } - offset_46585 = 1; - while (slt32(offset_46585, wave_sizze_46568)) { - if (slt32(local_tid_46566 + offset_46585, - sext_i64_i32(segred_group_sizze_37202)) && - ((local_tid_46566 - squot32(local_tid_46566, wave_sizze_46568) * - wave_sizze_46568) & (2 * offset_46585 - 1)) == 0) { - // read array element - { - x_46582 = ((volatile __local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46585)]; - } - // apply reduction operation - { - float defunc_1_op_res_46583 = x_46581 + x_46582; - - x_46581 = defunc_1_op_res_46583; - } - // write result of operation - { - ((volatile __local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_46581; - } - } - offset_46585 *= 2; - } - while (slt32(skip_waves_46586, - squot32(sext_i64_i32(segred_group_sizze_37202) + - wave_sizze_46568 - 1, wave_sizze_46568))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46585 = skip_waves_46586 * wave_sizze_46568; - if (slt32(local_tid_46566 + offset_46585, - sext_i64_i32(segred_group_sizze_37202)) && - ((local_tid_46566 - squot32(local_tid_46566, wave_sizze_46568) * - wave_sizze_46568) == 0 && (squot32(local_tid_46566, - wave_sizze_46568) & (2 * - skip_waves_46586 - - 1)) == - 0)) { - // read array element - { - x_46582 = ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46585)]; - } - // apply reduction operation - { - float defunc_1_op_res_46583 = x_46581 + x_46582; - - x_46581 = defunc_1_op_res_46583; - } - // write result of operation + // save results to be reduced { ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_46581; + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + defunc_2_f_res_112895; } + } else { + ((__local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + 0.0; } - skip_waves_46586 *= 2; } 
barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46566) == (int64_t) 0) { - x_acc_46579 = x_46581; - } - } - if (groups_per_segment_46556 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if (local_tid_46566 == 0) { - ((__global float *) mem_45278)[gtid_37172] = x_acc_46579; - } - } - } else { - int32_t old_counter_46587; - - // first thread in group saves group result to global memory + if (slt64((int64_t) 0, n_75139)) { + // perform segmented scan to imitate reduction { - if (local_tid_46566 == 0) { - ((__global - float *) group_res_arr_mem_46561)[sext_i32_i64(virt_group_id_46576) * - segred_group_sizze_37202] = - x_acc_46579; - mem_fence_global(); - old_counter_46587 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46563)[sext_i32_i64(srem32(flat_segment_id_46577, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46572)[(int64_t) 0] = - old_counter_46587 == groups_per_segment_46556 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46588; - - is_last_group_46588 = ((__local - bool *) sync_arr_mem_46572)[(int64_t) 0]; - if (is_last_group_46588) { - if (local_tid_46566 == 0) { - old_counter_46587 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46563)[sext_i32_i64(srem32(flat_segment_id_46577, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46556)); - } - // read in the per-group-results + double x_112883; + double x_112884; + double x_128778; + double x_128779; + bool ltid_in_bounds_128781; + + ltid_in_bounds_128781 = slt64(sext_i32_i64(local_tid_128769), + n_75139 * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766)); + + int32_t skip_threads_128782; + + // read input for in-block scan { - int64_t read_per_thread_46589 = - sdiv_up64(groups_per_segment_46556, - segred_group_sizze_37202); - - x_37206 = 0.0F; - for 
(int64_t i_46590 = 0; i_46590 < read_per_thread_46589; - i_46590++) { - int64_t group_res_id_46591 = - sext_i32_i64(local_tid_46566) * - read_per_thread_46589 + i_46590; - int64_t index_of_group_res_46592 = - sext_i32_i64(flat_segment_id_46577) * - groups_per_segment_46556 + group_res_id_46591; - - if (slt64(group_res_id_46591, - groups_per_segment_46556)) { - x_37207 = ((__global - float *) group_res_arr_mem_46561)[index_of_group_res_46592 * - segred_group_sizze_37202]; - - float defunc_1_op_res_37208; - - defunc_1_op_res_37208 = x_37206 + x_37207; - x_37206 = defunc_1_op_res_37208; + if (ltid_in_bounds_128781) { + x_112884 = ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)]; + if ((local_tid_128769 - squot32(local_tid_128769, 32) * + 32) == 0) { + x_112883 = x_112884; } } } - ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_37206; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results + // in-block scan (hopefully no barriers needed) { - int32_t offset_46593; - int32_t skip_waves_46594; - - skip_waves_46594 = 1; - - float x_46581; - float x_46582; - - offset_46593 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46566, - sext_i64_i32(segred_group_sizze_37202))) { - x_46581 = ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46593)]; - } - } - offset_46593 = 1; - while (slt32(offset_46593, wave_sizze_46568)) { - if (slt32(local_tid_46566 + offset_46593, - sext_i64_i32(segred_group_sizze_37202)) && - ((local_tid_46566 - squot32(local_tid_46566, - wave_sizze_46568) * - wave_sizze_46568) & (2 * offset_46593 - 1)) == - 0) { - // read array element + skip_threads_128782 = 1; + while (slt32(skip_threads_128782, 32)) { + if (sle32(skip_threads_128782, local_tid_128769 - + squot32(local_tid_128769, 32) * 32) && + ltid_in_bounds_128781) { + // read operands { - x_46582 = ((volatile __local - float *) 
red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46593)]; + x_112883 = ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769) - + sext_i32_i64(skip_threads_128782)]; } - // apply reduction operation + // perform operation { - float defunc_1_op_res_46583 = x_46581 + x_46582; + bool inactive_128783 = + slt64(srem64(sext_i32_i64(local_tid_128769), + n_75139), + sext_i32_i64(local_tid_128769) - + sext_i32_i64(local_tid_128769 - + skip_threads_128782)); - x_46581 = defunc_1_op_res_46583; + if (inactive_128783) { + x_112883 = x_112884; + } + if (!inactive_128783) { + double defunc_1_op_res_112885 = x_112883 + + x_112884; + + x_112883 = defunc_1_op_res_112885; + } } - // write result of operation + } + if (sle32(wave_sizze_128771, skip_threads_128782)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128782, local_tid_128769 - + squot32(local_tid_128769, 32) * 32) && + ltid_in_bounds_128781) { + // write result { ((volatile __local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_46581; + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + x_112883; + x_112884 = x_112883; } } - offset_46593 *= 2; + if (sle32(wave_sizze_128771, skip_threads_128782)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128782 *= 2; } - while (slt32(skip_waves_46594, - squot32(sext_i64_i32(segred_group_sizze_37202) + - wave_sizze_46568 - 1, - wave_sizze_46568))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46593 = skip_waves_46594 * wave_sizze_46568; - if (slt32(local_tid_46566 + offset_46593, - sext_i64_i32(segred_group_sizze_37202)) && - ((local_tid_46566 - squot32(local_tid_46566, - wave_sizze_46568) * - wave_sizze_46568) == 0 && - (squot32(local_tid_46566, wave_sizze_46568) & (2 * - skip_waves_46594 - - 1)) == - 0)) { - // read array element - { - x_46582 = ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566 + - offset_46593)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 
'i' writes its result to offset 'i' + { + if ((local_tid_128769 - squot32(local_tid_128769, 32) * + 32) == 31 && ltid_in_bounds_128781) { + ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(squot32(local_tid_128769, + 32))] = + x_112883; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128784; + + // read input for in-block scan + { + if (squot32(local_tid_128769, 32) == 0 && + ltid_in_bounds_128781) { + x_128779 = ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)]; + if ((local_tid_128769 - squot32(local_tid_128769, + 32) * 32) == 0) { + x_128778 = x_128779; } - // apply reduction operation - { - float defunc_1_op_res_46583 = x_46581 + x_46582; - - x_46581 = defunc_1_op_res_46583; + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128784 = 1; + while (slt32(skip_threads_128784, 32)) { + if (sle32(skip_threads_128784, local_tid_128769 - + squot32(local_tid_128769, 32) * 32) && + (squot32(local_tid_128769, 32) == 0 && + ltid_in_bounds_128781)) { + // read operands + { + x_128778 = ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769) - + sext_i32_i64(skip_threads_128784)]; + } + // perform operation + { + bool inactive_128785 = + slt64(srem64(sext_i32_i64(local_tid_128769 * + 32 + 32 - 1), n_75139), + sext_i32_i64(local_tid_128769 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128769 - + skip_threads_128784) * + 32 + 32 - 1)); + + if (inactive_128785) { + x_128778 = x_128779; + } + if (!inactive_128785) { + double defunc_1_op_res_128780 = + x_128778 + x_128779; + + x_128778 = defunc_1_op_res_128780; + } + } } - // write result of operation - { - ((__local - float *) red_arr_mem_46570)[sext_i32_i64(local_tid_46566)] = - x_46581; + if (sle32(wave_sizze_128771, skip_threads_128784)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128784, local_tid_128769 - + 
squot32(local_tid_128769, 32) * 32) && + (squot32(local_tid_128769, 32) == 0 && + ltid_in_bounds_128781)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + x_128778; + x_128779 = x_128778; + } + } + if (sle32(wave_sizze_128771, skip_threads_128784)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128784 *= 2; } - skip_waves_46594 *= 2; } - // and back to memory with the final result - { - if (local_tid_46566 == 0) { - ((__global float *) mem_45278)[gtid_37172] = - x_46581; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128769, 32) == 0 || + !ltid_in_bounds_128781)) { + // read operands + { + x_112884 = x_112883; + x_112883 = ((__local + double *) red_arr_mem_128773)[sext_i32_i64(squot32(local_tid_128769, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128786 = + slt64(srem64(sext_i32_i64(local_tid_128769), + n_75139), + sext_i32_i64(local_tid_128769) - + sext_i32_i64(squot32(local_tid_128769, + 32) * 32 - 1)); + + if (inactive_128786) { + x_112883 = x_112884; + } + if (!inactive_128786) { + double defunc_1_op_res_112885 = x_112883 + + x_112884; + + x_112883 = defunc_1_op_res_112885; + } } + // write final result + { + ((__local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + x_112883; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128769, 32) == 0) { + ((__local + double *) red_arr_mem_128773)[sext_i32_i64(local_tid_128769)] = + x_112884; } } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769), m_75136 * k2p2zq_75151 * + k2p2zq_75151) && slt64(sext_i32_i64(local_tid_128769), + squot64(segred_group_sizze_112879, + 
segment_sizze_nonzzero_128766))) { + ((__global + double *) mem_124281)[squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) + + squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769) - + squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769) - + squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769), + k2p2zq_75151 * k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151) - + squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769) - + squot64(sext_i32_i64(virt_group_id_128777) * + squot64(segred_group_sizze_112879, + segment_sizze_nonzzero_128766) + + sext_i32_i64(local_tid_128769), + k2p2zq_75151 * + k2p2zq_75151) * + (k2p2zq_75151 * k2p2zq_75151), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_128773)[(sext_i32_i64(local_tid_128769) + + (int64_t) 1) * + segment_sizze_nonzzero_128766 - + (int64_t) 1]; } } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_37202 + #undef segred_group_sizze_112879 } -__kernel void mainMagnitudezisegred_large_38300(__global int *global_failure, - __local volatile - int64_t *sync_arr_mem_46773_backing_aligned_0, - __local volatile - int64_t 
*red_arr_mem_46771_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46769_backing_aligned_2, - __local volatile - int64_t *red_arr_mem_46767_backing_aligned_3, - int64_t iota32_arg_28909, - int64_t num_groups_38487, - int64_t groups_per_segment_46749, - int64_t elements_per_thread_46750, - int64_t virt_num_groups_46751, - __global - unsigned char *mem_45284, - __global - unsigned char *mem_45337, - __global - unsigned char *mem_45339, - __global - unsigned char *mem_45343, - __global - unsigned char *mem_45346, - __global - unsigned char *mem_45348, - __global - unsigned char *mem_45350, - __global - unsigned char *group_res_arr_mem_46754, - __global - unsigned char *group_res_arr_mem_46756, - __global - unsigned char *group_res_arr_mem_46758, - __global - unsigned char *mainMagnitudezicounter_mem_46760) +__kernel void mainzisegred_small_113625(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128963_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, int64_t k2p2zq_75151, + int64_t num_groups_113678, + int64_t segment_sizze_nonzzero_128956, + __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124142, + __global unsigned char *mem_124587) { - #define segred_group_sizze_38486 (mainMagnitudezisegred_group_sizze_38294) + #define segred_group_sizze_113677 (mainzisegred_group_sizze_113619) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict sync_arr_mem_46773_backing_3 = - (__local volatile - char *) sync_arr_mem_46773_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46771_backing_2 = + __local volatile char *restrict red_arr_mem_128963_backing_0 = (__local volatile - char *) red_arr_mem_46771_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46769_backing_1 = - (__local volatile - char *) red_arr_mem_46769_backing_aligned_2; - __local volatile char *restrict red_arr_mem_46767_backing_0 = - (__local 
volatile - char *) red_arr_mem_46767_backing_aligned_3; + char *) red_arr_mem_128963_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46762; - int32_t local_tid_46763; - int64_t group_sizze_46766; - int32_t wave_sizze_46765; - int32_t group_tid_46764; - - global_tid_46762 = get_global_id(0); - local_tid_46763 = get_local_id(0); - group_sizze_46766 = get_local_size(0); - wave_sizze_46765 = LOCKSTEP_WIDTH; - group_tid_46764 = get_group_id(0); - - int32_t phys_tid_38300; - - phys_tid_38300 = global_tid_46762; - - __local char *red_arr_mem_46767; - - red_arr_mem_46767 = (__local char *) red_arr_mem_46767_backing_0; - - __local char *red_arr_mem_46769; - - red_arr_mem_46769 = (__local char *) red_arr_mem_46769_backing_1; - - __local char *red_arr_mem_46771; - - red_arr_mem_46771 = (__local char *) red_arr_mem_46771_backing_2; - - __local char *sync_arr_mem_46773; - - sync_arr_mem_46773 = (__local char *) sync_arr_mem_46773_backing_3; - - int32_t phys_group_id_46775; - - phys_group_id_46775 = get_group_id(0); - for (int32_t i_46776 = 0; i_46776 < - sdiv_up32(sext_i64_i32(virt_num_groups_46751) - phys_group_id_46775, - sext_i64_i32(num_groups_38487)); i_46776++) { - int32_t virt_group_id_46777 = phys_group_id_46775 + i_46776 * - sext_i64_i32(num_groups_38487); - int32_t flat_segment_id_46778 = squot32(virt_group_id_46777, - sext_i64_i32(groups_per_segment_46749)); - int64_t global_tid_46779 = srem64(sext_i32_i64(virt_group_id_46777) * - segred_group_sizze_38486 + - sext_i32_i64(local_tid_46763), - segred_group_sizze_38486 * - groups_per_segment_46749); - int64_t gtid_38291 = sext_i32_i64(flat_segment_id_46778); - int64_t gtid_38299; - bool x_acc_46780; - int32_t x_acc_46781; - float x_acc_46782; - int64_t chunk_sizze_46783; - int64_t starting_point_46784; - - starting_point_46784 = sext_i32_i64(sext_i64_i32(global_tid_46779)) * - elements_per_thread_46750; + int32_t global_tid_128958; + int32_t local_tid_128959; + int64_t group_sizze_128962; + 
int32_t wave_sizze_128961; + int32_t group_tid_128960; + + global_tid_128958 = get_global_id(0); + local_tid_128959 = get_local_id(0); + group_sizze_128962 = get_local_size(0); + wave_sizze_128961 = LOCKSTEP_WIDTH; + group_tid_128960 = get_group_id(0); + + int32_t phys_tid_113625; + + phys_tid_113625 = global_tid_128958; + + __local char *red_arr_mem_128963; + + red_arr_mem_128963 = (__local char *) red_arr_mem_128963_backing_0; + + int32_t phys_group_id_128965; + + phys_group_id_128965 = get_group_id(0); + for (int32_t i_128966 = 0; i_128966 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956))) - + phys_group_id_128965, sext_i64_i32(num_groups_113678)); + i_128966++) { + int32_t virt_group_id_128967 = phys_group_id_128965 + i_128966 * + sext_i64_i32(num_groups_113678); + int64_t gtid_113614 = squot64(squot64(sext_i32_i64(local_tid_128959), + segment_sizze_nonzzero_128956) + + sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956), + k2p2zq_75151); + int64_t gtid_113615 = squot64(sext_i32_i64(local_tid_128959), + segment_sizze_nonzzero_128956) + + sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956) - + squot64(squot64(sext_i32_i64(local_tid_128959), + segment_sizze_nonzzero_128956) + + sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_113624 = srem64(sext_i32_i64(local_tid_128959), n_75139); - int64_t remaining_elements_46785; - - remaining_elements_46785 = iota32_arg_28909 - starting_point_46784; - if (sle64(remaining_elements_46785, (int64_t) 0) || - sle64(iota32_arg_28909, starting_point_46784)) { - chunk_sizze_46783 = (int64_t) 0; - } else { - if (slt64(iota32_arg_28909, - (sext_i32_i64(sext_i64_i32(global_tid_46779)) + - (int64_t) 1) * elements_per_thread_46750)) { - 
chunk_sizze_46783 = iota32_arg_28909 - - sext_i32_i64(sext_i64_i32(global_tid_46779)) * - elements_per_thread_46750; + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_75139) && ((slt64(gtid_113614, m_75136) && + slt64(gtid_113615, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_128959), + n_75139 * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956)))) { + double x_113687 = ((__global double *) mem_124142)[gtid_113614 * + N_75135 + + gtid_113624]; + bool isnan_res_113688; + + isnan_res_113688 = futrts_isnan64(x_113687); + + double defunc_1_f_res_113689; + + if (isnan_res_113688) { + defunc_1_f_res_113689 = 0.0; + } else { + double x_113686 = ((__global + double *) binop_p_mem_120117)[gtid_113615 * + N_75135 + + gtid_113624]; + double defunc_1_f_res_f_res_113690 = x_113686 * x_113687; + + defunc_1_f_res_113689 = defunc_1_f_res_f_res_113690; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + defunc_1_f_res_113689; + } } else { - chunk_sizze_46783 = elements_per_thread_46750; + ((__local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + 0.0; } } - - bool x_38492; - int32_t x_38493; - float x_38494; - bool x_38495; - int32_t x_38496; - float x_38497; - - // neutral-initialise the accumulators - { - x_acc_46780 = 0; - x_acc_46781 = -1; - x_acc_46782 = 0.0F; - } - for (int64_t i_46800 = 0; i_46800 < elements_per_thread_46750; - i_46800++) { - gtid_38299 = sext_i32_i64(local_tid_46763) + - (sext_i32_i64(squot32(sext_i64_i32(global_tid_46779), - sext_i64_i32(segred_group_sizze_38486))) * - elements_per_thread_46750 + i_46800) * - segred_group_sizze_38486; - if (slt64(gtid_38299, iota32_arg_28909)) { - // apply map function + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_75139)) { + // perform segmented scan to imitate reduction + { + double x_113681; + double x_113682; + double x_128968; + double x_128969; 
+ bool ltid_in_bounds_128971; + + ltid_in_bounds_128971 = slt64(sext_i32_i64(local_tid_128959), + n_75139 * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956)); + + int32_t skip_threads_128972; + + // read input for in-block scan { - int32_t y_38506 = ((__global - int32_t *) mem_45339)[gtid_38291]; - float y_38507 = ((__global float *) mem_45337)[gtid_38291]; - float x_38511 = ((__global float *) mem_45343)[gtid_38291 * - iota32_arg_28909 + - gtid_38299]; - float x_38512 = ((__global float *) mem_45284)[gtid_38299]; - int32_t index_primexp_42409 = sext_i64_i32(gtid_38299); - float defunc_0_f_res_38515 = x_38511 / y_38507; - bool cond_38516 = slt32(index_primexp_42409, y_38506); - bool isnan_res_38517; - - isnan_res_38517 = futrts_isnan32(defunc_0_f_res_38515); - - bool cond_t_res_38518 = !isnan_res_38517; - bool x_38519 = cond_38516 && cond_t_res_38518; - float abs_res_38520 = (float) fabs(defunc_0_f_res_38515); - bool defunc_2_f_res_t_res_38521 = x_38512 < abs_res_38520; - bool x_38522 = x_38519 && defunc_2_f_res_t_res_38521; - float defunc_1_f_res_38523; - - if (cond_38516) { - defunc_1_f_res_38523 = defunc_0_f_res_38515; - } else { - defunc_1_f_res_38523 = 0.0F; - } - // save map-out results - { } - // load accumulator - { - x_38492 = x_acc_46780; - x_38493 = x_acc_46781; - x_38494 = x_acc_46782; - } - // load new values - { - x_38495 = x_38522; - x_38496 = index_primexp_42409; - x_38497 = defunc_1_f_res_38523; + if (ltid_in_bounds_128971) { + x_113682 = ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)]; + if ((local_tid_128959 - squot32(local_tid_128959, 32) * + 32) == 0) { + x_113681 = x_113682; + } } - // apply reduction operator - { - bool defunc_1_op_res_38498; - int32_t defunc_1_op_res_38499; - - if (x_38492) { - defunc_1_op_res_38498 = x_38492; - defunc_1_op_res_38499 = x_38493; - } else { - bool x_38500 = x_38495 && x_38495; - bool x_38501 = !x_38495; - bool y_38502 = x_38492 && x_38501; - bool 
defunc_1_op_res_f_res_38503 = x_38500 || - y_38502; - int32_t defunc_1_op_res_f_res_38504; - - if (x_38495) { - defunc_1_op_res_f_res_38504 = x_38496; - } else { - defunc_1_op_res_f_res_38504 = x_38493; + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128972 = 1; + while (slt32(skip_threads_128972, 32)) { + if (sle32(skip_threads_128972, local_tid_128959 - + squot32(local_tid_128959, 32) * 32) && + ltid_in_bounds_128971) { + // read operands + { + x_113681 = ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959) - + sext_i32_i64(skip_threads_128972)]; + } + // perform operation + { + bool inactive_128973 = + slt64(srem64(sext_i32_i64(local_tid_128959), + n_75139), + sext_i32_i64(local_tid_128959) - + sext_i32_i64(local_tid_128959 - + skip_threads_128972)); + + if (inactive_128973) { + x_113681 = x_113682; + } + if (!inactive_128973) { + double defunc_1_op_res_113683 = x_113681 + + x_113682; + + x_113681 = defunc_1_op_res_113683; + } } - defunc_1_op_res_38498 = defunc_1_op_res_f_res_38503; - defunc_1_op_res_38499 = defunc_1_op_res_f_res_38504; } - - float defunc_1_op_res_38505 = x_38494 + x_38497; - - // store in accumulator - { - x_acc_46780 = defunc_1_op_res_38498; - x_acc_46781 = defunc_1_op_res_38499; - x_acc_46782 = defunc_1_op_res_38505; + if (sle32(wave_sizze_128961, skip_threads_128972)) { + barrier(CLK_LOCAL_MEM_FENCE); } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_38492 = x_acc_46780; - x_38493 = x_acc_46781; - x_38494 = x_acc_46782; - ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_38492; - ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_38493; - ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_38494; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46801; - int32_t skip_waves_46802; - - skip_waves_46802 = 1; - - bool x_46786; - int32_t x_46787; - float x_46788; - bool 
x_46789; - int32_t x_46790; - float x_46791; - - offset_46801 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46763, - sext_i64_i32(segred_group_sizze_38486))) { - x_46786 = ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46787 = ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46788 = ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - } - } - offset_46801 = 1; - while (slt32(offset_46801, wave_sizze_46765)) { - if (slt32(local_tid_46763 + offset_46801, - sext_i64_i32(segred_group_sizze_38486)) && - ((local_tid_46763 - squot32(local_tid_46763, - wave_sizze_46765) * - wave_sizze_46765) & (2 * offset_46801 - 1)) == 0) { - // read array element - { - x_46789 = ((volatile __local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46790 = ((volatile __local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46791 = ((volatile __local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - } - // apply reduction operation - { - bool defunc_1_op_res_46792; - int32_t defunc_1_op_res_46793; - - if (x_46786) { - defunc_1_op_res_46792 = x_46786; - defunc_1_op_res_46793 = x_46787; - } else { - bool x_46794 = x_46789 && x_46789; - bool x_46795 = !x_46789; - bool y_46796 = x_46786 && x_46795; - bool defunc_1_op_res_f_res_46797 = x_46794 || - y_46796; - int32_t defunc_1_op_res_f_res_46798; - - if (x_46789) { - defunc_1_op_res_f_res_46798 = x_46790; - } else { - defunc_1_op_res_f_res_46798 = x_46787; + if (sle32(skip_threads_128972, local_tid_128959 - + squot32(local_tid_128959, 32) * 32) && + ltid_in_bounds_128971) { + // write result + { + ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + x_113681; + x_113682 = x_113681; } - defunc_1_op_res_46792 = defunc_1_op_res_f_res_46797; - 
defunc_1_op_res_46793 = defunc_1_op_res_f_res_46798; } - - float defunc_1_op_res_46799 = x_46788 + x_46791; - - x_46786 = defunc_1_op_res_46792; - x_46787 = defunc_1_op_res_46793; - x_46788 = defunc_1_op_res_46799; + if (sle32(wave_sizze_128961, skip_threads_128972)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128972 *= 2; } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_46786; - ((volatile __local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_46787; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128959 - squot32(local_tid_128959, 32) * + 32) == 31 && ltid_in_bounds_128971) { ((volatile __local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_46788; + double *) red_arr_mem_128963)[sext_i32_i64(squot32(local_tid_128959, + 32))] = + x_113681; } } - offset_46801 *= 2; - } - while (slt32(skip_waves_46802, - squot32(sext_i64_i32(segred_group_sizze_38486) + - wave_sizze_46765 - 1, wave_sizze_46765))) { barrier(CLK_LOCAL_MEM_FENCE); - offset_46801 = skip_waves_46802 * wave_sizze_46765; - if (slt32(local_tid_46763 + offset_46801, - sext_i64_i32(segred_group_sizze_38486)) && - ((local_tid_46763 - squot32(local_tid_46763, - wave_sizze_46765) * - wave_sizze_46765) == 0 && (squot32(local_tid_46763, - wave_sizze_46765) & - (2 * skip_waves_46802 - 1)) == - 0)) { - // read array element - { - x_46789 = ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46790 = ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - x_46791 = ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46801)]; - } - // apply reduction operation + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128974; + + // read input for in-block scan { - bool 
defunc_1_op_res_46792; - int32_t defunc_1_op_res_46793; - - if (x_46786) { - defunc_1_op_res_46792 = x_46786; - defunc_1_op_res_46793 = x_46787; - } else { - bool x_46794 = x_46789 && x_46789; - bool x_46795 = !x_46789; - bool y_46796 = x_46786 && x_46795; - bool defunc_1_op_res_f_res_46797 = x_46794 || - y_46796; - int32_t defunc_1_op_res_f_res_46798; - - if (x_46789) { - defunc_1_op_res_f_res_46798 = x_46790; - } else { - defunc_1_op_res_f_res_46798 = x_46787; + if (squot32(local_tid_128959, 32) == 0 && + ltid_in_bounds_128971) { + x_128969 = ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)]; + if ((local_tid_128959 - squot32(local_tid_128959, + 32) * 32) == 0) { + x_128968 = x_128969; } - defunc_1_op_res_46792 = defunc_1_op_res_f_res_46797; - defunc_1_op_res_46793 = defunc_1_op_res_f_res_46798; } - - float defunc_1_op_res_46799 = x_46788 + x_46791; - - x_46786 = defunc_1_op_res_46792; - x_46787 = defunc_1_op_res_46793; - x_46788 = defunc_1_op_res_46799; } - // write result of operation + // in-block scan (hopefully no barriers needed) { - ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_46786; - ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_46787; - ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_46788; - } - } - skip_waves_46802 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46763) == (int64_t) 0) { - x_acc_46780 = x_46786; - x_acc_46781 = x_46787; - x_acc_46782 = x_46788; - } - } - // first thread keeps accumulator; others reset to neutral element - { - if (!(sext_i32_i64(local_tid_46763) == (int64_t) 0)) { - x_acc_46780 = 0; - x_acc_46781 = -1; - x_acc_46782 = 0.0F; - } - } - } - x_38492 = x_acc_46780; - x_38493 = x_acc_46781; - x_38494 = x_acc_46782; - if (groups_per_segment_46749 == (int64_t) 1) { - // first thread in group saves final result to memory - { - if 
(local_tid_46763 == 0) { - ((__global bool *) mem_45346)[gtid_38291] = x_acc_46780; - ((__global int32_t *) mem_45348)[gtid_38291] = x_acc_46781; - ((__global float *) mem_45350)[gtid_38291] = x_acc_46782; - } - } - } else { - int32_t old_counter_46803; - - // first thread in group saves group result to global memory - { - if (local_tid_46763 == 0) { - ((__global - bool *) group_res_arr_mem_46754)[sext_i32_i64(virt_group_id_46777) * - segred_group_sizze_38486] = - x_acc_46780; - ((__global - int32_t *) group_res_arr_mem_46756)[sext_i32_i64(virt_group_id_46777) * - segred_group_sizze_38486] = - x_acc_46781; - ((__global - float *) group_res_arr_mem_46758)[sext_i32_i64(virt_group_id_46777) * - segred_group_sizze_38486] = - x_acc_46782; - mem_fence_global(); - old_counter_46803 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46760)[sext_i32_i64(srem32(flat_segment_id_46778, - 10240))], - (int) 1); - ((__local bool *) sync_arr_mem_46773)[(int64_t) 0] = - old_counter_46803 == groups_per_segment_46749 - - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46804; - - is_last_group_46804 = ((__local - bool *) sync_arr_mem_46773)[(int64_t) 0]; - if (is_last_group_46804) { - if (local_tid_46763 == 0) { - old_counter_46803 = - atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46760)[sext_i32_i64(srem32(flat_segment_id_46778, - 10240))], - (int) ((int64_t) 0 - - groups_per_segment_46749)); - } - // read in the per-group-results - { - int64_t read_per_thread_46805 = - sdiv_up64(groups_per_segment_46749, - segred_group_sizze_38486); - - x_38492 = 0; - x_38493 = -1; - x_38494 = 0.0F; - for (int64_t i_46806 = 0; i_46806 < read_per_thread_46805; - i_46806++) { - int64_t group_res_id_46807 = - sext_i32_i64(local_tid_46763) * - read_per_thread_46805 + i_46806; - int64_t index_of_group_res_46808 = - sext_i32_i64(flat_segment_id_46778) * - groups_per_segment_46749 + 
group_res_id_46807; - - if (slt64(group_res_id_46807, - groups_per_segment_46749)) { - x_38495 = ((__global - bool *) group_res_arr_mem_46754)[index_of_group_res_46808 * - segred_group_sizze_38486]; - x_38496 = ((__global - int32_t *) group_res_arr_mem_46756)[index_of_group_res_46808 * - segred_group_sizze_38486]; - x_38497 = ((__global - float *) group_res_arr_mem_46758)[index_of_group_res_46808 * - segred_group_sizze_38486]; - - bool defunc_1_op_res_38498; - int32_t defunc_1_op_res_38499; - - if (x_38492) { - defunc_1_op_res_38498 = x_38492; - defunc_1_op_res_38499 = x_38493; - } else { - bool x_38500 = x_38495 && x_38495; - bool x_38501 = !x_38495; - bool y_38502 = x_38492 && x_38501; - bool defunc_1_op_res_f_res_38503 = x_38500 || - y_38502; - int32_t defunc_1_op_res_f_res_38504; - - if (x_38495) { - defunc_1_op_res_f_res_38504 = x_38496; - } else { - defunc_1_op_res_f_res_38504 = x_38493; + skip_threads_128974 = 1; + while (slt32(skip_threads_128974, 32)) { + if (sle32(skip_threads_128974, local_tid_128959 - + squot32(local_tid_128959, 32) * 32) && + (squot32(local_tid_128959, 32) == 0 && + ltid_in_bounds_128971)) { + // read operands + { + x_128968 = ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959) - + sext_i32_i64(skip_threads_128974)]; + } + // perform operation + { + bool inactive_128975 = + slt64(srem64(sext_i32_i64(local_tid_128959 * + 32 + 32 - 1), n_75139), + sext_i32_i64(local_tid_128959 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128959 - + skip_threads_128974) * + 32 + 32 - 1)); + + if (inactive_128975) { + x_128968 = x_128969; + } + if (!inactive_128975) { + double defunc_1_op_res_128970 = + x_128968 + x_128969; + + x_128968 = defunc_1_op_res_128970; + } } - defunc_1_op_res_38498 = - defunc_1_op_res_f_res_38503; - defunc_1_op_res_38499 = - defunc_1_op_res_f_res_38504; } - - float defunc_1_op_res_38505 = x_38494 + x_38497; - - x_38492 = defunc_1_op_res_38498; - x_38493 = defunc_1_op_res_38499; - x_38494 = 
defunc_1_op_res_38505; - } - } - } - ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_38492; - ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_38493; - ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_38494; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46809; - int32_t skip_waves_46810; - - skip_waves_46810 = 1; - - bool x_46786; - int32_t x_46787; - float x_46788; - bool x_46789; - int32_t x_46790; - float x_46791; - - offset_46809 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46763, - sext_i64_i32(segred_group_sizze_38486))) { - x_46786 = ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46787 = ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46788 = ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - } - } - offset_46809 = 1; - while (slt32(offset_46809, wave_sizze_46765)) { - if (slt32(local_tid_46763 + offset_46809, - sext_i64_i32(segred_group_sizze_38486)) && - ((local_tid_46763 - squot32(local_tid_46763, - wave_sizze_46765) * - wave_sizze_46765) & (2 * offset_46809 - 1)) == - 0) { - // read array element - { - x_46789 = ((volatile __local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46790 = ((volatile __local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46791 = ((volatile __local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46809)]; + if (sle32(wave_sizze_128961, skip_threads_128974)) { + barrier(CLK_LOCAL_MEM_FENCE); } - // apply reduction operation - { - bool defunc_1_op_res_46792; - int32_t defunc_1_op_res_46793; - - if (x_46786) { - defunc_1_op_res_46792 = x_46786; - defunc_1_op_res_46793 = x_46787; - } else { - bool x_46794 = x_46789 && x_46789; - bool x_46795 = 
!x_46789; - bool y_46796 = x_46786 && x_46795; - bool defunc_1_op_res_f_res_46797 = - x_46794 || y_46796; - int32_t defunc_1_op_res_f_res_46798; - - if (x_46789) { - defunc_1_op_res_f_res_46798 = x_46790; - } else { - defunc_1_op_res_f_res_46798 = x_46787; - } - defunc_1_op_res_46792 = - defunc_1_op_res_f_res_46797; - defunc_1_op_res_46793 = - defunc_1_op_res_f_res_46798; + if (sle32(skip_threads_128974, local_tid_128959 - + squot32(local_tid_128959, 32) * 32) && + (squot32(local_tid_128959, 32) == 0 && + ltid_in_bounds_128971)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + x_128968; + x_128969 = x_128968; } - - float defunc_1_op_res_46799 = x_46788 + x_46791; - - x_46786 = defunc_1_op_res_46792; - x_46787 = defunc_1_op_res_46793; - x_46788 = defunc_1_op_res_46799; } - // write result of operation - { - ((volatile __local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_46786; - ((volatile __local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_46787; - ((volatile __local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_46788; + if (sle32(wave_sizze_128961, skip_threads_128974)) { + barrier(CLK_LOCAL_MEM_FENCE); } + skip_threads_128974 *= 2; } - offset_46809 *= 2; } - while (slt32(skip_waves_46810, - squot32(sext_i64_i32(segred_group_sizze_38486) + - wave_sizze_46765 - 1, - wave_sizze_46765))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46809 = skip_waves_46810 * wave_sizze_46765; - if (slt32(local_tid_46763 + offset_46809, - sext_i64_i32(segred_group_sizze_38486)) && - ((local_tid_46763 - squot32(local_tid_46763, - wave_sizze_46765) * - wave_sizze_46765) == 0 && - (squot32(local_tid_46763, wave_sizze_46765) & (2 * - skip_waves_46810 - - 1)) == - 0)) { - // read array element - { - x_46789 = ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46790 = ((__local - int32_t *) 
red_arr_mem_46769)[sext_i32_i64(local_tid_46763 + - offset_46809)]; - x_46791 = ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763 + - offset_46809)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128959, 32) == 0 || + !ltid_in_bounds_128971)) { + // read operands + { + x_113682 = x_113681; + x_113681 = ((__local + double *) red_arr_mem_128963)[sext_i32_i64(squot32(local_tid_128959, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128976 = + slt64(srem64(sext_i32_i64(local_tid_128959), + n_75139), + sext_i32_i64(local_tid_128959) - + sext_i32_i64(squot32(local_tid_128959, + 32) * 32 - 1)); + + if (inactive_128976) { + x_113681 = x_113682; } - // apply reduction operation - { - bool defunc_1_op_res_46792; - int32_t defunc_1_op_res_46793; - - if (x_46786) { - defunc_1_op_res_46792 = x_46786; - defunc_1_op_res_46793 = x_46787; - } else { - bool x_46794 = x_46789 && x_46789; - bool x_46795 = !x_46789; - bool y_46796 = x_46786 && x_46795; - bool defunc_1_op_res_f_res_46797 = - x_46794 || y_46796; - int32_t defunc_1_op_res_f_res_46798; - - if (x_46789) { - defunc_1_op_res_f_res_46798 = x_46790; - } else { - defunc_1_op_res_f_res_46798 = x_46787; - } - defunc_1_op_res_46792 = - defunc_1_op_res_f_res_46797; - defunc_1_op_res_46793 = - defunc_1_op_res_f_res_46798; - } - - float defunc_1_op_res_46799 = x_46788 + x_46791; + if (!inactive_128976) { + double defunc_1_op_res_113683 = x_113681 + + x_113682; - x_46786 = defunc_1_op_res_46792; - x_46787 = defunc_1_op_res_46793; - x_46788 = defunc_1_op_res_46799; - } - // write result of operation - { - ((__local - bool *) red_arr_mem_46767)[sext_i32_i64(local_tid_46763)] = - x_46786; - ((__local - int32_t *) red_arr_mem_46769)[sext_i32_i64(local_tid_46763)] = - x_46787; - ((__local - float *) red_arr_mem_46771)[sext_i32_i64(local_tid_46763)] = - x_46788; + x_113681 = defunc_1_op_res_113683; } } - skip_waves_46810 *= 2; - } - 
// and back to memory with the final result - { - if (local_tid_46763 == 0) { - ((__global bool *) mem_45346)[gtid_38291] = x_46786; - ((__global int32_t *) mem_45348)[gtid_38291] = - x_46787; - ((__global float *) mem_45350)[gtid_38291] = - x_46788; + // write final result + { + ((__local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + x_113681; } } } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128959, 32) == 0) { + ((__local + double *) red_arr_mem_128963)[sext_i32_i64(local_tid_128959)] = + x_113682; + } + } + barrier(CLK_LOCAL_MEM_FENCE); } } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956) + + sext_i32_i64(local_tid_128959), m_75136 * k2p2zq_75151) && + slt64(sext_i32_i64(local_tid_128959), + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956))) { + ((__global + double *) mem_124587)[squot64(sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956) + + sext_i32_i64(local_tid_128959), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956) + + sext_i32_i64(local_tid_128959) - + squot64(sext_i32_i64(virt_group_id_128967) * + squot64(segred_group_sizze_113677, + segment_sizze_nonzzero_128956) + + sext_i32_i64(local_tid_128959), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_128963)[(sext_i32_i64(local_tid_128959) + + (int64_t) 1) * + segment_sizze_nonzzero_128956 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_38486 + #undef segred_group_sizze_113677 } -__kernel void mainMagnitudezisegred_nonseg_37091(__global int *global_failure, - __local volatile - int64_t 
*red_arr_mem_46506_backing_aligned_0, - __local volatile - int64_t *sync_arr_mem_46504_backing_aligned_1, - int64_t m_28478, - int64_t num_groups_37086, - int64_t num_threads_46498, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *mem_45249, - __global - unsigned char *mainMagnitudezicounter_mem_46494, - __global - unsigned char *group_res_arr_mem_46496) +__kernel void mainzisegred_small_113762(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129051_backing_aligned_0, + int64_t m_75136, int64_t k2p2zq_75151, + int64_t num_groups_113811, + int64_t segment_sizze_nonzzero_129044, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global unsigned char *mem_124653) { - #define segred_group_sizze_37084 (mainMagnitudezisegred_group_sizze_37083) + #define segred_group_sizze_113810 (mainzisegred_group_sizze_113756) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46506_backing_1 = + __local volatile char *restrict red_arr_mem_129051_backing_0 = (__local volatile - char *) red_arr_mem_46506_backing_aligned_0; - __local volatile char *restrict sync_arr_mem_46504_backing_0 = - (__local volatile - char *) sync_arr_mem_46504_backing_aligned_1; + char *) red_arr_mem_129051_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46499; - int32_t local_tid_46500; - int64_t group_sizze_46503; - int32_t wave_sizze_46502; - int32_t group_tid_46501; - - global_tid_46499 = get_global_id(0); - local_tid_46500 = get_local_id(0); - group_sizze_46503 = get_local_size(0); - wave_sizze_46502 = LOCKSTEP_WIDTH; - group_tid_46501 = get_group_id(0); - - int32_t phys_tid_37091; - - phys_tid_37091 = global_tid_46499; - - __local char *sync_arr_mem_46504; - - sync_arr_mem_46504 = (__local char *) sync_arr_mem_46504_backing_0; - - __local char *red_arr_mem_46506; - - red_arr_mem_46506 = 
(__local char *) red_arr_mem_46506_backing_1; - - int64_t dummy_37089; - - dummy_37089 = (int64_t) 0; - - int64_t gtid_37090; - - gtid_37090 = (int64_t) 0; - - int32_t x_acc_46508; - int64_t chunk_sizze_46509; - - chunk_sizze_46509 = smin64(sdiv_up64(m_28478, - sext_i32_i64(sext_i64_i32(segred_group_sizze_37084 * - num_groups_37086))), - sdiv_up64(m_28478 - sext_i32_i64(phys_tid_37091), - num_threads_46498)); - - int32_t x_28876; - int32_t x_28877; - - // neutral-initialise the accumulators - { - x_acc_46508 = 0; - } - for (int64_t i_46513 = 0; i_46513 < chunk_sizze_46509; i_46513++) { - gtid_37090 = sext_i32_i64(phys_tid_37091) + num_threads_46498 * i_46513; - // apply map function - { - int32_t x_28879 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_37090]; - - // save map-out results - { } - // load accumulator - { - x_28876 = x_acc_46508; - } - // load new values - { - x_28877 = x_28879; - } - // apply reduction operator - { - int32_t defunc_1_op_res_28878 = smax32(x_28876, x_28877); - - // store in accumulator - { - x_acc_46508 = defunc_1_op_res_28878; - } - } - } - } - // to reduce current chunk, first store our result in memory - { - x_28876 = x_acc_46508; - ((__local int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_28876; - } - barrier(CLK_LOCAL_MEM_FENCE); - - int32_t offset_46514; - int32_t skip_waves_46515; - - skip_waves_46515 = 1; - - int32_t x_46510; - int32_t x_46511; - - offset_46514 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46500, sext_i64_i32(segred_group_sizze_37084))) { - x_46510 = ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46514)]; - } - } - offset_46514 = 1; - while (slt32(offset_46514, wave_sizze_46502)) { - if (slt32(local_tid_46500 + offset_46514, - sext_i64_i32(segred_group_sizze_37084)) && ((local_tid_46500 - - squot32(local_tid_46500, - wave_sizze_46502) * - wave_sizze_46502) & - (2 * - offset_46514 - - 1)) == 0) { - // read array 
element - { - x_46511 = ((volatile __local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46514)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46512 = smax32(x_46510, x_46511); - - x_46510 = defunc_1_op_res_46512; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_46510; - } - } - offset_46514 *= 2; - } - while (slt32(skip_waves_46515, - squot32(sext_i64_i32(segred_group_sizze_37084) + - wave_sizze_46502 - 1, wave_sizze_46502))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46514 = skip_waves_46515 * wave_sizze_46502; - if (slt32(local_tid_46500 + offset_46514, - sext_i64_i32(segred_group_sizze_37084)) && ((local_tid_46500 - - squot32(local_tid_46500, - wave_sizze_46502) * - wave_sizze_46502) == - 0 && - (squot32(local_tid_46500, - wave_sizze_46502) & - (2 * - skip_waves_46515 - - 1)) == 0)) { - // read array element - { - x_46511 = ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46514)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46512 = smax32(x_46510, x_46511); - - x_46510 = defunc_1_op_res_46512; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_46510; - } - } - skip_waves_46515 *= 2; - } - barrier(CLK_LOCAL_MEM_FENCE); - // first thread saves the result in accumulator - { - if (sext_i32_i64(local_tid_46500) == (int64_t) 0) { - x_acc_46508 = x_46510; - } - } - - int32_t old_counter_46516; - - // first thread in group saves group result to global memory - { - if (local_tid_46500 == 0) { - ((__global - int32_t *) group_res_arr_mem_46496)[sext_i32_i64(group_tid_46501) * - segred_group_sizze_37084] = - x_acc_46508; - mem_fence_global(); - old_counter_46516 = atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46494)[(int64_t) 0], - (int) 1); - ((__local bool *) sync_arr_mem_46504)[(int64_t) 0] = 
- old_counter_46516 == num_groups_37086 - (int64_t) 1; - } - } - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); - - bool is_last_group_46517; - - is_last_group_46517 = ((__local bool *) sync_arr_mem_46504)[(int64_t) 0]; - if (is_last_group_46517) { - if (local_tid_46500 == 0) { - old_counter_46516 = atomic_add_i32_global(&((volatile __global - int *) mainMagnitudezicounter_mem_46494)[(int64_t) 0], - (int) ((int64_t) 0 - - num_groups_37086)); - } - // read in the per-group-results - { - int64_t read_per_thread_46518 = sdiv_up64(num_groups_37086, - segred_group_sizze_37084); - - x_28876 = 0; - for (int64_t i_46519 = 0; i_46519 < read_per_thread_46518; - i_46519++) { - int64_t group_res_id_46520 = sext_i32_i64(local_tid_46500) * - read_per_thread_46518 + i_46519; - int64_t index_of_group_res_46521 = group_res_id_46520; - - if (slt64(group_res_id_46520, num_groups_37086)) { - x_28877 = ((__global - int32_t *) group_res_arr_mem_46496)[index_of_group_res_46521 * - segred_group_sizze_37084]; - - int32_t defunc_1_op_res_28878; - - defunc_1_op_res_28878 = smax32(x_28876, x_28877); - x_28876 = defunc_1_op_res_28878; - } - } - } - ((__local int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_28876; - barrier(CLK_LOCAL_MEM_FENCE); - // reduce the per-group results - { - int32_t offset_46522; - int32_t skip_waves_46523; - - skip_waves_46523 = 1; - - int32_t x_46510; - int32_t x_46511; - - offset_46522 = 0; - // participating threads read initial accumulator - { - if (slt32(local_tid_46500, - sext_i64_i32(segred_group_sizze_37084))) { - x_46510 = ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46522)]; - } - } - offset_46522 = 1; - while (slt32(offset_46522, wave_sizze_46502)) { - if (slt32(local_tid_46500 + offset_46522, - sext_i64_i32(segred_group_sizze_37084)) && - ((local_tid_46500 - squot32(local_tid_46500, - wave_sizze_46502) * - wave_sizze_46502) & (2 * offset_46522 - 1)) == 0) { - // read array element - { - x_46511 
= ((volatile __local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46522)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46512 = smax32(x_46510, - x_46511); - - x_46510 = defunc_1_op_res_46512; - } - // write result of operation - { - ((volatile __local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_46510; - } - } - offset_46522 *= 2; - } - while (slt32(skip_waves_46523, - squot32(sext_i64_i32(segred_group_sizze_37084) + - wave_sizze_46502 - 1, wave_sizze_46502))) { - barrier(CLK_LOCAL_MEM_FENCE); - offset_46522 = skip_waves_46523 * wave_sizze_46502; - if (slt32(local_tid_46500 + offset_46522, - sext_i64_i32(segred_group_sizze_37084)) && - ((local_tid_46500 - squot32(local_tid_46500, - wave_sizze_46502) * - wave_sizze_46502) == 0 && (squot32(local_tid_46500, - wave_sizze_46502) & - (2 * skip_waves_46523 - 1)) == - 0)) { - // read array element - { - x_46511 = ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500 + - offset_46522)]; - } - // apply reduction operation - { - int32_t defunc_1_op_res_46512 = smax32(x_46510, - x_46511); - - x_46510 = defunc_1_op_res_46512; - } - // write result of operation - { - ((__local - int32_t *) red_arr_mem_46506)[sext_i32_i64(local_tid_46500)] = - x_46510; - } - } - skip_waves_46523 *= 2; - } - // and back to memory with the final result - { - if (local_tid_46500 == 0) { - ((__global int32_t *) mem_45249)[(int64_t) 0] = x_46510; - } - } - } - } - - error_1: - return; - #undef segred_group_sizze_37084 -} -__kernel void mainMagnitudezisegred_small_34797(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45764_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28487, - int64_t i32_res_28493, - int64_t num_groups_34936, - int64_t segment_sizze_nonzzero_45757, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44531, - __global - 
unsigned char *mem_44536) -{ - #define segred_group_sizze_34935 (mainMagnitudezisegred_group_sizze_34791) - - const int block_dim0 = 0; - const int block_dim1 = 1; - const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45764_backing_0 = - (__local volatile - char *) red_arr_mem_45764_backing_aligned_0; - - if (*global_failure >= 0) - return; - - int32_t global_tid_45759; - int32_t local_tid_45760; - int64_t group_sizze_45763; - int32_t wave_sizze_45762; - int32_t group_tid_45761; - - global_tid_45759 = get_global_id(0); - local_tid_45760 = get_local_id(0); - group_sizze_45763 = get_local_size(0); - wave_sizze_45762 = LOCKSTEP_WIDTH; - group_tid_45761 = get_group_id(0); - - int32_t phys_tid_34797; - - phys_tid_34797 = global_tid_45759; - - __local char *red_arr_mem_45764; - - red_arr_mem_45764 = (__local char *) red_arr_mem_45764_backing_0; - - int32_t phys_group_id_45766; - - phys_group_id_45766 = get_group_id(0); - for (int32_t i_45767 = 0; i_45767 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478 * i32_res_28493 * - i32_res_28493, - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757))) - - phys_group_id_45766, sext_i64_i32(num_groups_34936)); - i_45767++) { - int32_t virt_group_id_45768 = phys_group_id_45766 + i_45767 * - sext_i64_i32(num_groups_34936); - int64_t gtid_34784 = squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757), - i32_res_28493 * i32_res_28493); - int64_t gtid_34785 = squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) - - squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757), - i32_res_28493 * i32_res_28493) * - 
(i32_res_28493 * i32_res_28493), - i32_res_28493); - int64_t gtid_34786 = squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) - - squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757), i32_res_28493 * - i32_res_28493) * (i32_res_28493 * i32_res_28493) - - squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) - - squot64(squot64(sext_i32_i64(local_tid_45760), - segment_sizze_nonzzero_45757) + - sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757), - i32_res_28493 * i32_res_28493) * - (i32_res_28493 * i32_res_28493), i32_res_28493) * - i32_res_28493; - int64_t gtid_34796 = srem64(sext_i32_i64(local_tid_45760), - i32_res_28487); + int32_t global_tid_129046; + int32_t local_tid_129047; + int64_t group_sizze_129050; + int32_t wave_sizze_129049; + int32_t group_tid_129048; + + global_tid_129046 = get_global_id(0); + local_tid_129047 = get_local_id(0); + group_sizze_129050 = get_local_size(0); + wave_sizze_129049 = LOCKSTEP_WIDTH; + group_tid_129048 = get_group_id(0); + + int32_t phys_tid_113762; + + phys_tid_113762 = global_tid_129046; + + __local char *red_arr_mem_129051; + + red_arr_mem_129051 = (__local char *) red_arr_mem_129051_backing_0; + + int32_t phys_group_id_129053; + + phys_group_id_129053 = get_group_id(0); + for (int32_t i_129054 = 0; i_129054 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * k2p2zq_75151, + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044))) - + phys_group_id_129053, sext_i64_i32(num_groups_113811)); + i_129054++) { + int32_t virt_group_id_129055 = phys_group_id_129053 + i_129054 * + 
sext_i64_i32(num_groups_113811); + int64_t gtid_113751 = squot64(squot64(sext_i32_i64(local_tid_129047), + segment_sizze_nonzzero_129044) + + sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044), + k2p2zq_75151); + int64_t gtid_113752 = squot64(sext_i32_i64(local_tid_129047), + segment_sizze_nonzzero_129044) + + sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044) - + squot64(squot64(sext_i32_i64(local_tid_129047), + segment_sizze_nonzzero_129044) + + sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044), k2p2zq_75151) * + k2p2zq_75151; + int64_t gtid_113761 = srem64(sext_i32_i64(local_tid_129047), + k2p2zq_75151); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28487) && (((slt64(gtid_34784, - m_28478) && - slt64(gtid_34785, - i32_res_28493)) && - slt64(gtid_34786, - i32_res_28493)) && - slt64(sext_i32_i64(local_tid_45760), - i32_res_28487 * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757)))) { - float x_34945 = ((__global - float *) images_mem_44381)[gtid_34784 * - N_28477 + - gtid_34796]; - float x_34946 = ((__global - float *) binop_p_mem_44390)[gtid_34785 * - N_28477 + - gtid_34796]; - float x_34947 = ((__global float *) mem_44531)[gtid_34786 * - N_28477 + - gtid_34796]; - float x_34948 = x_34946 * x_34947; - bool isnan_res_34949; - - isnan_res_34949 = futrts_isnan32(x_34945); - - float y_34950; - - if (isnan_res_34949) { - y_34950 = 0.0F; - } else { - y_34950 = 1.0F; - } - - float defunc_2_f_res_34951 = x_34948 * y_34950; + if (slt64((int64_t) 0, k2p2zq_75151) && ((slt64(gtid_113751, + m_75136) && + slt64(gtid_113752, + k2p2zq_75151)) && + slt64(sext_i32_i64(local_tid_129047), + k2p2zq_75151 * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044)))) { + double x_113820 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_113751 * + 
k2p2zq_75151 + + gtid_113761]; + double x_113821 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_113751 * + (k2p2zq_75151 * + k2p2zq_75151) + + gtid_113752 * + k2p2zq_75151 + + gtid_113761]; + double defunc_1_f_res_113822 = x_113820 * x_113821; // save map-out results { } // save results to be reduced { ((__local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - defunc_2_f_res_34951; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + defunc_1_f_res_113822; } } else { ((__local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - 0.0F; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + 0.0; } } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28487)) { + if (slt64((int64_t) 0, k2p2zq_75151)) { // perform segmented scan to imitate reduction { - float x_34939; - float x_34940; - float x_45769; - float x_45770; - bool ltid_in_bounds_45772; + double x_113814; + double x_113815; + double x_129056; + double x_129057; + bool ltid_in_bounds_129059; - ltid_in_bounds_45772 = slt64(sext_i32_i64(local_tid_45760), - i32_res_28487 * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757)); + ltid_in_bounds_129059 = slt64(sext_i32_i64(local_tid_129047), + k2p2zq_75151 * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044)); - int32_t skip_threads_45773; + int32_t skip_threads_129060; // read input for in-block scan { - if (ltid_in_bounds_45772) { - x_34940 = ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)]; - if ((local_tid_45760 - squot32(local_tid_45760, 32) * + if (ltid_in_bounds_129059) { + x_113815 = ((volatile __local + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)]; + if ((local_tid_129047 - squot32(local_tid_129047, 32) * 32) == 0) { - x_34939 = x_34940; + x_113814 = x_113815; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_45773 = 1; - while (slt32(skip_threads_45773, 32)) { - if 
(sle32(skip_threads_45773, local_tid_45760 - - squot32(local_tid_45760, 32) * 32) && - ltid_in_bounds_45772) { + skip_threads_129060 = 1; + while (slt32(skip_threads_129060, 32)) { + if (sle32(skip_threads_129060, local_tid_129047 - + squot32(local_tid_129047, 32) * 32) && + ltid_in_bounds_129059) { // read operands { - x_34939 = ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760) - - sext_i32_i64(skip_threads_45773)]; + x_113814 = ((volatile __local + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047) - + sext_i32_i64(skip_threads_129060)]; } // perform operation { - bool inactive_45774 = - slt64(srem64(sext_i32_i64(local_tid_45760), - i32_res_28487), - sext_i32_i64(local_tid_45760) - - sext_i32_i64(local_tid_45760 - - skip_threads_45773)); + bool inactive_129061 = + slt64(srem64(sext_i32_i64(local_tid_129047), + k2p2zq_75151), + sext_i32_i64(local_tid_129047) - + sext_i32_i64(local_tid_129047 - + skip_threads_129060)); - if (inactive_45774) { - x_34939 = x_34940; + if (inactive_129061) { + x_113814 = x_113815; } - if (!inactive_45774) { - float defunc_1_op_res_34941 = x_34939 + - x_34940; + if (!inactive_129061) { + double defunc_1_op_res_113816 = x_113814 + + x_113815; - x_34939 = defunc_1_op_res_34941; + x_113814 = defunc_1_op_res_113816; } } } - if (sle32(wave_sizze_45762, skip_threads_45773)) { + if (sle32(wave_sizze_129049, skip_threads_129060)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_45773, local_tid_45760 - - squot32(local_tid_45760, 32) * 32) && - ltid_in_bounds_45772) { + if (sle32(skip_threads_129060, local_tid_129047 - + squot32(local_tid_129047, 32) * 32) && + ltid_in_bounds_129059) { // write result { ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - x_34939; - x_34940 = x_34939; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + x_113814; + x_113815 = x_113814; } } - if (sle32(wave_sizze_45762, skip_threads_45773)) { + if 
(sle32(wave_sizze_129049, skip_threads_129060)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_45773 *= 2; + skip_threads_129060 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_45760 - squot32(local_tid_45760, 32) * 32) == - 31 && ltid_in_bounds_45772) { + if ((local_tid_129047 - squot32(local_tid_129047, 32) * + 32) == 31 && ltid_in_bounds_129059) { ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(squot32(local_tid_45760, - 32))] = - x_34939; + double *) red_arr_mem_129051)[sext_i32_i64(squot32(local_tid_129047, + 32))] = + x_113814; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_45775; + int32_t skip_threads_129062; // read input for in-block scan { - if (squot32(local_tid_45760, 32) == 0 && - ltid_in_bounds_45772) { - x_45770 = ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)]; - if ((local_tid_45760 - squot32(local_tid_45760, - 32) * 32) == 0) { - x_45769 = x_45770; + if (squot32(local_tid_129047, 32) == 0 && + ltid_in_bounds_129059) { + x_129057 = ((volatile __local + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)]; + if ((local_tid_129047 - squot32(local_tid_129047, + 32) * 32) == 0) { + x_129056 = x_129057; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_45775 = 1; - while (slt32(skip_threads_45775, 32)) { - if (sle32(skip_threads_45775, local_tid_45760 - - squot32(local_tid_45760, 32) * 32) && - (squot32(local_tid_45760, 32) == 0 && - ltid_in_bounds_45772)) { + skip_threads_129062 = 1; + while (slt32(skip_threads_129062, 32)) { + if (sle32(skip_threads_129062, local_tid_129047 - + squot32(local_tid_129047, 32) * 32) && + (squot32(local_tid_129047, 32) == 0 && + ltid_in_bounds_129059)) { // read operands { - x_45769 = ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760) - - 
sext_i32_i64(skip_threads_45775)]; + x_129056 = ((volatile __local + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047) - + sext_i32_i64(skip_threads_129062)]; } // perform operation { - bool inactive_45776 = - slt64(srem64(sext_i32_i64(local_tid_45760 * + bool inactive_129063 = + slt64(srem64(sext_i32_i64(local_tid_129047 * 32 + 32 - 1), - i32_res_28487), - sext_i32_i64(local_tid_45760 * + k2p2zq_75151), + sext_i32_i64(local_tid_129047 * 32 + 32 - 1) - - sext_i32_i64((local_tid_45760 - - skip_threads_45775) * + sext_i32_i64((local_tid_129047 - + skip_threads_129062) * 32 + 32 - 1)); - if (inactive_45776) { - x_45769 = x_45770; + if (inactive_129063) { + x_129056 = x_129057; } - if (!inactive_45776) { - float defunc_1_op_res_45771 = x_45769 + - x_45770; + if (!inactive_129063) { + double defunc_1_op_res_129058 = + x_129056 + x_129057; - x_45769 = defunc_1_op_res_45771; + x_129056 = defunc_1_op_res_129058; } } } - if (sle32(wave_sizze_45762, skip_threads_45775)) { + if (sle32(wave_sizze_129049, skip_threads_129062)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_45775, local_tid_45760 - - squot32(local_tid_45760, 32) * 32) && - (squot32(local_tid_45760, 32) == 0 && - ltid_in_bounds_45772)) { + if (sle32(skip_threads_129062, local_tid_129047 - + squot32(local_tid_129047, 32) * 32) && + (squot32(local_tid_129047, 32) == 0 && + ltid_in_bounds_129059)) { // write result { ((volatile __local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - x_45769; - x_45770 = x_45769; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + x_129056; + x_129057 = x_129056; } } - if (sle32(wave_sizze_45762, skip_threads_45775)) { + if (sle32(wave_sizze_129049, skip_threads_129062)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_45775 *= 2; + skip_threads_129062 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_45760, 32) == 0 || - !ltid_in_bounds_45772)) { + if 
(!(squot32(local_tid_129047, 32) == 0 || + !ltid_in_bounds_129059)) { // read operands { - x_34940 = x_34939; - x_34939 = ((__local - float *) red_arr_mem_45764)[sext_i32_i64(squot32(local_tid_45760, - 32)) - - (int64_t) 1]; + x_113815 = x_113814; + x_113814 = ((__local + double *) red_arr_mem_129051)[sext_i32_i64(squot32(local_tid_129047, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_45777 = - slt64(srem64(sext_i32_i64(local_tid_45760), - i32_res_28487), - sext_i32_i64(local_tid_45760) - - sext_i32_i64(squot32(local_tid_45760, + bool inactive_129064 = + slt64(srem64(sext_i32_i64(local_tid_129047), + k2p2zq_75151), + sext_i32_i64(local_tid_129047) - + sext_i32_i64(squot32(local_tid_129047, 32) * 32 - 1)); - if (inactive_45777) { - x_34939 = x_34940; + if (inactive_129064) { + x_113814 = x_113815; } - if (!inactive_45777) { - float defunc_1_op_res_34941 = x_34939 + x_34940; + if (!inactive_129064) { + double defunc_1_op_res_113816 = x_113814 + + x_113815; - x_34939 = defunc_1_op_res_34941; + x_113814 = defunc_1_op_res_113816; } } // write final result { ((__local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - x_34939; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + x_113814; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_45760, 32) == 0) { + if (squot32(local_tid_129047, 32) == 0) { ((__local - float *) red_arr_mem_45764)[sext_i32_i64(local_tid_45760)] = - x_34940; + double *) red_arr_mem_129051)[sext_i32_i64(local_tid_129047)] = + x_113815; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -46906,59 +50206,33 @@ def sync(self): barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760), m_28478 * i32_res_28493 * - i32_res_28493) && slt64(sext_i32_i64(local_tid_45760), - squot64(segred_group_sizze_34935, - 
segment_sizze_nonzzero_45757))) { + if (slt64(sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044) + + sext_i32_i64(local_tid_129047), m_75136 * k2p2zq_75151) && + slt64(sext_i32_i64(local_tid_129047), + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044))) { ((__global - float *) mem_44536)[squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760), - i32_res_28493 * i32_res_28493) * - (i32_res_28493 * i32_res_28493) + - squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760) - - squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760), - i32_res_28493 * - i32_res_28493) * - (i32_res_28493 * i32_res_28493), - i32_res_28493) * i32_res_28493 + - (sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760) - - squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760), - i32_res_28493 * i32_res_28493) * - (i32_res_28493 * i32_res_28493) - - squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760) - - squot64(sext_i32_i64(virt_group_id_45768) * - squot64(segred_group_sizze_34935, - segment_sizze_nonzzero_45757) + - sext_i32_i64(local_tid_45760), - i32_res_28493 * - i32_res_28493) * - (i32_res_28493 * i32_res_28493), - i32_res_28493) * - i32_res_28493)] = ((__local - float *) red_arr_mem_45764)[(sext_i32_i64(local_tid_45760) + - (int64_t) 1) * - segment_sizze_nonzzero_45757 - - (int64_t) 1]; + double *) mem_124653)[squot64(sext_i32_i64(virt_group_id_129055) * + 
squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044) + + sext_i32_i64(local_tid_129047), + k2p2zq_75151) * k2p2zq_75151 + + (sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044) + + sext_i32_i64(local_tid_129047) - + squot64(sext_i32_i64(virt_group_id_129055) * + squot64(segred_group_sizze_113810, + segment_sizze_nonzzero_129044) + + sext_i32_i64(local_tid_129047), + k2p2zq_75151) * + k2p2zq_75151)] = ((__local + double *) red_arr_mem_129051)[(sext_i32_i64(local_tid_129047) + + (int64_t) 1) * + segment_sizze_nonzzero_129044 - + (int64_t) 1]; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -46967,346 +50241,331 @@ def sync(self): error_1: return; - #undef segred_group_sizze_34935 + #undef segred_group_sizze_113810 } -__kernel void mainMagnitudezisegred_small_36148(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_45961_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28487, - int64_t i32_res_28493, - int64_t num_groups_36201, - int64_t segment_sizze_nonzzero_45954, - __global - unsigned char *images_mem_44381, - __global - unsigned char *binop_p_mem_44390, - __global - unsigned char *mem_44844) +__kernel void mainzisegred_small_113892(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129183_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t k2p2zq_75151, + int64_t num_groups_113939, + int64_t segment_sizze_nonzzero_129176, + __global unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global unsigned char *mem_124877) { - #define segred_group_sizze_36200 (mainMagnitudezisegred_group_sizze_36142) + #define segred_group_sizze_113938 (mainzisegred_group_sizze_113886) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_45961_backing_0 = + __local volatile char *restrict red_arr_mem_129183_backing_0 = (__local volatile - char 
*) red_arr_mem_45961_backing_aligned_0; + char *) red_arr_mem_129183_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_45956; - int32_t local_tid_45957; - int64_t group_sizze_45960; - int32_t wave_sizze_45959; - int32_t group_tid_45958; - - global_tid_45956 = get_global_id(0); - local_tid_45957 = get_local_id(0); - group_sizze_45960 = get_local_size(0); - wave_sizze_45959 = LOCKSTEP_WIDTH; - group_tid_45958 = get_group_id(0); - - int32_t phys_tid_36148; - - phys_tid_36148 = global_tid_45956; - - __local char *red_arr_mem_45961; - - red_arr_mem_45961 = (__local char *) red_arr_mem_45961_backing_0; - - int32_t phys_group_id_45963; - - phys_group_id_45963 = get_group_id(0); - for (int32_t i_45964 = 0; i_45964 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478 * i32_res_28493, - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954))) - - phys_group_id_45963, sext_i64_i32(num_groups_36201)); - i_45964++) { - int32_t virt_group_id_45965 = phys_group_id_45963 + i_45964 * - sext_i64_i32(num_groups_36201); - int64_t gtid_36137 = squot64(squot64(sext_i32_i64(local_tid_45957), - segment_sizze_nonzzero_45954) + - sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954), - i32_res_28493); - int64_t gtid_36138 = squot64(sext_i32_i64(local_tid_45957), - segment_sizze_nonzzero_45954) + - sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954) - - squot64(squot64(sext_i32_i64(local_tid_45957), - segment_sizze_nonzzero_45954) + - sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954), i32_res_28493) * - i32_res_28493; - int64_t gtid_36147 = srem64(sext_i32_i64(local_tid_45957), - i32_res_28487); + int32_t global_tid_129178; + int32_t local_tid_129179; + int64_t group_sizze_129182; + int32_t wave_sizze_129181; + int32_t group_tid_129180; + + global_tid_129178 = get_global_id(0); + local_tid_129179 = 
get_local_id(0); + group_sizze_129182 = get_local_size(0); + wave_sizze_129181 = LOCKSTEP_WIDTH; + group_tid_129180 = get_group_id(0); + + int32_t phys_tid_113892; + + phys_tid_113892 = global_tid_129178; + + __local char *red_arr_mem_129183; + + red_arr_mem_129183 = (__local char *) red_arr_mem_129183_backing_0; + + int32_t phys_group_id_129185; + + phys_group_id_129185 = get_group_id(0); + for (int32_t i_129186 = 0; i_129186 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136 * N_75135, + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176))) - + phys_group_id_129185, sext_i64_i32(num_groups_113939)); + i_129186++) { + int32_t virt_group_id_129187 = phys_group_id_129185 + i_129186 * + sext_i64_i32(num_groups_113939); + int64_t gtid_113881 = squot64(squot64(sext_i32_i64(local_tid_129179), + segment_sizze_nonzzero_129176) + + sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176), + N_75135); + int64_t gtid_113882 = squot64(sext_i32_i64(local_tid_129179), + segment_sizze_nonzzero_129176) + + sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176) - + squot64(squot64(sext_i32_i64(local_tid_129179), + segment_sizze_nonzzero_129176) + + sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176), N_75135) * + N_75135; + int64_t gtid_113891 = srem64(sext_i32_i64(local_tid_129179), + k2p2zq_75151); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28487) && ((slt64(gtid_36137, - m_28478) && - slt64(gtid_36138, - i32_res_28493)) && - slt64(sext_i32_i64(local_tid_45957), - i32_res_28487 * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954)))) { - float x_36210 = ((__global - float *) images_mem_44381)[gtid_36137 * - N_28477 + - gtid_36147]; - bool isnan_res_36211; - - isnan_res_36211 = futrts_isnan32(x_36210); - - float defunc_1_f_res_36212; - - if (isnan_res_36211) { - 
defunc_1_f_res_36212 = 0.0F; - } else { - float x_36209 = ((__global - float *) binop_p_mem_44390)[gtid_36138 * - N_28477 + - gtid_36147]; - float defunc_1_f_res_f_res_36213 = x_36209 * x_36210; - - defunc_1_f_res_36212 = defunc_1_f_res_f_res_36213; - } + if (slt64((int64_t) 0, k2p2zq_75151) && ((slt64(gtid_113881, + m_75136) && + slt64(gtid_113882, + N_75135)) && + slt64(sext_i32_i64(local_tid_129179), + k2p2zq_75151 * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176)))) { + double x_113947 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_113881 * + k2p2zq_75151 + + gtid_113891]; + double x_113948 = ((__global double *) mem_120124)[gtid_113882 * + k2p2zq_75151 + + gtid_113891]; + double defunc_1_f_res_113949 = x_113947 * x_113948; + // save map-out results { } // save results to be reduced { ((__local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - defunc_1_f_res_36212; + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + defunc_1_f_res_113949; } } else { ((__local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - 0.0F; + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + 0.0; } } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28487)) { + if (slt64((int64_t) 0, k2p2zq_75151)) { // perform segmented scan to imitate reduction { - float x_36204; - float x_36205; - float x_45966; - float x_45967; - bool ltid_in_bounds_45969; + double x_113942; + double x_113943; + double x_129188; + double x_129189; + bool ltid_in_bounds_129191; - ltid_in_bounds_45969 = slt64(sext_i32_i64(local_tid_45957), - i32_res_28487 * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954)); + ltid_in_bounds_129191 = slt64(sext_i32_i64(local_tid_129179), + k2p2zq_75151 * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176)); - int32_t skip_threads_45970; + int32_t skip_threads_129192; // read input for in-block scan { - if (ltid_in_bounds_45969) { - x_36205 = 
((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)]; - if ((local_tid_45957 - squot32(local_tid_45957, 32) * + if (ltid_in_bounds_129191) { + x_113943 = ((volatile __local + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)]; + if ((local_tid_129179 - squot32(local_tid_129179, 32) * 32) == 0) { - x_36204 = x_36205; + x_113942 = x_113943; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_45970 = 1; - while (slt32(skip_threads_45970, 32)) { - if (sle32(skip_threads_45970, local_tid_45957 - - squot32(local_tid_45957, 32) * 32) && - ltid_in_bounds_45969) { + skip_threads_129192 = 1; + while (slt32(skip_threads_129192, 32)) { + if (sle32(skip_threads_129192, local_tid_129179 - + squot32(local_tid_129179, 32) * 32) && + ltid_in_bounds_129191) { // read operands { - x_36204 = ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957) - - sext_i32_i64(skip_threads_45970)]; + x_113942 = ((volatile __local + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179) - + sext_i32_i64(skip_threads_129192)]; } // perform operation { - bool inactive_45971 = - slt64(srem64(sext_i32_i64(local_tid_45957), - i32_res_28487), - sext_i32_i64(local_tid_45957) - - sext_i32_i64(local_tid_45957 - - skip_threads_45970)); + bool inactive_129193 = + slt64(srem64(sext_i32_i64(local_tid_129179), + k2p2zq_75151), + sext_i32_i64(local_tid_129179) - + sext_i32_i64(local_tid_129179 - + skip_threads_129192)); - if (inactive_45971) { - x_36204 = x_36205; + if (inactive_129193) { + x_113942 = x_113943; } - if (!inactive_45971) { - float defunc_1_op_res_36206 = x_36204 + - x_36205; + if (!inactive_129193) { + double defunc_1_op_res_113944 = x_113942 + + x_113943; - x_36204 = defunc_1_op_res_36206; + x_113942 = defunc_1_op_res_113944; } } } - if (sle32(wave_sizze_45959, skip_threads_45970)) { + if (sle32(wave_sizze_129181, skip_threads_129192)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_45970, 
local_tid_45957 - - squot32(local_tid_45957, 32) * 32) && - ltid_in_bounds_45969) { + if (sle32(skip_threads_129192, local_tid_129179 - + squot32(local_tid_129179, 32) * 32) && + ltid_in_bounds_129191) { // write result { ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - x_36204; - x_36205 = x_36204; + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + x_113942; + x_113943 = x_113942; } } - if (sle32(wave_sizze_45959, skip_threads_45970)) { + if (sle32(wave_sizze_129181, skip_threads_129192)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_45970 *= 2; + skip_threads_129192 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_45957 - squot32(local_tid_45957, 32) * 32) == - 31 && ltid_in_bounds_45969) { + if ((local_tid_129179 - squot32(local_tid_129179, 32) * + 32) == 31 && ltid_in_bounds_129191) { ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(squot32(local_tid_45957, - 32))] = - x_36204; + double *) red_arr_mem_129183)[sext_i32_i64(squot32(local_tid_129179, + 32))] = + x_113942; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_45972; + int32_t skip_threads_129194; // read input for in-block scan { - if (squot32(local_tid_45957, 32) == 0 && - ltid_in_bounds_45969) { - x_45967 = ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)]; - if ((local_tid_45957 - squot32(local_tid_45957, - 32) * 32) == 0) { - x_45966 = x_45967; + if (squot32(local_tid_129179, 32) == 0 && + ltid_in_bounds_129191) { + x_129189 = ((volatile __local + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)]; + if ((local_tid_129179 - squot32(local_tid_129179, + 32) * 32) == 0) { + x_129188 = x_129189; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_45972 = 1; - while (slt32(skip_threads_45972, 32)) { - if 
(sle32(skip_threads_45972, local_tid_45957 - - squot32(local_tid_45957, 32) * 32) && - (squot32(local_tid_45957, 32) == 0 && - ltid_in_bounds_45969)) { + skip_threads_129194 = 1; + while (slt32(skip_threads_129194, 32)) { + if (sle32(skip_threads_129194, local_tid_129179 - + squot32(local_tid_129179, 32) * 32) && + (squot32(local_tid_129179, 32) == 0 && + ltid_in_bounds_129191)) { // read operands { - x_45966 = ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957) - - sext_i32_i64(skip_threads_45972)]; + x_129188 = ((volatile __local + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179) - + sext_i32_i64(skip_threads_129194)]; } // perform operation { - bool inactive_45973 = - slt64(srem64(sext_i32_i64(local_tid_45957 * + bool inactive_129195 = + slt64(srem64(sext_i32_i64(local_tid_129179 * 32 + 32 - 1), - i32_res_28487), - sext_i32_i64(local_tid_45957 * + k2p2zq_75151), + sext_i32_i64(local_tid_129179 * 32 + 32 - 1) - - sext_i32_i64((local_tid_45957 - - skip_threads_45972) * + sext_i32_i64((local_tid_129179 - + skip_threads_129194) * 32 + 32 - 1)); - if (inactive_45973) { - x_45966 = x_45967; + if (inactive_129195) { + x_129188 = x_129189; } - if (!inactive_45973) { - float defunc_1_op_res_45968 = x_45966 + - x_45967; + if (!inactive_129195) { + double defunc_1_op_res_129190 = + x_129188 + x_129189; - x_45966 = defunc_1_op_res_45968; + x_129188 = defunc_1_op_res_129190; } } } - if (sle32(wave_sizze_45959, skip_threads_45972)) { + if (sle32(wave_sizze_129181, skip_threads_129194)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_45972, local_tid_45957 - - squot32(local_tid_45957, 32) * 32) && - (squot32(local_tid_45957, 32) == 0 && - ltid_in_bounds_45969)) { + if (sle32(skip_threads_129194, local_tid_129179 - + squot32(local_tid_129179, 32) * 32) && + (squot32(local_tid_129179, 32) == 0 && + ltid_in_bounds_129191)) { // write result { ((volatile __local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - 
x_45966; - x_45967 = x_45966; + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + x_129188; + x_129189 = x_129188; } } - if (sle32(wave_sizze_45959, skip_threads_45972)) { + if (sle32(wave_sizze_129181, skip_threads_129194)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_45972 *= 2; + skip_threads_129194 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_45957, 32) == 0 || - !ltid_in_bounds_45969)) { + if (!(squot32(local_tid_129179, 32) == 0 || + !ltid_in_bounds_129191)) { // read operands { - x_36205 = x_36204; - x_36204 = ((__local - float *) red_arr_mem_45961)[sext_i32_i64(squot32(local_tid_45957, - 32)) - - (int64_t) 1]; + x_113943 = x_113942; + x_113942 = ((__local + double *) red_arr_mem_129183)[sext_i32_i64(squot32(local_tid_129179, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_45974 = - slt64(srem64(sext_i32_i64(local_tid_45957), - i32_res_28487), - sext_i32_i64(local_tid_45957) - - sext_i32_i64(squot32(local_tid_45957, + bool inactive_129196 = + slt64(srem64(sext_i32_i64(local_tid_129179), + k2p2zq_75151), + sext_i32_i64(local_tid_129179) - + sext_i32_i64(squot32(local_tid_129179, 32) * 32 - 1)); - if (inactive_45974) { - x_36204 = x_36205; + if (inactive_129196) { + x_113942 = x_113943; } - if (!inactive_45974) { - float defunc_1_op_res_36206 = x_36204 + x_36205; + if (!inactive_129196) { + double defunc_1_op_res_113944 = x_113942 + + x_113943; - x_36204 = defunc_1_op_res_36206; + x_113942 = defunc_1_op_res_113944; } } // write final result { ((__local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - x_36204; + double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + x_113942; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_45957, 32) == 0) { + if (squot32(local_tid_129179, 32) == 0) { ((__local - float *) red_arr_mem_45961)[sext_i32_i64(local_tid_45957)] = - x_36205; + 
double *) red_arr_mem_129183)[sext_i32_i64(local_tid_129179)] = + x_113943; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -47315,33 +50574,33 @@ def sync(self): barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954) + - sext_i32_i64(local_tid_45957), m_28478 * i32_res_28493) && - slt64(sext_i32_i64(local_tid_45957), - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954))) { + if (slt64(sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176) + + sext_i32_i64(local_tid_129179), m_75136 * N_75135) && + slt64(sext_i32_i64(local_tid_129179), + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176))) { ((__global - float *) mem_44844)[squot64(sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954) + - sext_i32_i64(local_tid_45957), - i32_res_28493) * i32_res_28493 + - (sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954) + - sext_i32_i64(local_tid_45957) - - squot64(sext_i32_i64(virt_group_id_45965) * - squot64(segred_group_sizze_36200, - segment_sizze_nonzzero_45954) + - sext_i32_i64(local_tid_45957), - i32_res_28493) * - i32_res_28493)] = ((__local - float *) red_arr_mem_45961)[(sext_i32_i64(local_tid_45957) + - (int64_t) 1) * - segment_sizze_nonzzero_45954 - - (int64_t) 1]; + double *) mem_124877)[squot64(sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176) + + sext_i32_i64(local_tid_129179), + N_75135) * N_75135 + + (sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176) + + sext_i32_i64(local_tid_129179) - + squot64(sext_i32_i64(virt_group_id_129187) * + squot64(segred_group_sizze_113938, + segment_sizze_nonzzero_129176) + + sext_i32_i64(local_tid_129179), + N_75135) * 
N_75135)] = + ((__local + double *) red_arr_mem_129183)[(sext_i32_i64(local_tid_129179) + + (int64_t) 1) * + segment_sizze_nonzzero_129176 - + (int64_t) 1]; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -47350,336 +50609,354 @@ def sync(self): error_1: return; - #undef segred_group_sizze_36200 + #undef segred_group_sizze_113938 } -__kernel void mainMagnitudezisegred_small_36285(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46049_backing_aligned_0, - int64_t m_28478, - int64_t i32_res_28493, - int64_t num_groups_36334, - int64_t segment_sizze_nonzzero_46042, - __global - unsigned char *defunc_3_map_res_mem_44629, - __global - unsigned char *defunc_3_map_res_mem_44850, - __global - unsigned char *mem_44910) +__kernel void mainzisegred_small_114313(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129413_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, + int64_t num_groups_114363, + int64_t segment_sizze_nonzzero_129406, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global unsigned char *mem_124946, + __global unsigned char *mem_124949) { - #define segred_group_sizze_36333 (mainMagnitudezisegred_group_sizze_36279) + #define segred_group_sizze_114362 (mainzisegred_group_sizze_114307) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46049_backing_0 = + __local volatile char *restrict red_arr_mem_129413_backing_0 = (__local volatile - char *) red_arr_mem_46049_backing_aligned_0; + char *) red_arr_mem_129413_backing_aligned_0; + volatile __local bool local_failure; - if (*global_failure >= 0) - return; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46044; - int32_t local_tid_46045; - int64_t group_sizze_46048; - int32_t 
wave_sizze_46047; - int32_t group_tid_46046; + int32_t global_tid_129408; + int32_t local_tid_129409; + int64_t group_sizze_129412; + int32_t wave_sizze_129411; + int32_t group_tid_129410; - global_tid_46044 = get_global_id(0); - local_tid_46045 = get_local_id(0); - group_sizze_46048 = get_local_size(0); - wave_sizze_46047 = LOCKSTEP_WIDTH; - group_tid_46046 = get_group_id(0); + global_tid_129408 = get_global_id(0); + local_tid_129409 = get_local_id(0); + group_sizze_129412 = get_local_size(0); + wave_sizze_129411 = LOCKSTEP_WIDTH; + group_tid_129410 = get_group_id(0); - int32_t phys_tid_36285; + int32_t phys_tid_114313; - phys_tid_36285 = global_tid_46044; + phys_tid_114313 = global_tid_129408; - __local char *red_arr_mem_46049; + __local char *red_arr_mem_129413; - red_arr_mem_46049 = (__local char *) red_arr_mem_46049_backing_0; + red_arr_mem_129413 = (__local char *) red_arr_mem_129413_backing_0; - int32_t phys_group_id_46051; + int32_t phys_group_id_129415; - phys_group_id_46051 = get_group_id(0); - for (int32_t i_46052 = 0; i_46052 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478 * i32_res_28493, - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042))) - - phys_group_id_46051, sext_i64_i32(num_groups_36334)); - i_46052++) { - int32_t virt_group_id_46053 = phys_group_id_46051 + i_46052 * - sext_i64_i32(num_groups_36334); - int64_t gtid_36274 = squot64(squot64(sext_i32_i64(local_tid_46045), - segment_sizze_nonzzero_46042) + - sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042), - i32_res_28493); - int64_t gtid_36275 = squot64(sext_i32_i64(local_tid_46045), - segment_sizze_nonzzero_46042) + - sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042) - - squot64(squot64(sext_i32_i64(local_tid_46045), - segment_sizze_nonzzero_46042) + - sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042), i32_res_28493) * - 
i32_res_28493; - int64_t gtid_36284 = srem64(sext_i32_i64(local_tid_46045), - i32_res_28493); + phys_group_id_129415 = get_group_id(0); + for (int32_t i_129416 = 0; i_129416 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406))) - + phys_group_id_129415, sext_i64_i32(num_groups_114363)); + i_129416++) { + int32_t virt_group_id_129417 = phys_group_id_129415 + i_129416 * + sext_i64_i32(num_groups_114363); + int64_t gtid_114304 = squot64(sext_i32_i64(local_tid_129409), + segment_sizze_nonzzero_129406) + + sext_i32_i64(virt_group_id_129417) * + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406); + int64_t gtid_114312 = srem64(sext_i32_i64(local_tid_129409), n_75139); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28493) && ((slt64(gtid_36274, - m_28478) && - slt64(gtid_36275, - i32_res_28493)) && - slt64(sext_i32_i64(local_tid_46045), - i32_res_28493 * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042)))) { - float x_36343 = ((__global - float *) defunc_3_map_res_mem_44850)[gtid_36274 * - i32_res_28493 + - gtid_36284]; - float x_36344 = ((__global - float *) defunc_3_map_res_mem_44629)[gtid_36274 * - (i32_res_28493 * - i32_res_28493) + - gtid_36275 * - i32_res_28493 + - gtid_36284]; - float defunc_1_f_res_36345 = x_36343 * x_36344; + if (slt64((int64_t) 0, n_75139) && (slt64(gtid_114304, m_75136) && + slt64(sext_i32_i64(local_tid_129409), + n_75139 * + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406)))) { + int64_t defunc_0_f_res_114370 = ((__global + int64_t *) mem_124946)[gtid_114304]; + bool cond_114372 = slt64(gtid_114312, defunc_0_f_res_114370); + double defunc_0_f_res_114373; + + if (cond_114372) { + bool y_114375 = slt64(gtid_114312, N_75135); + bool index_certs_114377; + + if (!y_114375) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 197) == -1) { + global_failure_args[0] = gtid_114312; + global_failure_args[1] 
= N_75135; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_114378 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114304 * + N_75135 + + gtid_114312]; + + defunc_0_f_res_114373 = defunc_0_f_res_t_res_114378; + } else { + defunc_0_f_res_114373 = 0.0; + } + + double defunc_0_f_res_114379 = defunc_0_f_res_114373 * + defunc_0_f_res_114373; // save map-out results { } // save results to be reduced { ((__local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - defunc_1_f_res_36345; + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + defunc_0_f_res_114379; } } else { ((__local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - 0.0F; + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + 0.0; } } + + error_0: barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28493)) { + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_75139)) { // perform segmented scan to imitate reduction { - float x_36337; - float x_36338; - float x_46054; - float x_46055; - bool ltid_in_bounds_46057; + double x_114366; + double x_114367; + double x_129418; + double x_129419; + bool ltid_in_bounds_129421; - ltid_in_bounds_46057 = slt64(sext_i32_i64(local_tid_46045), - i32_res_28493 * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042)); + ltid_in_bounds_129421 = slt64(sext_i32_i64(local_tid_129409), + n_75139 * + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406)); - int32_t skip_threads_46058; + int32_t skip_threads_129422; // read input for in-block scan { - if (ltid_in_bounds_46057) { - x_36338 = ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)]; - if ((local_tid_46045 - squot32(local_tid_46045, 32) * + if (ltid_in_bounds_129421) { + x_114367 = ((volatile __local + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)]; + if ((local_tid_129409 - squot32(local_tid_129409, 32) 
* 32) == 0) { - x_36337 = x_36338; + x_114366 = x_114367; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46058 = 1; - while (slt32(skip_threads_46058, 32)) { - if (sle32(skip_threads_46058, local_tid_46045 - - squot32(local_tid_46045, 32) * 32) && - ltid_in_bounds_46057) { + skip_threads_129422 = 1; + while (slt32(skip_threads_129422, 32)) { + if (sle32(skip_threads_129422, local_tid_129409 - + squot32(local_tid_129409, 32) * 32) && + ltid_in_bounds_129421) { // read operands { - x_36337 = ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045) - - sext_i32_i64(skip_threads_46058)]; + x_114366 = ((volatile __local + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409) - + sext_i32_i64(skip_threads_129422)]; } // perform operation { - bool inactive_46059 = - slt64(srem64(sext_i32_i64(local_tid_46045), - i32_res_28493), - sext_i32_i64(local_tid_46045) - - sext_i32_i64(local_tid_46045 - - skip_threads_46058)); + bool inactive_129423 = + slt64(srem64(sext_i32_i64(local_tid_129409), + n_75139), + sext_i32_i64(local_tid_129409) - + sext_i32_i64(local_tid_129409 - + skip_threads_129422)); - if (inactive_46059) { - x_36337 = x_36338; + if (inactive_129423) { + x_114366 = x_114367; } - if (!inactive_46059) { - float defunc_1_op_res_36339 = x_36337 + - x_36338; + if (!inactive_129423) { + double defunc_1_op_res_114368 = x_114366 + + x_114367; - x_36337 = defunc_1_op_res_36339; + x_114366 = defunc_1_op_res_114368; } } } - if (sle32(wave_sizze_46047, skip_threads_46058)) { + if (sle32(wave_sizze_129411, skip_threads_129422)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46058, local_tid_46045 - - squot32(local_tid_46045, 32) * 32) && - ltid_in_bounds_46057) { + if (sle32(skip_threads_129422, local_tid_129409 - + squot32(local_tid_129409, 32) * 32) && + ltid_in_bounds_129421) { // write result { ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - x_36337; - x_36338 = x_36337; + 
double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + x_114366; + x_114367 = x_114366; } } - if (sle32(wave_sizze_46047, skip_threads_46058)) { + if (sle32(wave_sizze_129411, skip_threads_129422)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46058 *= 2; + skip_threads_129422 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46045 - squot32(local_tid_46045, 32) * 32) == - 31 && ltid_in_bounds_46057) { + if ((local_tid_129409 - squot32(local_tid_129409, 32) * + 32) == 31 && ltid_in_bounds_129421) { ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(squot32(local_tid_46045, - 32))] = - x_36337; + double *) red_arr_mem_129413)[sext_i32_i64(squot32(local_tid_129409, + 32))] = + x_114366; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46060; + int32_t skip_threads_129424; // read input for in-block scan { - if (squot32(local_tid_46045, 32) == 0 && - ltid_in_bounds_46057) { - x_46055 = ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)]; - if ((local_tid_46045 - squot32(local_tid_46045, - 32) * 32) == 0) { - x_46054 = x_46055; + if (squot32(local_tid_129409, 32) == 0 && + ltid_in_bounds_129421) { + x_129419 = ((volatile __local + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)]; + if ((local_tid_129409 - squot32(local_tid_129409, + 32) * 32) == 0) { + x_129418 = x_129419; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46060 = 1; - while (slt32(skip_threads_46060, 32)) { - if (sle32(skip_threads_46060, local_tid_46045 - - squot32(local_tid_46045, 32) * 32) && - (squot32(local_tid_46045, 32) == 0 && - ltid_in_bounds_46057)) { + skip_threads_129424 = 1; + while (slt32(skip_threads_129424, 32)) { + if (sle32(skip_threads_129424, local_tid_129409 - + squot32(local_tid_129409, 32) * 32) && + (squot32(local_tid_129409, 32) == 0 
&& + ltid_in_bounds_129421)) { // read operands { - x_46054 = ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045) - - sext_i32_i64(skip_threads_46060)]; + x_129418 = ((volatile __local + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409) - + sext_i32_i64(skip_threads_129424)]; } // perform operation { - bool inactive_46061 = - slt64(srem64(sext_i32_i64(local_tid_46045 * - 32 + 32 - 1), - i32_res_28493), - sext_i32_i64(local_tid_46045 * + bool inactive_129425 = + slt64(srem64(sext_i32_i64(local_tid_129409 * + 32 + 32 - 1), n_75139), + sext_i32_i64(local_tid_129409 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46045 - - skip_threads_46060) * + sext_i32_i64((local_tid_129409 - + skip_threads_129424) * 32 + 32 - 1)); - if (inactive_46061) { - x_46054 = x_46055; + if (inactive_129425) { + x_129418 = x_129419; } - if (!inactive_46061) { - float defunc_1_op_res_46056 = x_46054 + - x_46055; + if (!inactive_129425) { + double defunc_1_op_res_129420 = + x_129418 + x_129419; - x_46054 = defunc_1_op_res_46056; + x_129418 = defunc_1_op_res_129420; } } } - if (sle32(wave_sizze_46047, skip_threads_46060)) { + if (sle32(wave_sizze_129411, skip_threads_129424)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46060, local_tid_46045 - - squot32(local_tid_46045, 32) * 32) && - (squot32(local_tid_46045, 32) == 0 && - ltid_in_bounds_46057)) { + if (sle32(skip_threads_129424, local_tid_129409 - + squot32(local_tid_129409, 32) * 32) && + (squot32(local_tid_129409, 32) == 0 && + ltid_in_bounds_129421)) { // write result { ((volatile __local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - x_46054; - x_46055 = x_46054; + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + x_129418; + x_129419 = x_129418; } } - if (sle32(wave_sizze_46047, skip_threads_46060)) { + if (sle32(wave_sizze_129411, skip_threads_129424)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46060 *= 2; + skip_threads_129424 *= 2; } } } 
barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46045, 32) == 0 || - !ltid_in_bounds_46057)) { + if (!(squot32(local_tid_129409, 32) == 0 || + !ltid_in_bounds_129421)) { // read operands { - x_36338 = x_36337; - x_36337 = ((__local - float *) red_arr_mem_46049)[sext_i32_i64(squot32(local_tid_46045, - 32)) - - (int64_t) 1]; + x_114367 = x_114366; + x_114366 = ((__local + double *) red_arr_mem_129413)[sext_i32_i64(squot32(local_tid_129409, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46062 = - slt64(srem64(sext_i32_i64(local_tid_46045), - i32_res_28493), - sext_i32_i64(local_tid_46045) - - sext_i32_i64(squot32(local_tid_46045, + bool inactive_129426 = + slt64(srem64(sext_i32_i64(local_tid_129409), + n_75139), + sext_i32_i64(local_tid_129409) - + sext_i32_i64(squot32(local_tid_129409, 32) * 32 - 1)); - if (inactive_46062) { - x_36337 = x_36338; + if (inactive_129426) { + x_114366 = x_114367; } - if (!inactive_46062) { - float defunc_1_op_res_36339 = x_36337 + x_36338; + if (!inactive_129426) { + double defunc_1_op_res_114368 = x_114366 + + x_114367; - x_36337 = defunc_1_op_res_36339; + x_114366 = defunc_1_op_res_114368; } } // write final result { ((__local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - x_36337; + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + x_114366; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46045, 32) == 0) { + if (squot32(local_tid_129409, 32) == 0) { ((__local - float *) red_arr_mem_46049)[sext_i32_i64(local_tid_46045)] = - x_36338; + double *) red_arr_mem_129413)[sext_i32_i64(local_tid_129409)] = + x_114367; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -47688,33 +50965,23 @@ def sync(self): barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042) + - 
sext_i32_i64(local_tid_46045), m_28478 * i32_res_28493) && - slt64(sext_i32_i64(local_tid_46045), - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042))) { + if (slt64(sext_i32_i64(virt_group_id_129417) * + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406) + + sext_i32_i64(local_tid_129409), m_75136) && + slt64(sext_i32_i64(local_tid_129409), + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406))) { ((__global - float *) mem_44910)[squot64(sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042) + - sext_i32_i64(local_tid_46045), - i32_res_28493) * i32_res_28493 + - (sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042) + - sext_i32_i64(local_tid_46045) - - squot64(sext_i32_i64(virt_group_id_46053) * - squot64(segred_group_sizze_36333, - segment_sizze_nonzzero_46042) + - sext_i32_i64(local_tid_46045), - i32_res_28493) * - i32_res_28493)] = ((__local - float *) red_arr_mem_46049)[(sext_i32_i64(local_tid_46045) + - (int64_t) 1) * - segment_sizze_nonzzero_46042 - - (int64_t) 1]; + double *) mem_124949)[sext_i32_i64(virt_group_id_129417) * + squot64(segred_group_sizze_114362, + segment_sizze_nonzzero_129406) + + sext_i32_i64(local_tid_129409)] = + ((__local + double *) red_arr_mem_129413)[(sext_i32_i64(local_tid_129409) + + (int64_t) 1) * + segment_sizze_nonzzero_129406 - + (int64_t) 1]; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -47723,333 +50990,313 @@ def sync(self): error_1: return; - #undef segred_group_sizze_36333 + #undef segred_group_sizze_114362 } -__kernel void mainMagnitudezisegred_small_36415(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46181_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28493, - int64_t num_groups_36462, - int64_t segment_sizze_nonzzero_46174, - __global - unsigned char *mem_44397, - __global - unsigned char *defunc_4_map_res_mem_44916, - __global 
- unsigned char *mem_45134) +__kernel void mainzisegred_small_114337(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129353_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t n_75139, + int64_t num_groups_114349, + int64_t segment_sizze_nonzzero_129346, + __global unsigned char *mem_124142, + __global unsigned char *mem_124946) { - #define segred_group_sizze_36461 (mainMagnitudezisegred_group_sizze_36409) + #define segred_group_sizze_114348 (mainzisegred_group_sizze_114331) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46181_backing_0 = + __local volatile char *restrict red_arr_mem_129353_backing_0 = (__local volatile - char *) red_arr_mem_46181_backing_aligned_0; + char *) red_arr_mem_129353_backing_aligned_0; if (*global_failure >= 0) return; - int32_t global_tid_46176; - int32_t local_tid_46177; - int64_t group_sizze_46180; - int32_t wave_sizze_46179; - int32_t group_tid_46178; + int32_t global_tid_129348; + int32_t local_tid_129349; + int64_t group_sizze_129352; + int32_t wave_sizze_129351; + int32_t group_tid_129350; - global_tid_46176 = get_global_id(0); - local_tid_46177 = get_local_id(0); - group_sizze_46180 = get_local_size(0); - wave_sizze_46179 = LOCKSTEP_WIDTH; - group_tid_46178 = get_group_id(0); + global_tid_129348 = get_global_id(0); + local_tid_129349 = get_local_id(0); + group_sizze_129352 = get_local_size(0); + wave_sizze_129351 = LOCKSTEP_WIDTH; + group_tid_129350 = get_group_id(0); - int32_t phys_tid_36415; + int32_t phys_tid_114337; - phys_tid_36415 = global_tid_46176; + phys_tid_114337 = global_tid_129348; - __local char *red_arr_mem_46181; + __local char *red_arr_mem_129353; - red_arr_mem_46181 = (__local char *) red_arr_mem_46181_backing_0; + red_arr_mem_129353 = (__local char *) red_arr_mem_129353_backing_0; - int32_t phys_group_id_46183; + int32_t phys_group_id_129355; - phys_group_id_46183 = get_group_id(0); - for (int32_t 
i_46184 = 0; i_46184 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478 * N_28477, - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174))) - - phys_group_id_46183, sext_i64_i32(num_groups_36462)); - i_46184++) { - int32_t virt_group_id_46185 = phys_group_id_46183 + i_46184 * - sext_i64_i32(num_groups_36462); - int64_t gtid_36404 = squot64(squot64(sext_i32_i64(local_tid_46177), - segment_sizze_nonzzero_46174) + - sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174), - N_28477); - int64_t gtid_36405 = squot64(sext_i32_i64(local_tid_46177), - segment_sizze_nonzzero_46174) + - sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174) - - squot64(squot64(sext_i32_i64(local_tid_46177), - segment_sizze_nonzzero_46174) + - sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174), N_28477) * - N_28477; - int64_t gtid_36414 = srem64(sext_i32_i64(local_tid_46177), - i32_res_28493); + phys_group_id_129355 = get_group_id(0); + for (int32_t i_129356 = 0; i_129356 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346))) - + phys_group_id_129355, sext_i64_i32(num_groups_114349)); + i_129356++) { + int32_t virt_group_id_129357 = phys_group_id_129355 + i_129356 * + sext_i64_i32(num_groups_114349); + int64_t gtid_114328 = squot64(sext_i32_i64(local_tid_129349), + segment_sizze_nonzzero_129346) + + sext_i32_i64(virt_group_id_129357) * + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346); + int64_t gtid_114336 = srem64(sext_i32_i64(local_tid_129349), n_75139); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28493) && ((slt64(gtid_36404, - m_28478) && - slt64(gtid_36405, - N_28477)) && - slt64(sext_i32_i64(local_tid_46177), - i32_res_28493 * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174)))) { - float x_36470 = 
((__global - float *) defunc_4_map_res_mem_44916)[gtid_36404 * - i32_res_28493 + - gtid_36414]; - float x_36471 = ((__global float *) mem_44397)[gtid_36405 * - i32_res_28493 + - gtid_36414]; - float defunc_1_f_res_36472 = x_36470 * x_36471; + if (slt64((int64_t) 0, n_75139) && (slt64(gtid_114328, m_75136) && + slt64(sext_i32_i64(local_tid_129349), + n_75139 * + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346)))) { + double x_114356 = ((__global double *) mem_124142)[gtid_114328 * + N_75135 + + gtid_114336]; + bool isnan_res_114357; + + isnan_res_114357 = futrts_isnan64(x_114356); + + bool cond_114358 = !isnan_res_114357; + int64_t defunc_0_f_res_114359 = btoi_bool_i64(cond_114358); // save map-out results { } // save results to be reduced { ((__local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - defunc_1_f_res_36472; + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + defunc_0_f_res_114359; } } else { ((__local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - 0.0F; + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + (int64_t) 0; } } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28493)) { + if (slt64((int64_t) 0, n_75139)) { // perform segmented scan to imitate reduction { - float x_36465; - float x_36466; - float x_46186; - float x_46187; - bool ltid_in_bounds_46189; + int64_t x_114352; + int64_t x_114353; + int64_t x_129358; + int64_t x_129359; + bool ltid_in_bounds_129361; - ltid_in_bounds_46189 = slt64(sext_i32_i64(local_tid_46177), - i32_res_28493 * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174)); + ltid_in_bounds_129361 = slt64(sext_i32_i64(local_tid_129349), + n_75139 * + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346)); - int32_t skip_threads_46190; + int32_t skip_threads_129362; // read input for in-block scan { - if (ltid_in_bounds_46189) { - x_36466 = ((volatile __local - float *) 
red_arr_mem_46181)[sext_i32_i64(local_tid_46177)]; - if ((local_tid_46177 - squot32(local_tid_46177, 32) * + if (ltid_in_bounds_129361) { + x_114353 = ((volatile __local + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)]; + if ((local_tid_129349 - squot32(local_tid_129349, 32) * 32) == 0) { - x_36465 = x_36466; + x_114352 = x_114353; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46190 = 1; - while (slt32(skip_threads_46190, 32)) { - if (sle32(skip_threads_46190, local_tid_46177 - - squot32(local_tid_46177, 32) * 32) && - ltid_in_bounds_46189) { + skip_threads_129362 = 1; + while (slt32(skip_threads_129362, 32)) { + if (sle32(skip_threads_129362, local_tid_129349 - + squot32(local_tid_129349, 32) * 32) && + ltid_in_bounds_129361) { // read operands { - x_36465 = ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177) - - sext_i32_i64(skip_threads_46190)]; + x_114352 = ((volatile __local + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349) - + sext_i32_i64(skip_threads_129362)]; } // perform operation { - bool inactive_46191 = - slt64(srem64(sext_i32_i64(local_tid_46177), - i32_res_28493), - sext_i32_i64(local_tid_46177) - - sext_i32_i64(local_tid_46177 - - skip_threads_46190)); + bool inactive_129363 = + slt64(srem64(sext_i32_i64(local_tid_129349), + n_75139), + sext_i32_i64(local_tid_129349) - + sext_i32_i64(local_tid_129349 - + skip_threads_129362)); - if (inactive_46191) { - x_36465 = x_36466; + if (inactive_129363) { + x_114352 = x_114353; } - if (!inactive_46191) { - float defunc_1_op_res_36467 = x_36465 + - x_36466; + if (!inactive_129363) { + int64_t defunc_1_op_res_114354 = + add64(x_114352, x_114353); - x_36465 = defunc_1_op_res_36467; + x_114352 = defunc_1_op_res_114354; } } } - if (sle32(wave_sizze_46179, skip_threads_46190)) { + if (sle32(wave_sizze_129351, skip_threads_129362)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46190, local_tid_46177 - - 
squot32(local_tid_46177, 32) * 32) && - ltid_in_bounds_46189) { + if (sle32(skip_threads_129362, local_tid_129349 - + squot32(local_tid_129349, 32) * 32) && + ltid_in_bounds_129361) { // write result { ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - x_36465; - x_36466 = x_36465; + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + x_114352; + x_114353 = x_114352; } } - if (sle32(wave_sizze_46179, skip_threads_46190)) { + if (sle32(wave_sizze_129351, skip_threads_129362)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46190 *= 2; + skip_threads_129362 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46177 - squot32(local_tid_46177, 32) * 32) == - 31 && ltid_in_bounds_46189) { + if ((local_tid_129349 - squot32(local_tid_129349, 32) * + 32) == 31 && ltid_in_bounds_129361) { ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(squot32(local_tid_46177, - 32))] = - x_36465; + int64_t *) red_arr_mem_129353)[sext_i32_i64(squot32(local_tid_129349, + 32))] = + x_114352; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46192; + int32_t skip_threads_129364; // read input for in-block scan { - if (squot32(local_tid_46177, 32) == 0 && - ltid_in_bounds_46189) { - x_46187 = ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)]; - if ((local_tid_46177 - squot32(local_tid_46177, - 32) * 32) == 0) { - x_46186 = x_46187; + if (squot32(local_tid_129349, 32) == 0 && + ltid_in_bounds_129361) { + x_129359 = ((volatile __local + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)]; + if ((local_tid_129349 - squot32(local_tid_129349, + 32) * 32) == 0) { + x_129358 = x_129359; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46192 = 1; - while (slt32(skip_threads_46192, 32)) { - if (sle32(skip_threads_46192, 
local_tid_46177 - - squot32(local_tid_46177, 32) * 32) && - (squot32(local_tid_46177, 32) == 0 && - ltid_in_bounds_46189)) { + skip_threads_129364 = 1; + while (slt32(skip_threads_129364, 32)) { + if (sle32(skip_threads_129364, local_tid_129349 - + squot32(local_tid_129349, 32) * 32) && + (squot32(local_tid_129349, 32) == 0 && + ltid_in_bounds_129361)) { // read operands { - x_46186 = ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177) - - sext_i32_i64(skip_threads_46192)]; + x_129358 = ((volatile __local + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349) - + sext_i32_i64(skip_threads_129364)]; } // perform operation { - bool inactive_46193 = - slt64(srem64(sext_i32_i64(local_tid_46177 * - 32 + 32 - 1), - i32_res_28493), - sext_i32_i64(local_tid_46177 * + bool inactive_129365 = + slt64(srem64(sext_i32_i64(local_tid_129349 * + 32 + 32 - 1), n_75139), + sext_i32_i64(local_tid_129349 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46177 - - skip_threads_46192) * + sext_i32_i64((local_tid_129349 - + skip_threads_129364) * 32 + 32 - 1)); - if (inactive_46193) { - x_46186 = x_46187; + if (inactive_129365) { + x_129358 = x_129359; } - if (!inactive_46193) { - float defunc_1_op_res_46188 = x_46186 + - x_46187; + if (!inactive_129365) { + int64_t defunc_1_op_res_129360 = + add64(x_129358, x_129359); - x_46186 = defunc_1_op_res_46188; + x_129358 = defunc_1_op_res_129360; } } } - if (sle32(wave_sizze_46179, skip_threads_46192)) { + if (sle32(wave_sizze_129351, skip_threads_129364)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46192, local_tid_46177 - - squot32(local_tid_46177, 32) * 32) && - (squot32(local_tid_46177, 32) == 0 && - ltid_in_bounds_46189)) { + if (sle32(skip_threads_129364, local_tid_129349 - + squot32(local_tid_129349, 32) * 32) && + (squot32(local_tid_129349, 32) == 0 && + ltid_in_bounds_129361)) { // write result { ((volatile __local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - x_46186; - 
x_46187 = x_46186; + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + x_129358; + x_129359 = x_129358; } } - if (sle32(wave_sizze_46179, skip_threads_46192)) { + if (sle32(wave_sizze_129351, skip_threads_129364)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46192 *= 2; + skip_threads_129364 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46177, 32) == 0 || - !ltid_in_bounds_46189)) { + if (!(squot32(local_tid_129349, 32) == 0 || + !ltid_in_bounds_129361)) { // read operands { - x_36466 = x_36465; - x_36465 = ((__local - float *) red_arr_mem_46181)[sext_i32_i64(squot32(local_tid_46177, - 32)) - - (int64_t) 1]; + x_114353 = x_114352; + x_114352 = ((__local + int64_t *) red_arr_mem_129353)[sext_i32_i64(squot32(local_tid_129349, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46194 = - slt64(srem64(sext_i32_i64(local_tid_46177), - i32_res_28493), - sext_i32_i64(local_tid_46177) - - sext_i32_i64(squot32(local_tid_46177, + bool inactive_129366 = + slt64(srem64(sext_i32_i64(local_tid_129349), + n_75139), + sext_i32_i64(local_tid_129349) - + sext_i32_i64(squot32(local_tid_129349, 32) * 32 - 1)); - if (inactive_46194) { - x_36465 = x_36466; + if (inactive_129366) { + x_114352 = x_114353; } - if (!inactive_46194) { - float defunc_1_op_res_36467 = x_36465 + x_36466; + if (!inactive_129366) { + int64_t defunc_1_op_res_114354 = add64(x_114352, + x_114353); - x_36465 = defunc_1_op_res_36467; + x_114352 = defunc_1_op_res_114354; } } // write final result { ((__local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - x_36465; + int64_t *) red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + x_114352; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46177, 32) == 0) { + if (squot32(local_tid_129349, 32) == 0) { ((__local - float *) red_arr_mem_46181)[sext_i32_i64(local_tid_46177)] = - x_36466; + int64_t *) 
red_arr_mem_129353)[sext_i32_i64(local_tid_129349)] = + x_114353; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -48058,32 +51305,23 @@ def sync(self): barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174) + - sext_i32_i64(local_tid_46177), m_28478 * N_28477) && - slt64(sext_i32_i64(local_tid_46177), - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174))) { + if (slt64(sext_i32_i64(virt_group_id_129357) * + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346) + + sext_i32_i64(local_tid_129349), m_75136) && + slt64(sext_i32_i64(local_tid_129349), + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346))) { ((__global - float *) mem_45134)[squot64(sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174) + - sext_i32_i64(local_tid_46177), - N_28477) * N_28477 + - (sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174) + - sext_i32_i64(local_tid_46177) - - squot64(sext_i32_i64(virt_group_id_46185) * - squot64(segred_group_sizze_36461, - segment_sizze_nonzzero_46174) + - sext_i32_i64(local_tid_46177), - N_28477) * N_28477)] = ((__local - float *) red_arr_mem_46181)[(sext_i32_i64(local_tid_46177) + - (int64_t) 1) * - segment_sizze_nonzzero_46174 - - (int64_t) 1]; + int64_t *) mem_124946)[sext_i32_i64(virt_group_id_129357) * + squot64(segred_group_sizze_114348, + segment_sizze_nonzzero_129346) + + sext_i32_i64(local_tid_129349)] = + ((__local + int64_t *) red_arr_mem_129353)[(sext_i32_i64(local_tid_129349) + + (int64_t) 1) * + segment_sizze_nonzzero_129346 - + (int64_t) 1]; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -48092,34 +51330,33 @@ def sync(self): error_1: return; - #undef segred_group_sizze_36461 + #undef segred_group_sizze_114348 } -__kernel void mainMagnitudezisegred_small_36993(__global int *global_failure, - int 
failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46433_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28487, - int64_t num_groups_37044, - int64_t segment_sizze_nonzzero_46426, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *mem_45232, - __global - unsigned char *mem_45235) +__kernel void mainzisegred_small_114467(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129518_backing_aligned_0, + int64_t N_75135, int64_t m_75136, + int64_t defunc_2_reduce_comm_res_76995, + int64_t num_groups_114488, + int64_t segment_sizze_nonzzero_129511, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124969) { - #define segred_group_sizze_37043 (mainMagnitudezisegred_group_sizze_36987) + #define segred_group_sizze_114487 (mainzisegred_group_sizze_114461) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46433_backing_0 = + __local volatile char *restrict red_arr_mem_129518_backing_0 = (__local volatile - char *) red_arr_mem_46433_backing_aligned_0; + char *) red_arr_mem_129518_backing_aligned_0; volatile __local bool local_failure; if (failure_is_an_option) { @@ -48131,72 +51368,75 @@ def sync(self): local_failure = false; barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46428; - int32_t local_tid_46429; - int64_t group_sizze_46432; - int32_t wave_sizze_46431; - int32_t group_tid_46430; - - global_tid_46428 = get_global_id(0); - local_tid_46429 = get_local_id(0); - group_sizze_46432 = get_local_size(0); - wave_sizze_46431 = LOCKSTEP_WIDTH; - group_tid_46430 = get_group_id(0); - - int32_t phys_tid_36993; - - phys_tid_36993 = global_tid_46428; - - 
__local char *red_arr_mem_46433; - - red_arr_mem_46433 = (__local char *) red_arr_mem_46433_backing_0; - - int32_t phys_group_id_46435; - - phys_group_id_46435 = get_group_id(0); - for (int32_t i_46436 = 0; i_46436 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426))) - - phys_group_id_46435, sext_i64_i32(num_groups_37044)); - i_46436++) { - int32_t virt_group_id_46437 = phys_group_id_46435 + i_46436 * - sext_i64_i32(num_groups_37044); - int64_t gtid_36984 = squot64(sext_i32_i64(local_tid_46429), - segment_sizze_nonzzero_46426) + - sext_i32_i64(virt_group_id_46437) * - squot64(segred_group_sizze_37043, segment_sizze_nonzzero_46426); - int64_t gtid_36992 = srem64(sext_i32_i64(local_tid_46429), - i32_res_28487); + int32_t global_tid_129513; + int32_t local_tid_129514; + int64_t group_sizze_129517; + int32_t wave_sizze_129516; + int32_t group_tid_129515; + + global_tid_129513 = get_global_id(0); + local_tid_129514 = get_local_id(0); + group_sizze_129517 = get_local_size(0); + wave_sizze_129516 = LOCKSTEP_WIDTH; + group_tid_129515 = get_group_id(0); + + int32_t phys_tid_114467; + + phys_tid_114467 = global_tid_129513; + + __local char *red_arr_mem_129518; + + red_arr_mem_129518 = (__local char *) red_arr_mem_129518_backing_0; + + int32_t phys_group_id_129520; + + phys_group_id_129520 = get_group_id(0); + for (int32_t i_129521 = 0; i_129521 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511))) - + phys_group_id_129520, sext_i64_i32(num_groups_114488)); + i_129521++) { + int32_t virt_group_id_129522 = phys_group_id_129520 + i_129521 * + sext_i64_i32(num_groups_114488); + int64_t gtid_114458 = squot64(sext_i32_i64(local_tid_129514), + segment_sizze_nonzzero_129511) + + sext_i32_i64(virt_group_id_129522) * + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511); + int64_t gtid_114466 = srem64(sext_i32_i64(local_tid_129514), + 
defunc_2_reduce_comm_res_76995); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28487) && (slt64(gtid_36984, - m_28478) && - slt64(sext_i32_i64(local_tid_46429), - i32_res_28487 * - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426)))) { - int32_t defunc_0_f_res_37051 = ((__global - int32_t *) mem_45232)[gtid_36984]; - int32_t index_primexp_42385 = sext_i64_i32(gtid_36992); - bool cond_37053 = slt32(index_primexp_42385, - defunc_0_f_res_37051); - float defunc_0_f_res_37054; - - if (cond_37053) { - int64_t i_37055 = sext_i32_i64(index_primexp_42385); - bool x_37056 = sle64((int64_t) 0, i_37055); - bool y_37057 = slt64(i_37055, N_28477); - bool bounds_check_37058 = x_37056 && y_37057; - bool index_certs_37059; - - if (!bounds_check_37058) { + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_76995) && + (slt64(gtid_114458, m_75136) && + slt64(sext_i32_i64(local_tid_129514), + defunc_2_reduce_comm_res_76995 * + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511)))) { + int64_t x_114496 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_114458]; + bool cond_114498 = slt64(gtid_114466, x_114496); + double defunc_0_f_res_114499; + + if (cond_114498) { + int64_t x_114495 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_114458]; + int64_t x_114500 = add64(gtid_114466, x_114495); + int64_t x_114501 = sub64(x_114500, x_114496); + int64_t i_114502 = add64((int64_t) 1, x_114501); + bool x_114503 = sle64((int64_t) 0, i_114502); + bool y_114504 = slt64(i_114502, N_75135); + bool bounds_check_114505 = x_114503 && y_114504; + bool index_certs_114506; + + if (!bounds_check_114505) { { if (atomic_cmpxchg_i32_global(global_failure, -1, - 97) == -1) { - global_failure_args[0] = i_37055; - global_failure_args[1] = N_28477; + 200) == -1) { + global_failure_args[0] = i_114502; + global_failure_args[1] = N_75135; ; } local_failure = true; @@ -48204,31 +51444,27 @@ def sync(self): } } - float defunc_0_f_res_t_res_37060 
= ((__global - float *) defunc_4_map_res_mem_45178)[gtid_36984 * - N_28477 + - i_37055]; + double defunc_0_f_res_t_res_114507 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_114458 * + N_75135 + + i_114502]; - defunc_0_f_res_37054 = defunc_0_f_res_t_res_37060; + defunc_0_f_res_114499 = defunc_0_f_res_t_res_114507; } else { - defunc_0_f_res_37054 = 0.0F; + defunc_0_f_res_114499 = 0.0; } - - float defunc_0_f_res_37061 = defunc_0_f_res_37054 * - defunc_0_f_res_37054; - // save map-out results { } // save results to be reduced { ((__local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - defunc_0_f_res_37061; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + defunc_0_f_res_114499; } } else { ((__local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - 0.0F; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + 0.0; } } @@ -48237,219 +51473,220 @@ def sync(self): if (local_failure) return; barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28487)) { + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_76995)) { // perform segmented scan to imitate reduction { - float x_37047; - float x_37048; - float x_46438; - float x_46439; - bool ltid_in_bounds_46441; + double x_114491; + double x_114492; + double x_129523; + double x_129524; + bool ltid_in_bounds_129526; - ltid_in_bounds_46441 = slt64(sext_i32_i64(local_tid_46429), - i32_res_28487 * - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426)); + ltid_in_bounds_129526 = slt64(sext_i32_i64(local_tid_129514), + defunc_2_reduce_comm_res_76995 * + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511)); - int32_t skip_threads_46442; + int32_t skip_threads_129527; // read input for in-block scan { - if (ltid_in_bounds_46441) { - x_37048 = ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)]; - if ((local_tid_46429 - squot32(local_tid_46429, 32) * + if (ltid_in_bounds_129526) { + x_114492 = 
((volatile __local + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)]; + if ((local_tid_129514 - squot32(local_tid_129514, 32) * 32) == 0) { - x_37047 = x_37048; + x_114491 = x_114492; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46442 = 1; - while (slt32(skip_threads_46442, 32)) { - if (sle32(skip_threads_46442, local_tid_46429 - - squot32(local_tid_46429, 32) * 32) && - ltid_in_bounds_46441) { + skip_threads_129527 = 1; + while (slt32(skip_threads_129527, 32)) { + if (sle32(skip_threads_129527, local_tid_129514 - + squot32(local_tid_129514, 32) * 32) && + ltid_in_bounds_129526) { // read operands { - x_37047 = ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429) - - sext_i32_i64(skip_threads_46442)]; + x_114491 = ((volatile __local + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514) - + sext_i32_i64(skip_threads_129527)]; } // perform operation { - bool inactive_46443 = - slt64(srem64(sext_i32_i64(local_tid_46429), - i32_res_28487), - sext_i32_i64(local_tid_46429) - - sext_i32_i64(local_tid_46429 - - skip_threads_46442)); + bool inactive_129528 = + slt64(srem64(sext_i32_i64(local_tid_129514), + defunc_2_reduce_comm_res_76995), + sext_i32_i64(local_tid_129514) - + sext_i32_i64(local_tid_129514 - + skip_threads_129527)); - if (inactive_46443) { - x_37047 = x_37048; + if (inactive_129528) { + x_114491 = x_114492; } - if (!inactive_46443) { - float defunc_1_op_res_37049 = x_37047 + - x_37048; + if (!inactive_129528) { + double defunc_1_op_res_114493 = x_114491 + + x_114492; - x_37047 = defunc_1_op_res_37049; + x_114491 = defunc_1_op_res_114493; } } } - if (sle32(wave_sizze_46431, skip_threads_46442)) { + if (sle32(wave_sizze_129516, skip_threads_129527)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46442, local_tid_46429 - - squot32(local_tid_46429, 32) * 32) && - ltid_in_bounds_46441) { + if (sle32(skip_threads_129527, local_tid_129514 - + squot32(local_tid_129514, 32) * 32) && 
+ ltid_in_bounds_129526) { // write result { ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - x_37047; - x_37048 = x_37047; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + x_114491; + x_114492 = x_114491; } } - if (sle32(wave_sizze_46431, skip_threads_46442)) { + if (sle32(wave_sizze_129516, skip_threads_129527)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46442 *= 2; + skip_threads_129527 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46429 - squot32(local_tid_46429, 32) * 32) == - 31 && ltid_in_bounds_46441) { + if ((local_tid_129514 - squot32(local_tid_129514, 32) * + 32) == 31 && ltid_in_bounds_129526) { ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(squot32(local_tid_46429, - 32))] = - x_37047; + double *) red_arr_mem_129518)[sext_i32_i64(squot32(local_tid_129514, + 32))] = + x_114491; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46444; + int32_t skip_threads_129529; // read input for in-block scan { - if (squot32(local_tid_46429, 32) == 0 && - ltid_in_bounds_46441) { - x_46439 = ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)]; - if ((local_tid_46429 - squot32(local_tid_46429, - 32) * 32) == 0) { - x_46438 = x_46439; + if (squot32(local_tid_129514, 32) == 0 && + ltid_in_bounds_129526) { + x_129524 = ((volatile __local + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)]; + if ((local_tid_129514 - squot32(local_tid_129514, + 32) * 32) == 0) { + x_129523 = x_129524; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46444 = 1; - while (slt32(skip_threads_46444, 32)) { - if (sle32(skip_threads_46444, local_tid_46429 - - squot32(local_tid_46429, 32) * 32) && - (squot32(local_tid_46429, 32) == 0 && - ltid_in_bounds_46441)) { + skip_threads_129529 = 1; + while 
(slt32(skip_threads_129529, 32)) { + if (sle32(skip_threads_129529, local_tid_129514 - + squot32(local_tid_129514, 32) * 32) && + (squot32(local_tid_129514, 32) == 0 && + ltid_in_bounds_129526)) { // read operands { - x_46438 = ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429) - - sext_i32_i64(skip_threads_46444)]; + x_129523 = ((volatile __local + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514) - + sext_i32_i64(skip_threads_129529)]; } // perform operation { - bool inactive_46445 = - slt64(srem64(sext_i32_i64(local_tid_46429 * + bool inactive_129530 = + slt64(srem64(sext_i32_i64(local_tid_129514 * 32 + 32 - 1), - i32_res_28487), - sext_i32_i64(local_tid_46429 * + defunc_2_reduce_comm_res_76995), + sext_i32_i64(local_tid_129514 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46429 - - skip_threads_46444) * + sext_i32_i64((local_tid_129514 - + skip_threads_129529) * 32 + 32 - 1)); - if (inactive_46445) { - x_46438 = x_46439; + if (inactive_129530) { + x_129523 = x_129524; } - if (!inactive_46445) { - float defunc_1_op_res_46440 = x_46438 + - x_46439; + if (!inactive_129530) { + double defunc_1_op_res_129525 = + x_129523 + x_129524; - x_46438 = defunc_1_op_res_46440; + x_129523 = defunc_1_op_res_129525; } } } - if (sle32(wave_sizze_46431, skip_threads_46444)) { + if (sle32(wave_sizze_129516, skip_threads_129529)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46444, local_tid_46429 - - squot32(local_tid_46429, 32) * 32) && - (squot32(local_tid_46429, 32) == 0 && - ltid_in_bounds_46441)) { + if (sle32(skip_threads_129529, local_tid_129514 - + squot32(local_tid_129514, 32) * 32) && + (squot32(local_tid_129514, 32) == 0 && + ltid_in_bounds_129526)) { // write result { ((volatile __local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - x_46438; - x_46439 = x_46438; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + x_129523; + x_129524 = x_129523; } } - if (sle32(wave_sizze_46431, 
skip_threads_46444)) { + if (sle32(wave_sizze_129516, skip_threads_129529)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46444 *= 2; + skip_threads_129529 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46429, 32) == 0 || - !ltid_in_bounds_46441)) { + if (!(squot32(local_tid_129514, 32) == 0 || + !ltid_in_bounds_129526)) { // read operands { - x_37048 = x_37047; - x_37047 = ((__local - float *) red_arr_mem_46433)[sext_i32_i64(squot32(local_tid_46429, - 32)) - - (int64_t) 1]; + x_114492 = x_114491; + x_114491 = ((__local + double *) red_arr_mem_129518)[sext_i32_i64(squot32(local_tid_129514, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46446 = - slt64(srem64(sext_i32_i64(local_tid_46429), - i32_res_28487), - sext_i32_i64(local_tid_46429) - - sext_i32_i64(squot32(local_tid_46429, + bool inactive_129531 = + slt64(srem64(sext_i32_i64(local_tid_129514), + defunc_2_reduce_comm_res_76995), + sext_i32_i64(local_tid_129514) - + sext_i32_i64(squot32(local_tid_129514, 32) * 32 - 1)); - if (inactive_46446) { - x_37047 = x_37048; + if (inactive_129531) { + x_114491 = x_114492; } - if (!inactive_46446) { - float defunc_1_op_res_37049 = x_37047 + x_37048; + if (!inactive_129531) { + double defunc_1_op_res_114493 = x_114491 + + x_114492; - x_37047 = defunc_1_op_res_37049; + x_114491 = defunc_1_op_res_114493; } } // write final result { ((__local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - x_37047; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + x_114491; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46429, 32) == 0) { + if (squot32(local_tid_129514, 32) == 0) { ((__local - float *) red_arr_mem_46433)[sext_i32_i64(local_tid_46429)] = - x_37048; + double *) red_arr_mem_129518)[sext_i32_i64(local_tid_129514)] = + x_114492; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -48458,22 +51695,23 @@ def sync(self): 
barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_46437) * - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426) + - sext_i32_i64(local_tid_46429), m_28478) && - slt64(sext_i32_i64(local_tid_46429), - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426))) { + if (slt64(sext_i32_i64(virt_group_id_129522) * + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511) + + sext_i32_i64(local_tid_129514), m_75136) && + slt64(sext_i32_i64(local_tid_129514), + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511))) { ((__global - float *) mem_45235)[sext_i32_i64(virt_group_id_46437) * - squot64(segred_group_sizze_37043, - segment_sizze_nonzzero_46426) + - sext_i32_i64(local_tid_46429)] = ((__local - float *) red_arr_mem_46433)[(sext_i32_i64(local_tid_46429) + - (int64_t) 1) * - segment_sizze_nonzzero_46426 - - (int64_t) 1]; + double *) mem_124969)[sext_i32_i64(virt_group_id_129522) * + squot64(segred_group_sizze_114487, + segment_sizze_nonzzero_129511) + + sext_i32_i64(local_tid_129514)] = + ((__local + double *) red_arr_mem_129518)[(sext_i32_i64(local_tid_129514) + + (int64_t) 1) * + segment_sizze_nonzzero_129511 - + (int64_t) 1]; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -48482,319 +51720,537 @@ def sync(self): error_1: return; - #undef segred_group_sizze_37043 + #undef segred_group_sizze_114487 } -__kernel void mainMagnitudezisegred_small_37018(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46373_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28487, - int64_t num_groups_37030, - int64_t segment_sizze_nonzzero_46366, - __global - unsigned char *images_mem_44381, - __global - unsigned char *mem_45232) +__kernel void mainzisegred_small_114739(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129680_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129678_backing_aligned_1, + __local 
volatile + int64_t *red_arr_mem_129676_backing_aligned_2, + int64_t m_75136, int64_t iota_arg_77024, + int64_t num_groups_114922, + int64_t segment_sizze_nonzzero_129669, + __global unsigned char *mem_124973, + __global unsigned char *mem_124985, + __global unsigned char *mem_124987, + __global unsigned char *mem_124991, + __global unsigned char *mem_124994, + __global unsigned char *mem_124996, + __global unsigned char *mem_124998) { - #define segred_group_sizze_37029 (mainMagnitudezisegred_group_sizze_37012) + #define segred_group_sizze_114921 (mainzisegred_group_sizze_114733) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46373_backing_0 = + __local volatile char *restrict red_arr_mem_129680_backing_2 = + (__local volatile + char *) red_arr_mem_129680_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129678_backing_1 = + (__local volatile + char *) red_arr_mem_129678_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129676_backing_0 = (__local volatile - char *) red_arr_mem_46373_backing_aligned_0; + char *) red_arr_mem_129676_backing_aligned_2; if (*global_failure >= 0) return; - int32_t global_tid_46368; - int32_t local_tid_46369; - int64_t group_sizze_46372; - int32_t wave_sizze_46371; - int32_t group_tid_46370; + int32_t global_tid_129671; + int32_t local_tid_129672; + int64_t group_sizze_129675; + int32_t wave_sizze_129674; + int32_t group_tid_129673; + + global_tid_129671 = get_global_id(0); + local_tid_129672 = get_local_id(0); + group_sizze_129675 = get_local_size(0); + wave_sizze_129674 = LOCKSTEP_WIDTH; + group_tid_129673 = get_group_id(0); + + int32_t phys_tid_114739; + + phys_tid_114739 = global_tid_129671; + + __local char *red_arr_mem_129676; - global_tid_46368 = get_global_id(0); - local_tid_46369 = get_local_id(0); - group_sizze_46372 = get_local_size(0); - wave_sizze_46371 = LOCKSTEP_WIDTH; - group_tid_46370 = get_group_id(0); + 
red_arr_mem_129676 = (__local char *) red_arr_mem_129676_backing_0; - int32_t phys_tid_37018; + __local char *red_arr_mem_129678; - phys_tid_37018 = global_tid_46368; + red_arr_mem_129678 = (__local char *) red_arr_mem_129678_backing_1; - __local char *red_arr_mem_46373; + __local char *red_arr_mem_129680; - red_arr_mem_46373 = (__local char *) red_arr_mem_46373_backing_0; + red_arr_mem_129680 = (__local char *) red_arr_mem_129680_backing_2; - int32_t phys_group_id_46375; + int32_t phys_group_id_129682; - phys_group_id_46375 = get_group_id(0); - for (int32_t i_46376 = 0; i_46376 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, - squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366))) - - phys_group_id_46375, sext_i64_i32(num_groups_37030)); - i_46376++) { - int32_t virt_group_id_46377 = phys_group_id_46375 + i_46376 * - sext_i64_i32(num_groups_37030); - int64_t gtid_37009 = squot64(sext_i32_i64(local_tid_46369), - segment_sizze_nonzzero_46366) + - sext_i32_i64(virt_group_id_46377) * - squot64(segred_group_sizze_37029, segment_sizze_nonzzero_46366); - int64_t gtid_37017 = srem64(sext_i32_i64(local_tid_46369), - i32_res_28487); + phys_group_id_129682 = get_group_id(0); + for (int32_t i_129683 = 0; i_129683 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_75136, + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669))) - + phys_group_id_129682, sext_i64_i32(num_groups_114922)); + i_129683++) { + int32_t virt_group_id_129684 = phys_group_id_129682 + i_129683 * + sext_i64_i32(num_groups_114922); + int64_t gtid_114730 = squot64(sext_i32_i64(local_tid_129672), + segment_sizze_nonzzero_129669) + + sext_i32_i64(virt_group_id_129684) * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669); + int64_t gtid_114738 = srem64(sext_i32_i64(local_tid_129672), + iota_arg_77024); // apply map function if in bounds { - if (slt64((int64_t) 0, i32_res_28487) && (slt64(gtid_37009, - m_28478) && - slt64(sext_i32_i64(local_tid_46369), - i32_res_28487 * - 
squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366)))) { - float x_37037 = ((__global - float *) images_mem_44381)[gtid_37009 * - N_28477 + - gtid_37017]; - bool isnan_res_37038; - - isnan_res_37038 = futrts_isnan32(x_37037); - - bool cond_37039 = !isnan_res_37038; - int32_t defunc_0_f_res_37040 = btoi_bool_i32(cond_37039); - + if (slt64((int64_t) 0, iota_arg_77024) && (slt64(gtid_114730, + m_75136) && + slt64(sext_i32_i64(local_tid_129672), + iota_arg_77024 * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669)))) { + int64_t y_114941 = ((__global + int64_t *) mem_124987)[gtid_114730]; + double y_114942 = ((__global double *) mem_124985)[gtid_114730]; + double x_114946 = ((__global double *) mem_124991)[gtid_114730 * + iota_arg_77024 + + gtid_114738]; + double x_114947 = ((__global double *) mem_124973)[gtid_114738]; + double defunc_0_f_res_114950 = x_114946 / y_114942; + bool cond_114951 = slt64(gtid_114738, y_114941); + bool isnan_res_114952; + + isnan_res_114952 = futrts_isnan64(defunc_0_f_res_114950); + + bool cond_t_res_114953 = !isnan_res_114952; + bool x_114954 = cond_114951 && cond_t_res_114953; + double abs_res_114955 = fabs(defunc_0_f_res_114950); + bool defunc_2_f_res_t_res_114956 = x_114947 < abs_res_114955; + bool x_114957 = x_114954 && defunc_2_f_res_t_res_114956; + double defunc_1_f_res_114958; + + if (cond_114951) { + defunc_1_f_res_114958 = defunc_0_f_res_114950; + } else { + defunc_1_f_res_114958 = 0.0; + } // save map-out results { } // save results to be reduced { ((__local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = - defunc_0_f_res_37040; + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = + x_114957; + ((__local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + gtid_114738; + ((__local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + defunc_1_f_res_114958; } } else { ((__local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = 
+ bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = 0; + ((__local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + (int64_t) -1; + ((__local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + 0.0; } } barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28487)) { + if (slt64((int64_t) 0, iota_arg_77024)) { // perform segmented scan to imitate reduction { - int32_t x_37033; - int32_t x_37034; - int32_t x_46378; - int32_t x_46379; - bool ltid_in_bounds_46381; - - ltid_in_bounds_46381 = slt64(sext_i32_i64(local_tid_46369), - i32_res_28487 * - squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366)); - - int32_t skip_threads_46382; + bool x_114927; + int64_t x_114928; + double x_114929; + bool x_114930; + int64_t x_114931; + double x_114932; + bool x_129685; + int64_t x_129686; + double x_129687; + bool x_129688; + int64_t x_129689; + double x_129690; + bool ltid_in_bounds_129699; + + ltid_in_bounds_129699 = slt64(sext_i32_i64(local_tid_129672), + iota_arg_77024 * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669)); + + int32_t skip_threads_129700; // read input for in-block scan { - if (ltid_in_bounds_46381) { - x_37034 = ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)]; - if ((local_tid_46369 - squot32(local_tid_46369, 32) * + if (ltid_in_bounds_129699) { + x_114930 = ((volatile __local + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)]; + x_114931 = ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)]; + x_114932 = ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)]; + if ((local_tid_129672 - squot32(local_tid_129672, 32) * 32) == 0) { - x_37033 = x_37034; + x_114927 = x_114930; + x_114928 = x_114931; + x_114929 = x_114932; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46382 = 1; - while (slt32(skip_threads_46382, 32)) { - if 
(sle32(skip_threads_46382, local_tid_46369 - - squot32(local_tid_46369, 32) * 32) && - ltid_in_bounds_46381) { + skip_threads_129700 = 1; + while (slt32(skip_threads_129700, 32)) { + if (sle32(skip_threads_129700, local_tid_129672 - + squot32(local_tid_129672, 32) * 32) && + ltid_in_bounds_129699) { // read operands { - x_37033 = ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369) - - sext_i32_i64(skip_threads_46382)]; + x_114927 = ((volatile __local + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129700)]; + x_114928 = ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129700)]; + x_114929 = ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129700)]; } // perform operation { - bool inactive_46383 = - slt64(srem64(sext_i32_i64(local_tid_46369), - i32_res_28487), - sext_i32_i64(local_tid_46369) - - sext_i32_i64(local_tid_46369 - - skip_threads_46382)); + bool inactive_129701 = + slt64(srem64(sext_i32_i64(local_tid_129672), + iota_arg_77024), + sext_i32_i64(local_tid_129672) - + sext_i32_i64(local_tid_129672 - + skip_threads_129700)); - if (inactive_46383) { - x_37033 = x_37034; + if (inactive_129701) { + x_114927 = x_114930; + x_114928 = x_114931; + x_114929 = x_114932; } - if (!inactive_46383) { - int32_t defunc_1_op_res_37035 = - add32(x_37033, x_37034); + if (!inactive_129701) { + bool defunc_1_op_res_114933; + int64_t defunc_1_op_res_114934; - x_37033 = defunc_1_op_res_37035; + if (x_114927) { + defunc_1_op_res_114933 = x_114927; + defunc_1_op_res_114934 = x_114928; + } else { + bool x_114935 = x_114930 && x_114930; + bool x_114936 = !x_114930; + bool y_114937 = x_114927 && x_114936; + bool defunc_1_op_res_f_res_114938 = + x_114935 || y_114937; + int64_t defunc_1_op_res_f_res_114939; + + if (x_114930) { + defunc_1_op_res_f_res_114939 = + x_114931; + } else { + 
defunc_1_op_res_f_res_114939 = + x_114928; + } + defunc_1_op_res_114933 = + defunc_1_op_res_f_res_114938; + defunc_1_op_res_114934 = + defunc_1_op_res_f_res_114939; + } + + double defunc_1_op_res_114940 = x_114929 + + x_114932; + + x_114927 = defunc_1_op_res_114933; + x_114928 = defunc_1_op_res_114934; + x_114929 = defunc_1_op_res_114940; } } } - if (sle32(wave_sizze_46371, skip_threads_46382)) { + if (sle32(wave_sizze_129674, skip_threads_129700)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46382, local_tid_46369 - - squot32(local_tid_46369, 32) * 32) && - ltid_in_bounds_46381) { + if (sle32(skip_threads_129700, local_tid_129672 - + squot32(local_tid_129672, 32) * 32) && + ltid_in_bounds_129699) { // write result { ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = - x_37033; - x_37034 = x_37033; + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = + x_114927; + x_114930 = x_114927; + ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + x_114928; + x_114931 = x_114928; + ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + x_114929; + x_114932 = x_114929; } } - if (sle32(wave_sizze_46371, skip_threads_46382)) { + if (sle32(wave_sizze_129674, skip_threads_129700)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46382 *= 2; + skip_threads_129700 *= 2; } } barrier(CLK_LOCAL_MEM_FENCE); // last thread of block 'i' writes its result to offset 'i' { - if ((local_tid_46369 - squot32(local_tid_46369, 32) * 32) == - 31 && ltid_in_bounds_46381) { + if ((local_tid_129672 - squot32(local_tid_129672, 32) * + 32) == 31 && ltid_in_bounds_129699) { + ((volatile __local + bool *) red_arr_mem_129676)[sext_i32_i64(squot32(local_tid_129672, + 32))] = + x_114927; + ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(squot32(local_tid_129672, + 32))] = + x_114928; ((volatile __local - int32_t *) 
red_arr_mem_46373)[sext_i32_i64(squot32(local_tid_46369, + double *) red_arr_mem_129680)[sext_i32_i64(squot32(local_tid_129672, 32))] = - x_37033; + x_114929; } } barrier(CLK_LOCAL_MEM_FENCE); // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - int32_t skip_threads_46384; + int32_t skip_threads_129702; // read input for in-block scan { - if (squot32(local_tid_46369, 32) == 0 && - ltid_in_bounds_46381) { - x_46379 = ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)]; - if ((local_tid_46369 - squot32(local_tid_46369, - 32) * 32) == 0) { - x_46378 = x_46379; + if (squot32(local_tid_129672, 32) == 0 && + ltid_in_bounds_129699) { + x_129688 = ((volatile __local + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)]; + x_129689 = ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)]; + x_129690 = ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)]; + if ((local_tid_129672 - squot32(local_tid_129672, + 32) * 32) == 0) { + x_129685 = x_129688; + x_129686 = x_129689; + x_129687 = x_129690; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46384 = 1; - while (slt32(skip_threads_46384, 32)) { - if (sle32(skip_threads_46384, local_tid_46369 - - squot32(local_tid_46369, 32) * 32) && - (squot32(local_tid_46369, 32) == 0 && - ltid_in_bounds_46381)) { + skip_threads_129702 = 1; + while (slt32(skip_threads_129702, 32)) { + if (sle32(skip_threads_129702, local_tid_129672 - + squot32(local_tid_129672, 32) * 32) && + (squot32(local_tid_129672, 32) == 0 && + ltid_in_bounds_129699)) { // read operands { - x_46378 = ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369) - - sext_i32_i64(skip_threads_46384)]; + x_129685 = ((volatile __local + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129702)]; + x_129686 = ((volatile __local + int64_t *) 
red_arr_mem_129678)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129702)]; + x_129687 = ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672) - + sext_i32_i64(skip_threads_129702)]; } // perform operation { - bool inactive_46385 = - slt64(srem64(sext_i32_i64(local_tid_46369 * + bool inactive_129703 = + slt64(srem64(sext_i32_i64(local_tid_129672 * 32 + 32 - 1), - i32_res_28487), - sext_i32_i64(local_tid_46369 * + iota_arg_77024), + sext_i32_i64(local_tid_129672 * 32 + 32 - 1) - - sext_i32_i64((local_tid_46369 - - skip_threads_46384) * + sext_i32_i64((local_tid_129672 - + skip_threads_129702) * 32 + 32 - 1)); - if (inactive_46385) { - x_46378 = x_46379; + if (inactive_129703) { + x_129685 = x_129688; + x_129686 = x_129689; + x_129687 = x_129690; } - if (!inactive_46385) { - int32_t defunc_1_op_res_46380 = - add32(x_46378, x_46379); + if (!inactive_129703) { + bool defunc_1_op_res_129691; + int64_t defunc_1_op_res_129692; - x_46378 = defunc_1_op_res_46380; + if (x_129685) { + defunc_1_op_res_129691 = x_129685; + defunc_1_op_res_129692 = x_129686; + } else { + bool x_129693 = x_129688 && + x_129688; + bool x_129694 = !x_129688; + bool y_129695 = x_129685 && + x_129694; + bool defunc_1_op_res_f_res_129696 = + x_129693 || y_129695; + int64_t + defunc_1_op_res_f_res_129697; + + if (x_129688) { + defunc_1_op_res_f_res_129697 = + x_129689; + } else { + defunc_1_op_res_f_res_129697 = + x_129686; + } + defunc_1_op_res_129691 = + defunc_1_op_res_f_res_129696; + defunc_1_op_res_129692 = + defunc_1_op_res_f_res_129697; + } + + double defunc_1_op_res_129698 = + x_129687 + x_129690; + + x_129685 = defunc_1_op_res_129691; + x_129686 = defunc_1_op_res_129692; + x_129687 = defunc_1_op_res_129698; } } } - if (sle32(wave_sizze_46371, skip_threads_46384)) { + if (sle32(wave_sizze_129674, skip_threads_129702)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46384, local_tid_46369 - - squot32(local_tid_46369, 32) * 32) && - 
(squot32(local_tid_46369, 32) == 0 && - ltid_in_bounds_46381)) { + if (sle32(skip_threads_129702, local_tid_129672 - + squot32(local_tid_129672, 32) * 32) && + (squot32(local_tid_129672, 32) == 0 && + ltid_in_bounds_129699)) { // write result { ((volatile __local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = - x_46378; - x_46379 = x_46378; + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = + x_129685; + x_129688 = x_129685; + ((volatile __local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + x_129686; + x_129689 = x_129686; + ((volatile __local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + x_129687; + x_129690 = x_129687; } } - if (sle32(wave_sizze_46371, skip_threads_46384)) { + if (sle32(wave_sizze_129674, skip_threads_129702)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46384 *= 2; + skip_threads_129702 *= 2; } } } barrier(CLK_LOCAL_MEM_FENCE); // carry-in for every block except the first { - if (!(squot32(local_tid_46369, 32) == 0 || - !ltid_in_bounds_46381)) { + if (!(squot32(local_tid_129672, 32) == 0 || + !ltid_in_bounds_129699)) { // read operands { - x_37034 = x_37033; - x_37033 = ((__local - int32_t *) red_arr_mem_46373)[sext_i32_i64(squot32(local_tid_46369, - 32)) - - (int64_t) 1]; + x_114930 = x_114927; + x_114931 = x_114928; + x_114932 = x_114929; + x_114927 = ((__local + bool *) red_arr_mem_129676)[sext_i32_i64(squot32(local_tid_129672, + 32)) - + (int64_t) 1]; + x_114928 = ((__local + int64_t *) red_arr_mem_129678)[sext_i32_i64(squot32(local_tid_129672, + 32)) - + (int64_t) 1]; + x_114929 = ((__local + double *) red_arr_mem_129680)[sext_i32_i64(squot32(local_tid_129672, + 32)) - + (int64_t) 1]; } // perform operation { - bool inactive_46386 = - slt64(srem64(sext_i32_i64(local_tid_46369), - i32_res_28487), - sext_i32_i64(local_tid_46369) - - sext_i32_i64(squot32(local_tid_46369, + bool inactive_129704 = + slt64(srem64(sext_i32_i64(local_tid_129672), + iota_arg_77024), + 
sext_i32_i64(local_tid_129672) - + sext_i32_i64(squot32(local_tid_129672, 32) * 32 - 1)); - if (inactive_46386) { - x_37033 = x_37034; + if (inactive_129704) { + x_114927 = x_114930; + x_114928 = x_114931; + x_114929 = x_114932; } - if (!inactive_46386) { - int32_t defunc_1_op_res_37035 = add32(x_37033, - x_37034); + if (!inactive_129704) { + bool defunc_1_op_res_114933; + int64_t defunc_1_op_res_114934; + + if (x_114927) { + defunc_1_op_res_114933 = x_114927; + defunc_1_op_res_114934 = x_114928; + } else { + bool x_114935 = x_114930 && x_114930; + bool x_114936 = !x_114930; + bool y_114937 = x_114927 && x_114936; + bool defunc_1_op_res_f_res_114938 = + x_114935 || y_114937; + int64_t defunc_1_op_res_f_res_114939; + + if (x_114930) { + defunc_1_op_res_f_res_114939 = x_114931; + } else { + defunc_1_op_res_f_res_114939 = x_114928; + } + defunc_1_op_res_114933 = + defunc_1_op_res_f_res_114938; + defunc_1_op_res_114934 = + defunc_1_op_res_f_res_114939; + } + + double defunc_1_op_res_114940 = x_114929 + + x_114932; - x_37033 = defunc_1_op_res_37035; + x_114927 = defunc_1_op_res_114933; + x_114928 = defunc_1_op_res_114934; + x_114929 = defunc_1_op_res_114940; } } // write final result { ((__local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = - x_37033; + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = + x_114927; + ((__local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + x_114928; + ((__local + double *) red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + x_114929; } } } barrier(CLK_LOCAL_MEM_FENCE); // restore correct values for first block { - if (squot32(local_tid_46369, 32) == 0) { + if (squot32(local_tid_129672, 32) == 0) { ((__local - int32_t *) red_arr_mem_46373)[sext_i32_i64(local_tid_46369)] = - x_37034; + bool *) red_arr_mem_129676)[sext_i32_i64(local_tid_129672)] = + x_114930; + ((__local + int64_t *) red_arr_mem_129678)[sext_i32_i64(local_tid_129672)] = + x_114931; + ((__local + double *) 
red_arr_mem_129680)[sext_i32_i64(local_tid_129672)] = + x_114932; } } barrier(CLK_LOCAL_MEM_FENCE); @@ -48803,22 +52259,42 @@ def sync(self): barrier(CLK_LOCAL_MEM_FENCE); // save final values of segments { - if (slt64(sext_i32_i64(virt_group_id_46377) * - squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366) + - sext_i32_i64(local_tid_46369), m_28478) && - slt64(sext_i32_i64(local_tid_46369), - squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366))) { + if (slt64(sext_i32_i64(virt_group_id_129684) * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669) + + sext_i32_i64(local_tid_129672), m_75136) && + slt64(sext_i32_i64(local_tid_129672), + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669))) { + ((__global + bool *) mem_124994)[sext_i32_i64(virt_group_id_129684) * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669) + + sext_i32_i64(local_tid_129672)] = + ((__local + bool *) red_arr_mem_129676)[(sext_i32_i64(local_tid_129672) + + (int64_t) 1) * + segment_sizze_nonzzero_129669 - + (int64_t) 1]; + ((__global + int64_t *) mem_124996)[sext_i32_i64(virt_group_id_129684) * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669) + + sext_i32_i64(local_tid_129672)] = + ((__local + int64_t *) red_arr_mem_129678)[(sext_i32_i64(local_tid_129672) + + (int64_t) 1) * + segment_sizze_nonzzero_129669 - + (int64_t) 1]; ((__global - int32_t *) mem_45232)[sext_i32_i64(virt_group_id_46377) * - squot64(segred_group_sizze_37029, - segment_sizze_nonzzero_46366) + - sext_i32_i64(local_tid_46369)] = + double *) mem_124998)[sext_i32_i64(virt_group_id_129684) * + squot64(segred_group_sizze_114921, + segment_sizze_nonzzero_129669) + + sext_i32_i64(local_tid_129672)] = ((__local - int32_t *) red_arr_mem_46373)[(sext_i32_i64(local_tid_46369) + + double *) red_arr_mem_129680)[(sext_i32_i64(local_tid_129672) + (int64_t) 1) * - segment_sizze_nonzzero_46366 - + segment_sizze_nonzzero_129669 - (int64_t) 1]; } } 
@@ -48828,1002 +52304,100665 @@ def sync(self): error_1: return; - #undef segred_group_sizze_37029 + #undef segred_group_sizze_114921 } -__kernel void mainMagnitudezisegred_small_37181(__global int *global_failure, - int failure_is_an_option, - __global - int64_t *global_failure_args, - __local volatile - int64_t *red_arr_mem_46542_backing_aligned_0, - int64_t N_28477, - int64_t m_28478, - int64_t i32_res_28880, - int64_t num_groups_37203, - int64_t segment_sizze_nonzzero_46535, - __global - unsigned char *defunc_4_map_res_mem_45178, - __global - unsigned char *defunc_3_map_res_mem_45244, - __global - unsigned char *defunc_3_map_res_mem_45245, - __global - unsigned char *mem_45278) +__kernel void mainDetailedzicopy_126395(int64_t m_70861, int64_t n_70864, + __global unsigned char *mem_120177, + __global unsigned char *mem_120224) { - #define segred_group_sizze_37202 (mainMagnitudezisegred_group_sizze_37175) + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126395; + int32_t copy_ltid_126396; + int32_t copy_gid_126397; + + copy_gtid_126395 = get_global_id(0); + copy_ltid_126396 = get_local_id(0); + copy_gid_126397 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126395), m_70861 * n_70864)) { + ((__global double *) mem_120224)[squot64(sext_i32_i64(copy_gtid_126395), + n_70864) * n_70864 + + (sext_i32_i64(copy_gtid_126395) - + squot64(sext_i32_i64(copy_gtid_126395), + n_70864) * n_70864)] = + ((__global double *) mem_120177)[(sext_i32_i64(copy_gtid_126395) - + squot64(sext_i32_i64(copy_gtid_126395), + n_70864) * n_70864) * + m_70861 + + squot64(sext_i32_i64(copy_gtid_126395), + n_70864)]; + } + error_0: + return; +} +__kernel void mainDetailedzicopy_126400(int64_t m_70861, int64_t n_70864, + __global unsigned char *mem_120180, + __global unsigned char *mem_120228) +{ const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46542_backing_0 = - 
(__local volatile - char *) red_arr_mem_46542_backing_aligned_0; - volatile __local bool local_failure; + int32_t copy_gtid_126400; + int32_t copy_ltid_126401; + int32_t copy_gid_126402; + + copy_gtid_126400 = get_global_id(0); + copy_ltid_126401 = get_local_id(0); + copy_gid_126402 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126400), m_70861 * n_70864)) { + ((__global + int64_t *) mem_120228)[squot64(sext_i32_i64(copy_gtid_126400), + n_70864) * n_70864 + + (sext_i32_i64(copy_gtid_126400) - + squot64(sext_i32_i64(copy_gtid_126400), + n_70864) * n_70864)] = ((__global + int64_t *) mem_120180)[(sext_i32_i64(copy_gtid_126400) - + squot64(sext_i32_i64(copy_gtid_126400), + n_70864) * + n_70864) * + m_70861 + + squot64(sext_i32_i64(copy_gtid_126400), + n_70864)]; + } - if (failure_is_an_option) { - int failed = *global_failure >= 0; - - if (failed) - return; + error_0: + return; +} +__kernel void mainDetailedzicopy_126478(int64_t m_70861, int64_t n_70864, + int64_t m_70956, __global + unsigned char *mem_120201, __global + unsigned char *mem_120203) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126478; + int32_t copy_ltid_126479; + int32_t copy_gid_126480; + + copy_gtid_126478 = get_global_id(0); + copy_ltid_126479 = get_local_id(0); + copy_gid_126480 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126478), m_70861)) { + ((__global int64_t *) mem_120203)[sext_i32_i64(copy_gtid_126478)] = + ((__global int64_t *) mem_120201)[m_70956 + + sext_i32_i64(copy_gtid_126478) * + n_70864]; } - local_failure = false; - barrier(CLK_LOCAL_MEM_FENCE); - int32_t global_tid_46537; - int32_t local_tid_46538; - int64_t group_sizze_46541; - int32_t wave_sizze_46540; - int32_t group_tid_46539; + error_0: + return; +} +__kernel void mainDetailedzicopy_126574(int64_t m_70861, int64_t n_70864, + int64_t k2p2zq_70876, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120257) +{ + const 
int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126574; + int32_t copy_ltid_126575; + int32_t copy_gid_126576; + + copy_gtid_126574 = get_global_id(0); + copy_ltid_126575 = get_local_id(0); + copy_gid_126576 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126574), m_70861 * k2p2zq_70876)) { + ((__global double *) mem_120257)[(sext_i32_i64(copy_gtid_126574) - + squot64(sext_i32_i64(copy_gtid_126574), + k2p2zq_70876) * + k2p2zq_70876) * m_70861 + + squot64(sext_i32_i64(copy_gtid_126574), + k2p2zq_70876)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_126574), + k2p2zq_70876) * + n_70864 + + (sext_i32_i64(copy_gtid_126574) - + squot64(sext_i32_i64(copy_gtid_126574), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_126579(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + __global unsigned char *mem_120246, + __global unsigned char *mem_120261) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126579; + int32_t copy_ltid_126580; + int32_t copy_gid_126581; + + copy_gtid_126579 = get_global_id(0); + copy_ltid_126580 = get_local_id(0); + copy_gid_126581 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126579), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global + double *) mem_120261)[squot64(sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * (m_70861 * + k2p2zq_70876) + + (sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876) * + m_70861 + + 
squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + (defunc_2_reduce_res_70985 * m_70861) + + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126579) - + squot64(sext_i32_i64(copy_gtid_126579), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_126584(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + __global unsigned char *mem_120246, + __global unsigned char *mem_120265) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126584; + int32_t copy_ltid_126585; + int32_t copy_gid_126586; + + copy_gtid_126584 = get_global_id(0); + copy_ltid_126585 = get_local_id(0); + copy_gid_126586 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126584), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global double *) mem_120265)[(sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876) * (k2p2zq_70876 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * k2p2zq_70876) * + k2p2zq_70876 + + squot64(sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + 
k2p2zq_70876)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126584) - + squot64(sext_i32_i64(copy_gtid_126584), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_126694(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + __global unsigned char *mem_120246, + __global unsigned char *mem_120894) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126694; + int32_t copy_ltid_126695; + int32_t copy_gid_126696; + + copy_gtid_126694 = get_global_id(0); + copy_ltid_126695 = get_local_id(0); + copy_gid_126696 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126694), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global double *) mem_120894)[(sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876) * (k2p2zq_70876 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * k2p2zq_70876) * + k2p2zq_70876 + + squot64(sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876)] = ((__global + double *) 
mem_120246)[squot64(sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126694) - + squot64(sext_i32_i64(copy_gtid_126694), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_126798(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + __global unsigned char *mem_120246, + __global unsigned char *mem_121001) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126798; + int32_t copy_ltid_126799; + int32_t copy_gid_126800; + + copy_gtid_126798 = get_global_id(0); + copy_ltid_126799 = get_local_id(0); + copy_gid_126800 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126798), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global + double *) mem_121001)[squot64(sext_i32_i64(copy_gtid_126798) - + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * (m_70861 * + k2p2zq_70876) + + (sext_i32_i64(copy_gtid_126798) - + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126798) - + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876) * + m_70861 + + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126798) - 
+ squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + (defunc_2_reduce_res_70985 * m_70861) + + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_126798) - + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126798) - + squot64(sext_i32_i64(copy_gtid_126798), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_126882(int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121351, + __global unsigned char *mem_121363) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126882; + int32_t copy_ltid_126883; + int32_t copy_gid_126884; + + copy_gtid_126882 = get_global_id(0); + copy_ltid_126883 = get_local_id(0); + copy_gid_126884 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126882), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global + double *) mem_121363)[squot64(sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * (k2p2zq_70876 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * k2p2zq_70876) * + k2p2zq_70876 + (sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)] = ((__global + double *) mem_121351)[squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) + + 
squot64(sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876 + + (sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_126882) - + squot64(sext_i32_i64(copy_gtid_126882), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127146(int64_t m_70861, int64_t n_70864, + int64_t k2p2zq_70876, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_121850) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127146; + int32_t copy_ltid_127147; + int32_t copy_gid_127148; + + copy_gtid_127146 = get_global_id(0); + copy_ltid_127147 = get_local_id(0); + copy_gid_127148 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127146), m_70861 * k2p2zq_70876)) { + ((__global double *) mem_121850)[(sext_i32_i64(copy_gtid_127146) - + squot64(sext_i32_i64(copy_gtid_127146), + k2p2zq_70876) * + k2p2zq_70876) * m_70861 + + squot64(sext_i32_i64(copy_gtid_127146), + k2p2zq_70876)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127146), + k2p2zq_70876) * + n_70864 + + (sext_i32_i64(copy_gtid_127146) - + squot64(sext_i32_i64(copy_gtid_127146), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127151(int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121854, + __global unsigned char *mem_121858) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127151; + int32_t copy_ltid_127152; + int32_t copy_gid_127153; + + copy_gtid_127151 = get_global_id(0); + copy_ltid_127152 = get_local_id(0); + 
copy_gid_127153 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127151), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global double *) mem_121858)[(sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876) * (m_70861 * + k2p2zq_70876) + + squot64(sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * m_70861 + + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * k2p2zq_70876)] = + ((__global + double *) mem_121854)[squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) + + squot64(sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_127151) - + squot64(sext_i32_i64(copy_gtid_127151), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127219(int64_t m_70861, int64_t n_70864, + int64_t rp1_71562, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122017) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127219; + int32_t copy_ltid_127220; + int32_t copy_gid_127221; + + copy_gtid_127219 = get_global_id(0); + copy_ltid_127220 = get_local_id(0); + copy_gid_127221 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127219), m_70861 * 
rp1_71562)) { + ((__global double *) mem_122017)[(sext_i32_i64(copy_gtid_127219) - + squot64(sext_i32_i64(copy_gtid_127219), + rp1_71562) * rp1_71562) * + m_70861 + + squot64(sext_i32_i64(copy_gtid_127219), + rp1_71562)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127219), + rp1_71562) * + n_70864 + + (sext_i32_i64(copy_gtid_127219) - + squot64(sext_i32_i64(copy_gtid_127219), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127224(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, __global + unsigned char *mem_120246, __global + unsigned char *mem_122021) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127224; + int32_t copy_ltid_127225; + int32_t copy_gid_127226; + + copy_gtid_127224 = get_global_id(0); + copy_ltid_127225 = get_local_id(0); + copy_gid_127226 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127224), m_70861 * k2p2zq_70876 * + rp1_71562)) { + ((__global + double *) mem_122021)[squot64(sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), rp1_71562) * + (m_70861 * rp1_71562) + + (sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562) * m_70861 + + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * rp1_71562)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * + rp1_71562) * + 
defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127224) - + squot64(sext_i32_i64(copy_gtid_127224), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127229(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, __global + unsigned char *mem_120246, __global + unsigned char *mem_122025) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127229; + int32_t copy_ltid_127230; + int32_t copy_gid_127231; + + copy_gtid_127229 = get_global_id(0); + copy_ltid_127230 = get_local_id(0); + copy_gid_127231 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127229), m_70861 * k2p2zq_70876 * + rp1_71562)) { + ((__global double *) mem_122025)[(sext_i32_i64(copy_gtid_127229) - + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127229) - + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562) * + (k2p2zq_70876 * m_70861) + + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * rp1_71562) * + k2p2zq_70876 + + squot64(sext_i32_i64(copy_gtid_127229) - + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127229) - + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_127229) - + 
squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127229) - + squot64(sext_i32_i64(copy_gtid_127229), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127478(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, __global + unsigned char *mem_120246, __global + unsigned char *mem_122686) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127478; + int32_t copy_ltid_127479; + int32_t copy_gid_127480; + + copy_gtid_127478 = get_global_id(0); + copy_ltid_127479 = get_local_id(0); + copy_gid_127480 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127478), m_70861 * k2p2zq_70876 * + rp1_71562)) { + ((__global double *) mem_122686)[(sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562) * + (k2p2zq_70876 * m_70861) + + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * rp1_71562) * + k2p2zq_70876 + + squot64(sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * 
+ rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127478) - + squot64(sext_i32_i64(copy_gtid_127478), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127582(int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, __global + unsigned char *mem_120246, __global + unsigned char *mem_122793) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127582; + int32_t copy_ltid_127583; + int32_t copy_gid_127584; + + copy_gtid_127582 = get_global_id(0); + copy_ltid_127583 = get_local_id(0); + copy_gid_127584 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127582), m_70861 * k2p2zq_70876 * + rp1_71562)) { + ((__global + double *) mem_122793)[squot64(sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), rp1_71562) * + (m_70861 * rp1_71562) + + (sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562) * m_70861 + + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * rp1_71562)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + (defunc_2_reduce_res_70985 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * + rp1_71562) * + defunc_2_reduce_res_70985 + + (sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127582) - + squot64(sext_i32_i64(copy_gtid_127582), + 
k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * + rp1_71562), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127666(int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_123143, + __global unsigned char *mem_123155) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127666; + int32_t copy_ltid_127667; + int32_t copy_gid_127668; + + copy_gtid_127666 = get_global_id(0); + copy_ltid_127667 = get_local_id(0); + copy_gid_127668 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127666), m_70861 * k2p2zq_70876 * + k2p2zq_70876)) { + ((__global + double *) mem_123155)[squot64(sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * (k2p2zq_70876 * + m_70861) + + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * k2p2zq_70876) * + k2p2zq_70876 + (sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)] = ((__global + double *) mem_123143)[squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) + + squot64(sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876 + + (sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876) - + squot64(sext_i32_i64(copy_gtid_127666) - + squot64(sext_i32_i64(copy_gtid_127666), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)]; + } + + 
error_0: + return; +} +__kernel void mainDetailedzicopy_127930(int64_t m_70861, int64_t n_70864, + int64_t rp1_71562, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123633) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127930; + int32_t copy_ltid_127931; + int32_t copy_gid_127932; + + copy_gtid_127930 = get_global_id(0); + copy_ltid_127931 = get_local_id(0); + copy_gid_127932 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127930), m_70861 * rp1_71562)) { + ((__global double *) mem_123633)[(sext_i32_i64(copy_gtid_127930) - + squot64(sext_i32_i64(copy_gtid_127930), + rp1_71562) * rp1_71562) * + m_70861 + + squot64(sext_i32_i64(copy_gtid_127930), + rp1_71562)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127930), + rp1_71562) * + n_70864 + + (sext_i32_i64(copy_gtid_127930) - + squot64(sext_i32_i64(copy_gtid_127930), + rp1_71562) * + rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_127935(int64_t m_70861, int64_t k2p2zq_70876, + int64_t rp1_71562, __global + unsigned char *mem_123637, __global + unsigned char *mem_123641) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127935; + int32_t copy_ltid_127936; + int32_t copy_gid_127937; + + copy_gtid_127935 = get_global_id(0); + copy_ltid_127936 = get_local_id(0); + copy_gid_127937 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127935), m_70861 * k2p2zq_70876 * + rp1_71562)) { + ((__global double *) mem_123641)[(sext_i32_i64(copy_gtid_127935) - + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127935) - + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562) * + (m_70861 * k2p2zq_70876) + + squot64(sext_i32_i64(copy_gtid_127935) - + 
squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * + rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * m_70861 + + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562)] = + ((__global + double *) mem_123637)[squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562) * + (rp1_71562 * k2p2zq_70876) + + squot64(sext_i32_i64(copy_gtid_127935) - + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562 + + (sext_i32_i64(copy_gtid_127935) - + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562) - + squot64(sext_i32_i64(copy_gtid_127935) - + squot64(sext_i32_i64(copy_gtid_127935), + k2p2zq_70876 * rp1_71562) * + (k2p2zq_70876 * rp1_71562), + rp1_71562) * rp1_71562)]; + } + + error_0: + return; +} +__kernel void mainDetailedzicopy_129324(int64_t N_70860, int64_t m_70861, + int64_t i_72637, __global + unsigned char *mem_124906, __global + unsigned char *mem_124911) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_129324; + int32_t copy_ltid_129325; + int32_t copy_gid_129326; + + copy_gtid_129324 = get_global_id(0); + copy_ltid_129325 = get_local_id(0); + copy_gid_129326 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_129324), m_70861)) { + ((__global int64_t *) mem_124911)[sext_i32_i64(copy_gtid_129324)] = + ((__global int64_t *) mem_124906)[i_72637 + + sext_i32_i64(copy_gtid_129324) * + N_70860]; + } + + error_0: + return; +} +__kernel void mainDetailedziscan_stage1_77650(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126430_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t m_70956, + int32_t num_threads_126424, + __global + unsigned char *images_mem_120108, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_77764 (mainDetailedzisegscan_group_sizze_77644) + + const int block_dim0 = 
0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_126430_backing_0 = + (__local volatile + char *) scan_arr_mem_126430_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126425; + int32_t local_tid_126426; + int64_t group_sizze_126429; + int32_t wave_sizze_126428; + int32_t group_tid_126427; - global_tid_46537 = get_global_id(0); - local_tid_46538 = get_local_id(0); - group_sizze_46541 = get_local_size(0); - wave_sizze_46540 = LOCKSTEP_WIDTH; - group_tid_46539 = get_group_id(0); + global_tid_126425 = get_global_id(0); + local_tid_126426 = get_local_id(0); + group_sizze_126429 = get_local_size(0); + wave_sizze_126428 = LOCKSTEP_WIDTH; + group_tid_126427 = get_group_id(0); - int32_t phys_tid_37181; + int32_t phys_tid_77650; - phys_tid_37181 = global_tid_46537; + phys_tid_77650 = global_tid_126425; - __local char *red_arr_mem_46542; + __local char *scan_arr_mem_126430; - red_arr_mem_46542 = (__local char *) red_arr_mem_46542_backing_0; + scan_arr_mem_126430 = (__local char *) scan_arr_mem_126430_backing_0; - int32_t phys_group_id_46544; + int64_t x_77768; + int64_t x_77769; - phys_group_id_46544 = get_group_id(0); - for (int32_t i_46545 = 0; i_46545 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535))) - - phys_group_id_46544, sext_i64_i32(num_groups_37203)); - i_46545++) { - int32_t virt_group_id_46546 = phys_group_id_46544 + i_46545 * - sext_i64_i32(num_groups_37203); - int64_t gtid_37172 = squot64(sext_i32_i64(local_tid_46538), - segment_sizze_nonzzero_46535) + - sext_i32_i64(virt_group_id_46546) * - squot64(segred_group_sizze_37202, segment_sizze_nonzzero_46535); - int64_t gtid_37180 = srem64(sext_i32_i64(local_tid_46538), - i32_res_28880); + x_77768 = (int64_t) 0; + for (int64_t j_126432 = 0; j_126432 < sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424)); + j_126432++) { + int64_t chunk_offset_126433 = 
segscan_group_sizze_77764 * j_126432 + + sext_i32_i64(group_tid_126427) * (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))); + int64_t flat_idx_126434 = chunk_offset_126433 + + sext_i32_i64(local_tid_126426); + int64_t gtid_77641 = squot64(flat_idx_126434, n_70864); + int64_t gtid_77649 = flat_idx_126434 - squot64(flat_idx_126434, + n_70864) * n_70864; - // apply map function if in bounds + // threads in bounds read input + { + if (slt64(gtid_77641, m_70861) && slt64(gtid_77649, n_70864)) { + int64_t binop_y_115026 = (int64_t) -1 * gtid_77649; + int64_t slice_115027 = m_70956 + binop_y_115026; + double x_77772 = ((__global + double *) images_mem_120108)[gtid_77641 * + N_70860 + + slice_115027]; + bool defunc_0_f_res_77773; + + defunc_0_f_res_77773 = futrts_isnan64(x_77772); + + bool defunc_0_g_res_77774 = !defunc_0_f_res_77773; + int64_t defunc_0_f_res_77775 = + btoi_bool_i64(defunc_0_g_res_77774); + + // write to-scan values to parameters + { + x_77769 = defunc_0_f_res_77775; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation { - if (slt64((int64_t) 0, i32_res_28880) && (slt64(gtid_37172, - m_28478) && - slt64(sext_i32_i64(local_tid_46538), - i32_res_28880 * - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535)))) { - int32_t x_37211 = ((__global - int32_t *) defunc_3_map_res_mem_45244)[gtid_37172]; - int32_t index_primexp_42390 = sext_i64_i32(gtid_37180); - bool cond_37213 = slt32(index_primexp_42390, x_37211); - float defunc_0_f_res_37214; - - if (cond_37213) { - int32_t x_37210 = ((__global - int32_t *) defunc_3_map_res_mem_45245)[gtid_37172]; - int32_t x_37215 = add32(x_37210, index_primexp_42390); - int32_t x_37216 = sub32(x_37215, x_37211); - int32_t i_37217 = add32(1, x_37216); - int64_t i_37218 = sext_i32_i64(i_37217); - bool x_37219 = sle64((int64_t) 0, i_37218); - bool y_37220 = slt64(i_37218, N_28477); - bool bounds_check_37221 = x_37219 
&& y_37220; - bool index_certs_37222; - - if (!bounds_check_37221) { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_77641, m_70861) && slt64(gtid_77649, + n_70864))) { + x_77769 = (int64_t) 0; + } + } + // combine with carry and write to local memory + { + int64_t defunc_1_op_res_77770 = add64(x_77768, x_77769); + + ((__local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)] = + defunc_1_op_res_77770; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_126435; + int64_t x_126436; + int64_t x_126438; + int64_t x_126439; + bool ltid_in_bounds_126441; + + ltid_in_bounds_126441 = slt64(sext_i32_i64(local_tid_126426), + segscan_group_sizze_77764); + + int32_t skip_threads_126442; + + // read input for in-block scan + { + if (ltid_in_bounds_126441) { + x_126436 = ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)]; + if ((local_tid_126426 - squot32(local_tid_126426, 32) * + 32) == 0) { + x_126435 = x_126436; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126442 = 1; + while (slt32(skip_threads_126442, 32)) { + if (sle32(skip_threads_126442, local_tid_126426 - + squot32(local_tid_126426, 32) * 32) && + ltid_in_bounds_126441) { + // read operands { - if (atomic_cmpxchg_i32_global(global_failure, -1, - 101) == -1) { - global_failure_args[0] = i_37218; - global_failure_args[1] = N_28477; - ; + x_126435 = ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426) - + sext_i32_i64(skip_threads_126442)]; + } + // perform operation + { + bool inactive_126443 = + slt64(srem64(sext_i32_i64(local_tid_126426) + + chunk_offset_126433, n_70864), + sext_i32_i64(local_tid_126426) + + chunk_offset_126433 - + (sext_i32_i64(local_tid_126426 - + skip_threads_126442) + + chunk_offset_126433)); + + if (inactive_126443) { + x_126435 = x_126436; + } + if (!inactive_126443) { + int64_t defunc_1_op_res_126437 = add64(x_126435, + x_126436); + + 
x_126435 = defunc_1_op_res_126437; } - local_failure = true; - goto error_0; } } - - float defunc_0_f_res_t_res_37223 = ((__global - float *) defunc_4_map_res_mem_45178)[gtid_37172 * - N_28477 + - i_37218]; - - defunc_0_f_res_37214 = defunc_0_f_res_t_res_37223; - } else { - defunc_0_f_res_37214 = 0.0F; + if (sle32(wave_sizze_126428, skip_threads_126442)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126442, local_tid_126426 - + squot32(local_tid_126426, 32) * 32) && + ltid_in_bounds_126441) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)] = + x_126435; + x_126436 = x_126435; + } + } + if (sle32(wave_sizze_126428, skip_threads_126442)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126442 *= 2; } - // save map-out results - { } - // save results to be reduced - { - ((__local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - defunc_0_f_res_37214; + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126426 - squot32(local_tid_126426, 32) * 32) == + 31 && ltid_in_bounds_126441) { + ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(squot32(local_tid_126426, + 32))] = + x_126435; } - } else { - ((__local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - 0.0F; } - } - - error_0: - barrier(CLK_LOCAL_MEM_FENCE); - if (local_failure) - return; - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, i32_res_28880)) { - // perform segmented scan to imitate reduction + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' { - float x_37206; - float x_37207; - float x_46547; - float x_46548; - bool ltid_in_bounds_46550; - - ltid_in_bounds_46550 = slt64(sext_i32_i64(local_tid_46538), - i32_res_28880 * - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535)); - - int32_t skip_threads_46551; + int32_t 
skip_threads_126444; // read input for in-block scan { - if (ltid_in_bounds_46550) { - x_37207 = ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)]; - if ((local_tid_46538 - squot32(local_tid_46538, 32) * + if (squot32(local_tid_126426, 32) == 0 && + ltid_in_bounds_126441) { + x_126439 = ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)]; + if ((local_tid_126426 - squot32(local_tid_126426, 32) * 32) == 0) { - x_37206 = x_37207; + x_126438 = x_126439; } } } // in-block scan (hopefully no barriers needed) { - skip_threads_46551 = 1; - while (slt32(skip_threads_46551, 32)) { - if (sle32(skip_threads_46551, local_tid_46538 - - squot32(local_tid_46538, 32) * 32) && - ltid_in_bounds_46550) { + skip_threads_126444 = 1; + while (slt32(skip_threads_126444, 32)) { + if (sle32(skip_threads_126444, local_tid_126426 - + squot32(local_tid_126426, 32) * 32) && + (squot32(local_tid_126426, 32) == 0 && + ltid_in_bounds_126441)) { // read operands { - x_37206 = ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538) - - sext_i32_i64(skip_threads_46551)]; + x_126438 = ((volatile __local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426) - + sext_i32_i64(skip_threads_126444)]; } // perform operation { - bool inactive_46552 = - slt64(srem64(sext_i32_i64(local_tid_46538), - i32_res_28880), - sext_i32_i64(local_tid_46538) - - sext_i32_i64(local_tid_46538 - - skip_threads_46551)); + bool inactive_126445 = + slt64(srem64(sext_i32_i64(local_tid_126426 * + 32 + 32 - 1) + + chunk_offset_126433, n_70864), + sext_i32_i64(local_tid_126426 * 32 + + 32 - 1) + chunk_offset_126433 - + (sext_i32_i64((local_tid_126426 - + skip_threads_126444) * + 32 + 32 - 1) + + chunk_offset_126433)); - if (inactive_46552) { - x_37206 = x_37207; + if (inactive_126445) { + x_126438 = x_126439; } - if (!inactive_46552) { - float defunc_1_op_res_37208 = x_37206 + - x_37207; + if (!inactive_126445) { + int64_t 
defunc_1_op_res_126440 = + add64(x_126438, x_126439); - x_37206 = defunc_1_op_res_37208; + x_126438 = defunc_1_op_res_126440; } } } - if (sle32(wave_sizze_46540, skip_threads_46551)) { + if (sle32(wave_sizze_126428, skip_threads_126444)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46551, local_tid_46538 - - squot32(local_tid_46538, 32) * 32) && - ltid_in_bounds_46550) { + if (sle32(skip_threads_126444, local_tid_126426 - + squot32(local_tid_126426, 32) * 32) && + (squot32(local_tid_126426, 32) == 0 && + ltid_in_bounds_126441)) { // write result { ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - x_37206; - x_37207 = x_37206; + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)] = + x_126438; + x_126439 = x_126438; } } - if (sle32(wave_sizze_46540, skip_threads_46551)) { + if (sle32(wave_sizze_126428, skip_threads_126444)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46551 *= 2; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46538 - squot32(local_tid_46538, 32) * 32) == - 31 && ltid_in_bounds_46550) { - ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(squot32(local_tid_46538, - 32))] = - x_37206; + skip_threads_126444 *= 2; } } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46553; - - // read input for in-block scan + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126426, 32) == 0 || + !ltid_in_bounds_126441)) { + // read operands { - if (squot32(local_tid_46538, 32) == 0 && - ltid_in_bounds_46550) { - x_46548 = ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)]; - if ((local_tid_46538 - squot32(local_tid_46538, - 32) * 32) == 0) { - x_46547 = x_46548; - } - } + x_126436 = x_126435; + x_126435 = ((__local + int64_t *) 
scan_arr_mem_126430)[sext_i32_i64(squot32(local_tid_126426, + 32)) - + (int64_t) 1]; } - // in-block scan (hopefully no barriers needed) + // perform operation { - skip_threads_46553 = 1; - while (slt32(skip_threads_46553, 32)) { - if (sle32(skip_threads_46553, local_tid_46538 - - squot32(local_tid_46538, 32) * 32) && - (squot32(local_tid_46538, 32) == 0 && - ltid_in_bounds_46550)) { - // read operands - { - x_46547 = ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538) - - sext_i32_i64(skip_threads_46553)]; - } - // perform operation - { - bool inactive_46554 = - slt64(srem64(sext_i32_i64(local_tid_46538 * - 32 + 32 - 1), - i32_res_28880), - sext_i32_i64(local_tid_46538 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46538 - - skip_threads_46553) * - 32 + 32 - 1)); - - if (inactive_46554) { - x_46547 = x_46548; - } - if (!inactive_46554) { - float defunc_1_op_res_46549 = x_46547 + - x_46548; - - x_46547 = defunc_1_op_res_46549; - } - } - } - if (sle32(wave_sizze_46540, skip_threads_46553)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - if (sle32(skip_threads_46553, local_tid_46538 - - squot32(local_tid_46538, 32) * 32) && - (squot32(local_tid_46538, 32) == 0 && - ltid_in_bounds_46550)) { - // write result - { - ((volatile __local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - x_46547; - x_46548 = x_46547; - } - } - if (sle32(wave_sizze_46540, skip_threads_46553)) { - barrier(CLK_LOCAL_MEM_FENCE); - } - skip_threads_46553 *= 2; - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first - { - if (!(squot32(local_tid_46538, 32) == 0 || - !ltid_in_bounds_46550)) { - // read operands - { - x_37207 = x_37206; - x_37206 = ((__local - float *) red_arr_mem_46542)[sext_i32_i64(squot32(local_tid_46538, - 32)) - - (int64_t) 1]; + bool inactive_126446 = + slt64(srem64(sext_i32_i64(local_tid_126426) + + chunk_offset_126433, n_70864), + sext_i32_i64(local_tid_126426) + + chunk_offset_126433 - + 
(sext_i32_i64(squot32(local_tid_126426, 32) * + 32 - 1) + chunk_offset_126433)); + + if (inactive_126446) { + x_126435 = x_126436; } - // perform operation - { - bool inactive_46555 = - slt64(srem64(sext_i32_i64(local_tid_46538), - i32_res_28880), - sext_i32_i64(local_tid_46538) - - sext_i32_i64(squot32(local_tid_46538, - 32) * 32 - 1)); + if (!inactive_126446) { + int64_t defunc_1_op_res_126437 = add64(x_126435, + x_126436); - if (inactive_46555) { - x_37206 = x_37207; - } - if (!inactive_46555) { - float defunc_1_op_res_37208 = x_37206 + x_37207; - - x_37206 = defunc_1_op_res_37208; - } - } - // write final result - { - ((__local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - x_37206; + x_126435 = defunc_1_op_res_126437; } } - } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block - { - if (squot32(local_tid_46538, 32) == 0) { + // write final result + { ((__local - float *) red_arr_mem_46542)[sext_i32_i64(local_tid_46538)] = - x_37207; + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)] = + x_126435; } } - barrier(CLK_LOCAL_MEM_FENCE); } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46546) * - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535) + - sext_i32_i64(local_tid_46538), m_28478) && - slt64(sext_i32_i64(local_tid_46538), - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535))) { - ((__global - float *) mem_45278)[sext_i32_i64(virt_group_id_46546) * - squot64(segred_group_sizze_37202, - segment_sizze_nonzzero_46535) + - sext_i32_i64(local_tid_46538)] = ((__local - float *) red_arr_mem_46542)[(sext_i32_i64(local_tid_46538) + - (int64_t) 1) * - segment_sizze_nonzzero_46535 - - (int64_t) 1]; + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126426, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)] = + x_126436; + } + } 
+ barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_77641, m_70861) && slt64(gtid_77649, n_70864)) { + ((__global int64_t *) mem_120201)[gtid_77641 * n_70864 + + gtid_77649] = ((__local + int64_t *) scan_arr_mem_126430)[sext_i32_i64(local_tid_126426)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_126447 = slt64(srem64(chunk_offset_126433 + + segscan_group_sizze_77764, + n_70864), + chunk_offset_126433 + + segscan_group_sizze_77764 - + (chunk_offset_126433 + + segscan_group_sizze_77764 - + (int64_t) 1)); + bool should_load_carry_126448 = local_tid_126426 == 0 && + !crosses_segment_126447; + + if (should_load_carry_126448) { + x_77768 = ((__local + int64_t *) scan_arr_mem_126430)[segscan_group_sizze_77764 - + (int64_t) 1]; + } + if (!should_load_carry_126448) { + x_77768 = (int64_t) 0; + } } + barrier(CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_37202 + #undef segscan_group_sizze_77764 } -__kernel void mainMagnitudezisegred_small_38300(__global int *global_failure, - __local volatile - int64_t *red_arr_mem_46724_backing_aligned_0, - __local volatile - int64_t *red_arr_mem_46722_backing_aligned_1, - __local volatile - int64_t *red_arr_mem_46720_backing_aligned_2, - int64_t m_28478, - int64_t iota32_arg_28909, - int64_t num_groups_38487, - int64_t segment_sizze_nonzzero_46713, - __global - unsigned char *mem_45284, - __global - unsigned char *mem_45337, - __global - unsigned char *mem_45339, - __global - unsigned char *mem_45343, - __global - unsigned char *mem_45346, - __global - unsigned char *mem_45348, - __global - unsigned char *mem_45350) +__kernel void mainDetailedziscan_stage1_86129(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t 
*scan_arr_mem_128487_backing_aligned_0, + int64_t m_70861, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int32_t num_threads_128481, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124057, + __global + unsigned char *mem_124061) { - #define segred_group_sizze_38486 (mainMagnitudezisegred_group_sizze_38294) + #define segscan_group_sizze_86278 (mainDetailedzisegscan_group_sizze_86123) const int block_dim0 = 0; const int block_dim1 = 1; const int block_dim2 = 2; - __local volatile char *restrict red_arr_mem_46724_backing_2 = + __local volatile char *restrict scan_arr_mem_128487_backing_0 = (__local volatile - char *) red_arr_mem_46724_backing_aligned_0; - __local volatile char *restrict red_arr_mem_46722_backing_1 = - (__local volatile - char *) red_arr_mem_46722_backing_aligned_1; - __local volatile char *restrict red_arr_mem_46720_backing_0 = - (__local volatile - char *) red_arr_mem_46720_backing_aligned_2; - - if (*global_failure >= 0) - return; - - int32_t global_tid_46715; - int32_t local_tid_46716; - int64_t group_sizze_46719; - int32_t wave_sizze_46718; - int32_t group_tid_46717; - - global_tid_46715 = get_global_id(0); - local_tid_46716 = get_local_id(0); - group_sizze_46719 = get_local_size(0); - wave_sizze_46718 = LOCKSTEP_WIDTH; - group_tid_46717 = get_group_id(0); - - int32_t phys_tid_38300; + char *) scan_arr_mem_128487_backing_aligned_0; + volatile __local bool local_failure; - phys_tid_38300 = global_tid_46715; + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); - __local char *red_arr_mem_46720; + int32_t global_tid_128482; + int32_t local_tid_128483; + int64_t group_sizze_128486; + int32_t wave_sizze_128485; + int32_t group_tid_128484; - red_arr_mem_46720 = (__local char *) red_arr_mem_46720_backing_0; + global_tid_128482 = get_global_id(0); + local_tid_128483 = get_local_id(0); + group_sizze_128486 = 
get_local_size(0); + wave_sizze_128485 = LOCKSTEP_WIDTH; + group_tid_128484 = get_group_id(0); - __local char *red_arr_mem_46722; + int32_t phys_tid_86129; - red_arr_mem_46722 = (__local char *) red_arr_mem_46722_backing_1; + phys_tid_86129 = global_tid_128482; - __local char *red_arr_mem_46724; + __local char *scan_arr_mem_128487; - red_arr_mem_46724 = (__local char *) red_arr_mem_46724_backing_2; + scan_arr_mem_128487 = (__local char *) scan_arr_mem_128487_backing_0; - int32_t phys_group_id_46726; + double x_86282; + double x_86283; - phys_group_id_46726 = get_group_id(0); - for (int32_t i_46727 = 0; i_46727 < - sdiv_up32(sext_i64_i32(sdiv_up64(m_28478, - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713))) - - phys_group_id_46726, sext_i64_i32(num_groups_38487)); - i_46727++) { - int32_t virt_group_id_46728 = phys_group_id_46726 + i_46727 * - sext_i64_i32(num_groups_38487); - int64_t gtid_38291 = squot64(sext_i32_i64(local_tid_46716), - segment_sizze_nonzzero_46713) + - sext_i32_i64(virt_group_id_46728) * - squot64(segred_group_sizze_38486, segment_sizze_nonzzero_46713); - int64_t gtid_38299 = srem64(sext_i32_i64(local_tid_46716), - iota32_arg_28909); + x_86282 = 0.0; + for (int64_t j_128489 = 0; j_128489 < sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481)); + j_128489++) { + int64_t chunk_offset_128490 = segscan_group_sizze_86278 * j_128489 + + sext_i32_i64(group_tid_128484) * (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))); + int64_t flat_idx_128491 = chunk_offset_128490 + + sext_i32_i64(local_tid_128483); + int64_t gtid_86120 = squot64(flat_idx_128491, Nmk_72261); + int64_t gtid_86128 = flat_idx_128491 - squot64(flat_idx_128491, + Nmk_72261) * Nmk_72261; - // apply map function if in bounds + // threads in bounds read input { - if (slt64((int64_t) 0, iota32_arg_28909) && (slt64(gtid_38291, - m_28478) && - slt64(sext_i32_i64(local_tid_46716), - iota32_arg_28909 * - 
squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713)))) { - int32_t y_38506 = ((__global int32_t *) mem_45339)[gtid_38291]; - float y_38507 = ((__global float *) mem_45337)[gtid_38291]; - float x_38511 = ((__global float *) mem_45343)[gtid_38291 * - iota32_arg_28909 + - gtid_38299]; - float x_38512 = ((__global float *) mem_45284)[gtid_38299]; - int32_t index_primexp_42409 = sext_i64_i32(gtid_38299); - float defunc_0_f_res_38515 = x_38511 / y_38507; - bool cond_38516 = slt32(index_primexp_42409, y_38506); - bool isnan_res_38517; - - isnan_res_38517 = futrts_isnan32(defunc_0_f_res_38515); - - bool cond_t_res_38518 = !isnan_res_38517; - bool x_38519 = cond_38516 && cond_t_res_38518; - float abs_res_38520 = (float) fabs(defunc_0_f_res_38515); - bool defunc_2_f_res_t_res_38521 = x_38512 < abs_res_38520; - bool x_38522 = x_38519 && defunc_2_f_res_t_res_38521; - float defunc_1_f_res_38523; - - if (cond_38516) { - defunc_1_f_res_38523 = defunc_0_f_res_38515; + if (slt64(gtid_86120, m_70861) && slt64(gtid_86128, Nmk_72261)) { + bool cond_86288 = gtid_86128 == (int64_t) 0; + double defunc_0_f_res_86289; + + if (cond_86288) { + defunc_0_f_res_86289 = 0.0; } else { - defunc_1_f_res_38523 = 0.0F; + double fr_86286 = ((__global + double *) mem_124057)[gtid_86120]; + int64_t i_86290 = sub64(gtid_86128, (int64_t) 1); + bool x_86291 = sle64((int64_t) 0, i_86290); + bool y_86292 = slt64(i_86290, num_recresids_padded_71534); + bool bounds_check_86293 = x_86291 && y_86292; + bool index_certs_86294; + + if (!bounds_check_86293) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 388) == -1) { + global_failure_args[0] = i_86290; + global_failure_args[1] = + num_recresids_padded_71534; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_86295 = ((__global + double *) mem_124045)[gtid_86120 * + num_recresids_padded_71534 + + i_86290]; + double defunc_0_f_res_f_res_86296 = x_86295 / fr_86286; + + defunc_0_f_res_86289 = defunc_0_f_res_f_res_86296; } - 
// save map-out results - { } - // save results to be reduced + // write to-scan values to parameters { - ((__local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = - x_38522; - ((__local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - index_primexp_42409; - ((__local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - defunc_1_f_res_38523; + x_86283 = defunc_0_f_res_86289; } - } else { - ((__local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = 0; - ((__local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - -1; - ((__local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - 0.0F; + // write mapped values results to global memory + { } } } - barrier(CLK_LOCAL_MEM_FENCE); - if (slt64((int64_t) 0, iota32_arg_28909)) { - // perform segmented scan to imitate reduction + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_86120, m_70861) && slt64(gtid_86128, + Nmk_72261))) { + x_86283 = 0.0; + } + } + // combine with carry and write to local memory { - bool x_38492; - int32_t x_38493; - float x_38494; - bool x_38495; - int32_t x_38496; - float x_38497; - bool x_46729; - int32_t x_46730; - float x_46731; - bool x_46732; - int32_t x_46733; - float x_46734; - bool ltid_in_bounds_46743; - - ltid_in_bounds_46743 = slt64(sext_i32_i64(local_tid_46716), - iota32_arg_28909 * - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713)); - - int32_t skip_threads_46744; + double defunc_1_op_res_86284 = x_86282 + x_86283; - // read input for in-block scan - { - if (ltid_in_bounds_46743) { - x_38495 = ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)]; - x_38496 = ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)]; - x_38497 = ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)]; - if ((local_tid_46716 - 
squot32(local_tid_46716, 32) * - 32) == 0) { - x_38492 = x_38495; - x_38493 = x_38496; - x_38494 = x_38497; - } + ((__local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)] = + defunc_1_op_res_86284; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double x_128492; + double x_128493; + double x_128495; + double x_128496; + bool ltid_in_bounds_128498; + + ltid_in_bounds_128498 = slt64(sext_i32_i64(local_tid_128483), + segscan_group_sizze_86278); + + int32_t skip_threads_128499; + + // read input for in-block scan + { + if (ltid_in_bounds_128498) { + x_128493 = ((volatile __local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)]; + if ((local_tid_128483 - squot32(local_tid_128483, 32) * + 32) == 0) { + x_128492 = x_128493; } } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46744 = 1; - while (slt32(skip_threads_46744, 32)) { - if (sle32(skip_threads_46744, local_tid_46716 - - squot32(local_tid_46716, 32) * 32) && - ltid_in_bounds_46743) { - // read operands - { - x_38492 = ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46744)]; - x_38493 = ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46744)]; - x_38494 = ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46744)]; + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128499 = 1; + while (slt32(skip_threads_128499, 32)) { + if (sle32(skip_threads_128499, local_tid_128483 - + squot32(local_tid_128483, 32) * 32) && + ltid_in_bounds_128498) { + // read operands + { + x_128492 = ((volatile __local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483) - + sext_i32_i64(skip_threads_128499)]; + } + // perform operation + { + bool inactive_128500 = + slt64(srem64(sext_i32_i64(local_tid_128483) + + 
chunk_offset_128490, Nmk_72261), + sext_i32_i64(local_tid_128483) + + chunk_offset_128490 - + (sext_i32_i64(local_tid_128483 - + skip_threads_128499) + + chunk_offset_128490)); + + if (inactive_128500) { + x_128492 = x_128493; + } + if (!inactive_128500) { + double defunc_1_op_res_128494 = x_128492 + + x_128493; + + x_128492 = defunc_1_op_res_128494; + } + } + } + if (sle32(wave_sizze_128485, skip_threads_128499)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128499, local_tid_128483 - + squot32(local_tid_128483, 32) * 32) && + ltid_in_bounds_128498) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)] = + x_128492; + x_128493 = x_128492; + } + } + if (sle32(wave_sizze_128485, skip_threads_128499)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128499 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128483 - squot32(local_tid_128483, 32) * 32) == + 31 && ltid_in_bounds_128498) { + ((volatile __local + double *) scan_arr_mem_128487)[sext_i32_i64(squot32(local_tid_128483, + 32))] = + x_128492; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128501; + + // read input for in-block scan + { + if (squot32(local_tid_128483, 32) == 0 && + ltid_in_bounds_128498) { + x_128496 = ((volatile __local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)]; + if ((local_tid_128483 - squot32(local_tid_128483, 32) * + 32) == 0) { + x_128495 = x_128496; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128501 = 1; + while (slt32(skip_threads_128501, 32)) { + if (sle32(skip_threads_128501, local_tid_128483 - + squot32(local_tid_128483, 32) * 32) && + (squot32(local_tid_128483, 32) == 0 && + ltid_in_bounds_128498)) { + // read operands + { + x_128495 = ((volatile __local + double *) 
scan_arr_mem_128487)[sext_i32_i64(local_tid_128483) - + sext_i32_i64(skip_threads_128501)]; } // perform operation { - bool inactive_46745 = - slt64(srem64(sext_i32_i64(local_tid_46716), - iota32_arg_28909), - sext_i32_i64(local_tid_46716) - - sext_i32_i64(local_tid_46716 - - skip_threads_46744)); - - if (inactive_46745) { - x_38492 = x_38495; - x_38493 = x_38496; - x_38494 = x_38497; - } - if (!inactive_46745) { - bool defunc_1_op_res_38498; - int32_t defunc_1_op_res_38499; - - if (x_38492) { - defunc_1_op_res_38498 = x_38492; - defunc_1_op_res_38499 = x_38493; - } else { - bool x_38500 = x_38495 && x_38495; - bool x_38501 = !x_38495; - bool y_38502 = x_38492 && x_38501; - bool defunc_1_op_res_f_res_38503 = - x_38500 || y_38502; - int32_t defunc_1_op_res_f_res_38504; - - if (x_38495) { - defunc_1_op_res_f_res_38504 = - x_38496; - } else { - defunc_1_op_res_f_res_38504 = - x_38493; - } - defunc_1_op_res_38498 = - defunc_1_op_res_f_res_38503; - defunc_1_op_res_38499 = - defunc_1_op_res_f_res_38504; - } - - float defunc_1_op_res_38505 = x_38494 + - x_38497; + bool inactive_128502 = + slt64(srem64(sext_i32_i64(local_tid_128483 * + 32 + 32 - 1) + + chunk_offset_128490, + Nmk_72261), + sext_i32_i64(local_tid_128483 * 32 + + 32 - 1) + chunk_offset_128490 - + (sext_i32_i64((local_tid_128483 - + skip_threads_128501) * + 32 + 32 - 1) + + chunk_offset_128490)); + + if (inactive_128502) { + x_128495 = x_128496; + } + if (!inactive_128502) { + double defunc_1_op_res_128497 = x_128495 + + x_128496; - x_38492 = defunc_1_op_res_38498; - x_38493 = defunc_1_op_res_38499; - x_38494 = defunc_1_op_res_38505; + x_128495 = defunc_1_op_res_128497; } } } - if (sle32(wave_sizze_46718, skip_threads_46744)) { + if (sle32(wave_sizze_128485, skip_threads_128501)) { barrier(CLK_LOCAL_MEM_FENCE); } - if (sle32(skip_threads_46744, local_tid_46716 - - squot32(local_tid_46716, 32) * 32) && - ltid_in_bounds_46743) { + if (sle32(skip_threads_128501, local_tid_128483 - + squot32(local_tid_128483, 32) 
* 32) && + (squot32(local_tid_128483, 32) == 0 && + ltid_in_bounds_128498)) { // write result { ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = - x_38492; - x_38495 = x_38492; - ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - x_38493; - x_38496 = x_38493; - ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - x_38494; - x_38497 = x_38494; + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)] = + x_128495; + x_128496 = x_128495; } } - if (sle32(wave_sizze_46718, skip_threads_46744)) { + if (sle32(wave_sizze_128485, skip_threads_128501)) { barrier(CLK_LOCAL_MEM_FENCE); } - skip_threads_46744 *= 2; + skip_threads_128501 *= 2; } } - barrier(CLK_LOCAL_MEM_FENCE); - // last thread of block 'i' writes its result to offset 'i' - { - if ((local_tid_46716 - squot32(local_tid_46716, 32) * 32) == - 31 && ltid_in_bounds_46743) { - ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(squot32(local_tid_46716, - 32))] = - x_38492; - ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(squot32(local_tid_46716, - 32))] = - x_38493; - ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(squot32(local_tid_46716, - 32))] = - x_38494; + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128483, 32) == 0 || + !ltid_in_bounds_128498)) { + // read operands + { + x_128493 = x_128492; + x_128492 = ((__local + double *) scan_arr_mem_128487)[sext_i32_i64(squot32(local_tid_128483, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128503 = + slt64(srem64(sext_i32_i64(local_tid_128483) + + chunk_offset_128490, Nmk_72261), + sext_i32_i64(local_tid_128483) + + chunk_offset_128490 - + (sext_i32_i64(squot32(local_tid_128483, 32) * + 32 - 1) + chunk_offset_128490)); + + if (inactive_128503) { + x_128492 = x_128493; + } + if (!inactive_128503) { + double defunc_1_op_res_128494 = 
x_128492 + x_128493; + + x_128492 = defunc_1_op_res_128494; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)] = + x_128492; } } - barrier(CLK_LOCAL_MEM_FENCE); - // scan the first block, after which offset 'i' contains carry-in for block 'i+1' - { - int32_t skip_threads_46746; + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128483, 32) == 0) { + ((__local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)] = + x_128493; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_86120, m_70861) && slt64(gtid_86128, + Nmk_72261)) { + ((__global double *) mem_124061)[gtid_86120 * Nmk_72261 + + gtid_86128] = ((__local + double *) scan_arr_mem_128487)[sext_i32_i64(local_tid_128483)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_128504 = slt64(srem64(chunk_offset_128490 + + segscan_group_sizze_86278, + Nmk_72261), + chunk_offset_128490 + + segscan_group_sizze_86278 - + (chunk_offset_128490 + + segscan_group_sizze_86278 - + (int64_t) 1)); + bool should_load_carry_128505 = local_tid_128483 == 0 && + !crosses_segment_128504; + + if (should_load_carry_128505) { + x_86282 = ((__local + double *) scan_arr_mem_128487)[segscan_group_sizze_86278 - + (int64_t) 1]; + } + if (!should_load_carry_128505) { + x_86282 = 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_86278 +} +__kernel void mainDetailedziscan_stage1_88651(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129276_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int32_t num_threads_129270, + __global + unsigned char *mem_124142, + __global + unsigned char *defunc_3_map_res_mem_124883, + __global + unsigned char *mem_124906, + __global + unsigned char *mem_124909) 
+{ + #define segscan_group_sizze_88668 (mainDetailedzisegscan_group_sizze_88645) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129276_backing_0 = + (__local volatile + char *) scan_arr_mem_129276_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129271; + int32_t local_tid_129272; + int64_t group_sizze_129275; + int32_t wave_sizze_129274; + int32_t group_tid_129273; + + global_tid_129271 = get_global_id(0); + local_tid_129272 = get_local_id(0); + group_sizze_129275 = get_local_size(0); + wave_sizze_129274 = LOCKSTEP_WIDTH; + group_tid_129273 = get_group_id(0); + + int32_t phys_tid_88651; + + phys_tid_88651 = global_tid_129271; + + __local char *scan_arr_mem_129276; + + scan_arr_mem_129276 = (__local char *) scan_arr_mem_129276_backing_0; + + int64_t x_88673; + int64_t x_88674; + + x_88673 = (int64_t) 0; + for (int64_t j_129278 = 0; j_129278 < sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270)); + j_129278++) { + int64_t chunk_offset_129279 = segscan_group_sizze_88668 * j_129278 + + sext_i32_i64(group_tid_129273) * (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))); + int64_t flat_idx_129280 = chunk_offset_129279 + + sext_i32_i64(local_tid_129272); + int64_t gtid_88642 = squot64(flat_idx_129280, N_70860); + int64_t gtid_88650 = flat_idx_129280 - squot64(flat_idx_129280, + N_70860) * N_70860; + + // threads in bounds read input + { + if (slt64(gtid_88642, m_70861) && slt64(gtid_88650, N_70860)) { + double x_88678 = ((__global double *) mem_124142)[gtid_88642 * + N_70860 + + gtid_88650]; + bool isnan_res_88680; + + isnan_res_88680 = futrts_isnan64(x_88678); + + bool cond_88681 = !isnan_res_88680; + double defunc_1_f_res_88682; + + if (cond_88681) { + double x_88679 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_88642 * + N_70860 + + gtid_88650]; + double defunc_1_f_res_t_res_88683 
= x_88678 - x_88679; - // read input for in-block scan - { - if (squot32(local_tid_46716, 32) == 0 && - ltid_in_bounds_46743) { - x_46732 = ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)]; - x_46733 = ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)]; - x_46734 = ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)]; - if ((local_tid_46716 - squot32(local_tid_46716, - 32) * 32) == 0) { - x_46729 = x_46732; - x_46730 = x_46733; - x_46731 = x_46734; + defunc_1_f_res_88682 = defunc_1_f_res_t_res_88683; + } else { + defunc_1_f_res_88682 = NAN; + } + + bool isnan_res_88684; + + isnan_res_88684 = futrts_isnan64(defunc_1_f_res_88682); + + bool defunc_0_p_res_88685 = !isnan_res_88684; + int64_t defunc_0_f_res_88686 = + btoi_bool_i64(defunc_0_p_res_88685); + + // write to-scan values to parameters + { + x_88674 = defunc_0_f_res_88686; + } + // write mapped values results to global memory + { + ((__global double *) mem_124909)[gtid_88642 * N_70860 + + gtid_88650] = + defunc_1_f_res_88682; + } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_88642, m_70861) && slt64(gtid_88650, + N_70860))) { + x_88674 = (int64_t) 0; + } + } + // combine with carry and write to local memory + { + int64_t defunc_1_op_res_88675 = add64(x_88673, x_88674); + + ((__local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)] = + defunc_1_op_res_88675; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_129281; + int64_t x_129282; + int64_t x_129284; + int64_t x_129285; + bool ltid_in_bounds_129287; + + ltid_in_bounds_129287 = slt64(sext_i32_i64(local_tid_129272), + segscan_group_sizze_88668); + + int32_t skip_threads_129288; + + // read input for in-block scan + { + if (ltid_in_bounds_129287) { + x_129282 = ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)]; + if 
((local_tid_129272 - squot32(local_tid_129272, 32) * + 32) == 0) { + x_129281 = x_129282; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129288 = 1; + while (slt32(skip_threads_129288, 32)) { + if (sle32(skip_threads_129288, local_tid_129272 - + squot32(local_tid_129272, 32) * 32) && + ltid_in_bounds_129287) { + // read operands + { + x_129281 = ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272) - + sext_i32_i64(skip_threads_129288)]; + } + // perform operation + { + bool inactive_129289 = + slt64(srem64(sext_i32_i64(local_tid_129272) + + chunk_offset_129279, N_70860), + sext_i32_i64(local_tid_129272) + + chunk_offset_129279 - + (sext_i32_i64(local_tid_129272 - + skip_threads_129288) + + chunk_offset_129279)); + + if (inactive_129289) { + x_129281 = x_129282; + } + if (!inactive_129289) { + int64_t defunc_1_op_res_129283 = add64(x_129281, + x_129282); + + x_129281 = defunc_1_op_res_129283; } } } - // in-block scan (hopefully no barriers needed) - { - skip_threads_46746 = 1; - while (slt32(skip_threads_46746, 32)) { - if (sle32(skip_threads_46746, local_tid_46716 - - squot32(local_tid_46716, 32) * 32) && - (squot32(local_tid_46716, 32) == 0 && - ltid_in_bounds_46743)) { - // read operands - { - x_46729 = ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46746)]; - x_46730 = ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46746)]; - x_46731 = ((volatile __local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716) - - sext_i32_i64(skip_threads_46746)]; + if (sle32(wave_sizze_129274, skip_threads_129288)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129288, local_tid_129272 - + squot32(local_tid_129272, 32) * 32) && + ltid_in_bounds_129287) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)] = + 
x_129281; + x_129282 = x_129281; + } + } + if (sle32(wave_sizze_129274, skip_threads_129288)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129288 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129272 - squot32(local_tid_129272, 32) * 32) == + 31 && ltid_in_bounds_129287) { + ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(squot32(local_tid_129272, + 32))] = + x_129281; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129290; + + // read input for in-block scan + { + if (squot32(local_tid_129272, 32) == 0 && + ltid_in_bounds_129287) { + x_129285 = ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)]; + if ((local_tid_129272 - squot32(local_tid_129272, 32) * + 32) == 0) { + x_129284 = x_129285; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129290 = 1; + while (slt32(skip_threads_129290, 32)) { + if (sle32(skip_threads_129290, local_tid_129272 - + squot32(local_tid_129272, 32) * 32) && + (squot32(local_tid_129272, 32) == 0 && + ltid_in_bounds_129287)) { + // read operands + { + x_129284 = ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272) - + sext_i32_i64(skip_threads_129290)]; + } + // perform operation + { + bool inactive_129291 = + slt64(srem64(sext_i32_i64(local_tid_129272 * + 32 + 32 - 1) + + chunk_offset_129279, N_70860), + sext_i32_i64(local_tid_129272 * 32 + + 32 - 1) + chunk_offset_129279 - + (sext_i32_i64((local_tid_129272 - + skip_threads_129290) * + 32 + 32 - 1) + + chunk_offset_129279)); + + if (inactive_129291) { + x_129284 = x_129285; } - // perform operation - { - bool inactive_46747 = - slt64(srem64(sext_i32_i64(local_tid_46716 * - 32 + 32 - 1), - iota32_arg_28909), - sext_i32_i64(local_tid_46716 * - 32 + 32 - 1) - - sext_i32_i64((local_tid_46716 
- - skip_threads_46746) * - 32 + 32 - 1)); + if (!inactive_129291) { + int64_t defunc_1_op_res_129286 = + add64(x_129284, x_129285); - if (inactive_46747) { - x_46729 = x_46732; - x_46730 = x_46733; - x_46731 = x_46734; - } - if (!inactive_46747) { - bool defunc_1_op_res_46735; - int32_t defunc_1_op_res_46736; - - if (x_46729) { - defunc_1_op_res_46735 = x_46729; - defunc_1_op_res_46736 = x_46730; - } else { - bool x_46737 = x_46732 && x_46732; - bool x_46738 = !x_46732; - bool y_46739 = x_46729 && x_46738; - bool defunc_1_op_res_f_res_46740 = - x_46737 || y_46739; - int32_t defunc_1_op_res_f_res_46741; - - if (x_46732) { - defunc_1_op_res_f_res_46741 = - x_46733; - } else { - defunc_1_op_res_f_res_46741 = - x_46730; - } - defunc_1_op_res_46735 = - defunc_1_op_res_f_res_46740; - defunc_1_op_res_46736 = - defunc_1_op_res_f_res_46741; - } - - float defunc_1_op_res_46742 = x_46731 + - x_46734; - - x_46729 = defunc_1_op_res_46735; - x_46730 = defunc_1_op_res_46736; - x_46731 = defunc_1_op_res_46742; - } + x_129284 = defunc_1_op_res_129286; } } - if (sle32(wave_sizze_46718, skip_threads_46746)) { - barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(wave_sizze_129274, skip_threads_129290)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129290, local_tid_129272 - + squot32(local_tid_129272, 32) * 32) && + (squot32(local_tid_129272, 32) == 0 && + ltid_in_bounds_129287)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)] = + x_129284; + x_129285 = x_129284; } - if (sle32(skip_threads_46746, local_tid_46716 - - squot32(local_tid_46716, 32) * 32) && - (squot32(local_tid_46716, 32) == 0 && - ltid_in_bounds_46743)) { - // write result - { - ((volatile __local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = - x_46729; - x_46732 = x_46729; - ((volatile __local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - x_46730; - x_46733 = x_46730; - ((volatile __local - float *) 
red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - x_46731; - x_46734 = x_46731; + } + if (sle32(wave_sizze_129274, skip_threads_129290)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129290 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129272, 32) == 0 || + !ltid_in_bounds_129287)) { + // read operands + { + x_129282 = x_129281; + x_129281 = ((__local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(squot32(local_tid_129272, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129292 = + slt64(srem64(sext_i32_i64(local_tid_129272) + + chunk_offset_129279, N_70860), + sext_i32_i64(local_tid_129272) + + chunk_offset_129279 - + (sext_i32_i64(squot32(local_tid_129272, 32) * + 32 - 1) + chunk_offset_129279)); + + if (inactive_129292) { + x_129281 = x_129282; + } + if (!inactive_129292) { + int64_t defunc_1_op_res_129283 = add64(x_129281, + x_129282); + + x_129281 = defunc_1_op_res_129283; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)] = + x_129281; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129272, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)] = + x_129282; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_88642, m_70861) && slt64(gtid_88650, N_70860)) { + ((__global int64_t *) mem_124906)[gtid_88642 * N_70860 + + gtid_88650] = ((__local + int64_t *) scan_arr_mem_129276)[sext_i32_i64(local_tid_129272)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_129293 = slt64(srem64(chunk_offset_129279 + + segscan_group_sizze_88668, + N_70860), + chunk_offset_129279 + + segscan_group_sizze_88668 - + (chunk_offset_129279 + + segscan_group_sizze_88668 
- + (int64_t) 1)); + bool should_load_carry_129294 = local_tid_129272 == 0 && + !crosses_segment_129293; + + if (should_load_carry_129294) { + x_88673 = ((__local + int64_t *) scan_arr_mem_129276)[segscan_group_sizze_88668 - + (int64_t) 1]; + } + if (!should_load_carry_129294) { + x_88673 = (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_88668 +} +__kernel void mainDetailedziscan_stage1_89879(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + __local volatile + int64_t *scan_arr_mem_129674_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t iota_arg_72776, + int32_t num_threads_129668, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *defunc_0_f_res_mem_124973, + __global + unsigned char *mem_125093, + __global + unsigned char *mem_125097) +{ + #define segscan_group_sizze_89925 (mainDetailedzisegscan_group_sizze_89873) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129674_backing_0 = + (__local volatile + char *) scan_arr_mem_129674_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129669; + int32_t local_tid_129670; + int64_t group_sizze_129673; + int32_t wave_sizze_129672; + int32_t group_tid_129671; + + global_tid_129669 = get_global_id(0); + local_tid_129670 = get_local_id(0); + group_sizze_129673 = get_local_size(0); + wave_sizze_129672 = LOCKSTEP_WIDTH; + group_tid_129671 = get_group_id(0); + + int32_t phys_tid_89879; + + phys_tid_89879 = global_tid_129669; + + __local char *scan_arr_mem_129674; + + scan_arr_mem_129674 
= (__local char *) scan_arr_mem_129674_backing_0; + + double x_89929; + double x_89930; + + x_89929 = 0.0; + for (int64_t j_129676 = 0; j_129676 < sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668)); + j_129676++) { + int64_t chunk_offset_129677 = segscan_group_sizze_89925 * j_129676 + + sext_i32_i64(group_tid_129671) * (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))); + int64_t flat_idx_129678 = chunk_offset_129677 + + sext_i32_i64(local_tid_129670); + int64_t gtid_89870 = squot64(flat_idx_129678, iota_arg_72776); + int64_t gtid_89878 = flat_idx_129678 - squot64(flat_idx_129678, + iota_arg_72776) * + iota_arg_72776; + + // threads in bounds read input + { + if (slt64(gtid_89870, m_70861) && slt64(gtid_89878, + iota_arg_72776)) { + int64_t y_89936 = ((__global int64_t *) mem_125093)[gtid_89870]; + bool cond_89938 = sle64(y_89936, gtid_89878); + double defunc_0_f_res_89939; + + if (cond_89938) { + defunc_0_f_res_89939 = 0.0; + } else { + int64_t x_89932 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89870]; + int64_t x_89933 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_89870]; + double x_89934 = ((__global + double *) defunc_0_f_res_mem_124973)[gtid_89870]; + bool cond_89940 = gtid_89878 == (int64_t) 0; + double defunc_0_f_res_f_res_89941; + + if (cond_89940) { + defunc_0_f_res_f_res_89941 = x_89934; + } else { + int64_t i_89942 = add64(gtid_89878, x_89932); + bool x_89943 = sle64((int64_t) 0, i_89942); + bool y_89944 = slt64(i_89942, N_70860); + bool bounds_check_89945 = x_89943 && y_89944; + bool index_certs_89946; + + if (!bounds_check_89945) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 431) == -1) { + global_failure_args[0] = i_89942; + global_failure_args[1] = N_70860; + ; } + local_failure = true; + goto error_0; } - if (sle32(wave_sizze_46718, skip_threads_46746)) { - barrier(CLK_LOCAL_MEM_FENCE); + } + + double x_89947 = ((__global + double 
*) defunc_4_map_res_mem_124920)[gtid_89870 * + N_70860 + + i_89942]; + int64_t x_89948 = sub64(x_89932, x_89933); + int64_t i_89949 = add64(gtid_89878, x_89948); + bool x_89950 = sle64((int64_t) 0, i_89949); + bool y_89951 = slt64(i_89949, N_70860); + bool bounds_check_89952 = x_89950 && y_89951; + bool index_certs_89953; + + if (!bounds_check_89952) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 432) == -1) { + global_failure_args[0] = i_89949; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; } - skip_threads_46746 *= 2; } + + double y_89954 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89870 * + N_70860 + + i_89949]; + double defunc_0_f_res_f_res_f_res_89955 = x_89947 - + y_89954; + + defunc_0_f_res_f_res_89941 = + defunc_0_f_res_f_res_f_res_89955; } + defunc_0_f_res_89939 = defunc_0_f_res_f_res_89941; } - barrier(CLK_LOCAL_MEM_FENCE); - // carry-in for every block except the first + // write to-scan values to parameters { - if (!(squot32(local_tid_46716, 32) == 0 || - !ltid_in_bounds_46743)) { + x_89930 = defunc_0_f_res_89939; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_89870, m_70861) && slt64(gtid_89878, + iota_arg_72776))) { + x_89930 = 0.0; + } + } + // combine with carry and write to local memory + { + double defunc_1_op_res_89931 = x_89929 + x_89930; + + ((__local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)] = + defunc_1_op_res_89931; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double x_129679; + double x_129680; + double x_129682; + double x_129683; + bool ltid_in_bounds_129685; + + ltid_in_bounds_129685 = slt64(sext_i32_i64(local_tid_129670), + segscan_group_sizze_89925); + + int32_t skip_threads_129686; + + // read input for in-block scan + { + 
if (ltid_in_bounds_129685) { + x_129680 = ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)]; + if ((local_tid_129670 - squot32(local_tid_129670, 32) * + 32) == 0) { + x_129679 = x_129680; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129686 = 1; + while (slt32(skip_threads_129686, 32)) { + if (sle32(skip_threads_129686, local_tid_129670 - + squot32(local_tid_129670, 32) * 32) && + ltid_in_bounds_129685) { // read operands { - x_38495 = x_38492; - x_38496 = x_38493; - x_38497 = x_38494; - x_38492 = ((__local - bool *) red_arr_mem_46720)[sext_i32_i64(squot32(local_tid_46716, - 32)) - - (int64_t) 1]; - x_38493 = ((__local - int32_t *) red_arr_mem_46722)[sext_i32_i64(squot32(local_tid_46716, - 32)) - - (int64_t) 1]; - x_38494 = ((__local - float *) red_arr_mem_46724)[sext_i32_i64(squot32(local_tid_46716, - 32)) - - (int64_t) 1]; + x_129679 = ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670) - + sext_i32_i64(skip_threads_129686)]; } // perform operation { - bool inactive_46748 = - slt64(srem64(sext_i32_i64(local_tid_46716), - iota32_arg_28909), - sext_i32_i64(local_tid_46716) - - sext_i32_i64(squot32(local_tid_46716, - 32) * 32 - 1)); + bool inactive_129687 = + slt64(srem64(sext_i32_i64(local_tid_129670) + + chunk_offset_129677, + iota_arg_72776), + sext_i32_i64(local_tid_129670) + + chunk_offset_129677 - + (sext_i32_i64(local_tid_129670 - + skip_threads_129686) + + chunk_offset_129677)); - if (inactive_46748) { - x_38492 = x_38495; - x_38493 = x_38496; - x_38494 = x_38497; + if (inactive_129687) { + x_129679 = x_129680; } - if (!inactive_46748) { - bool defunc_1_op_res_38498; - int32_t defunc_1_op_res_38499; + if (!inactive_129687) { + double defunc_1_op_res_129681 = x_129679 + + x_129680; - if (x_38492) { - defunc_1_op_res_38498 = x_38492; - defunc_1_op_res_38499 = x_38493; - } else { - bool x_38500 = x_38495 && x_38495; - bool x_38501 = !x_38495; - bool y_38502 = 
x_38492 && x_38501; - bool defunc_1_op_res_f_res_38503 = - x_38500 || y_38502; - int32_t defunc_1_op_res_f_res_38504; - - if (x_38495) { - defunc_1_op_res_f_res_38504 = x_38496; - } else { - defunc_1_op_res_f_res_38504 = x_38493; - } - defunc_1_op_res_38498 = - defunc_1_op_res_f_res_38503; - defunc_1_op_res_38499 = - defunc_1_op_res_f_res_38504; - } - - float defunc_1_op_res_38505 = x_38494 + x_38497; - - x_38492 = defunc_1_op_res_38498; - x_38493 = defunc_1_op_res_38499; - x_38494 = defunc_1_op_res_38505; + x_129679 = defunc_1_op_res_129681; } } - // write final result + } + if (sle32(wave_sizze_129672, skip_threads_129686)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129686, local_tid_129670 - + squot32(local_tid_129670, 32) * 32) && + ltid_in_bounds_129685) { + // write result { - ((__local - bool *) red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = - x_38492; - ((__local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - x_38493; - ((__local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - x_38494; + ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)] = + x_129679; + x_129680 = x_129679; } } + if (sle32(wave_sizze_129672, skip_threads_129686)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129686 *= 2; } - barrier(CLK_LOCAL_MEM_FENCE); - // restore correct values for first block + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129670 - squot32(local_tid_129670, 32) * 32) == + 31 && ltid_in_bounds_129685) { + ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(squot32(local_tid_129670, + 32))] = + x_129679; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129688; + + // read input for in-block scan { - if (squot32(local_tid_46716, 32) == 0) { - ((__local - bool *) 
red_arr_mem_46720)[sext_i32_i64(local_tid_46716)] = - x_38495; - ((__local - int32_t *) red_arr_mem_46722)[sext_i32_i64(local_tid_46716)] = - x_38496; + if (squot32(local_tid_129670, 32) == 0 && + ltid_in_bounds_129685) { + x_129683 = ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)]; + if ((local_tid_129670 - squot32(local_tid_129670, 32) * + 32) == 0) { + x_129682 = x_129683; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129688 = 1; + while (slt32(skip_threads_129688, 32)) { + if (sle32(skip_threads_129688, local_tid_129670 - + squot32(local_tid_129670, 32) * 32) && + (squot32(local_tid_129670, 32) == 0 && + ltid_in_bounds_129685)) { + // read operands + { + x_129682 = ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670) - + sext_i32_i64(skip_threads_129688)]; + } + // perform operation + { + bool inactive_129689 = + slt64(srem64(sext_i32_i64(local_tid_129670 * + 32 + 32 - 1) + + chunk_offset_129677, + iota_arg_72776), + sext_i32_i64(local_tid_129670 * 32 + + 32 - 1) + chunk_offset_129677 - + (sext_i32_i64((local_tid_129670 - + skip_threads_129688) * + 32 + 32 - 1) + + chunk_offset_129677)); + + if (inactive_129689) { + x_129682 = x_129683; + } + if (!inactive_129689) { + double defunc_1_op_res_129684 = x_129682 + + x_129683; + + x_129682 = defunc_1_op_res_129684; + } + } + } + if (sle32(wave_sizze_129672, skip_threads_129688)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129688, local_tid_129670 - + squot32(local_tid_129670, 32) * 32) && + (squot32(local_tid_129670, 32) == 0 && + ltid_in_bounds_129685)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)] = + x_129682; + x_129683 = x_129682; + } + } + if (sle32(wave_sizze_129672, skip_threads_129688)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129688 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block 
except the first + { + if (!(squot32(local_tid_129670, 32) == 0 || + !ltid_in_bounds_129685)) { + // read operands + { + x_129680 = x_129679; + x_129679 = ((__local + double *) scan_arr_mem_129674)[sext_i32_i64(squot32(local_tid_129670, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129690 = + slt64(srem64(sext_i32_i64(local_tid_129670) + + chunk_offset_129677, iota_arg_72776), + sext_i32_i64(local_tid_129670) + + chunk_offset_129677 - + (sext_i32_i64(squot32(local_tid_129670, 32) * + 32 - 1) + chunk_offset_129677)); + + if (inactive_129690) { + x_129679 = x_129680; + } + if (!inactive_129690) { + double defunc_1_op_res_129681 = x_129679 + x_129680; + + x_129679 = defunc_1_op_res_129681; + } + } + // write final result + { ((__local - float *) red_arr_mem_46724)[sext_i32_i64(local_tid_46716)] = - x_38497; + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)] = + x_129679; } } - barrier(CLK_LOCAL_MEM_FENCE); } - } - barrier(CLK_LOCAL_MEM_FENCE); - // save final values of segments - { - if (slt64(sext_i32_i64(virt_group_id_46728) * - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713) + - sext_i32_i64(local_tid_46716), m_28478) && - slt64(sext_i32_i64(local_tid_46716), - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713))) { - ((__global - bool *) mem_45346)[sext_i32_i64(virt_group_id_46728) * - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713) + - sext_i32_i64(local_tid_46716)] = ((__local - bool *) red_arr_mem_46720)[(sext_i32_i64(local_tid_46716) + - (int64_t) 1) * - segment_sizze_nonzzero_46713 - - (int64_t) 1]; - ((__global - int32_t *) mem_45348)[sext_i32_i64(virt_group_id_46728) * - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713) + - sext_i32_i64(local_tid_46716)] = + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129670, 32) == 0) { ((__local - int32_t *) red_arr_mem_46722)[(sext_i32_i64(local_tid_46716) + - 
(int64_t) 1) * - segment_sizze_nonzzero_46713 - - (int64_t) 1]; - ((__global - float *) mem_45350)[sext_i32_i64(virt_group_id_46728) * - squot64(segred_group_sizze_38486, - segment_sizze_nonzzero_46713) + - sext_i32_i64(local_tid_46716)] = ((__local - float *) red_arr_mem_46724)[(sext_i32_i64(local_tid_46716) + - (int64_t) 1) * - segment_sizze_nonzzero_46713 - - (int64_t) 1]; + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)] = + x_129680; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_89870, m_70861) && slt64(gtid_89878, + iota_arg_72776)) { + ((__global double *) mem_125097)[gtid_89870 * + iota_arg_72776 + + gtid_89878] = ((__local + double *) scan_arr_mem_129674)[sext_i32_i64(local_tid_129670)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_129691 = slt64(srem64(chunk_offset_129677 + + segscan_group_sizze_89925, + iota_arg_72776), + chunk_offset_129677 + + segscan_group_sizze_89925 - + (chunk_offset_129677 + + segscan_group_sizze_89925 - + (int64_t) 1)); + bool should_load_carry_129692 = local_tid_129670 == 0 && + !crosses_segment_129691; + + if (should_load_carry_129692) { + x_89929 = ((__local + double *) scan_arr_mem_129674)[segscan_group_sizze_89925 - + (int64_t) 1]; + } + if (!should_load_carry_129692) { + x_89929 = 0.0; + } } + barrier(CLK_LOCAL_MEM_FENCE); } - barrier(CLK_LOCAL_MEM_FENCE); - barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); } error_1: return; - #undef segred_group_sizze_38486 + #undef segscan_group_sizze_89925 } -""" -# Start of values.py. - -# Hacky parser/reader/writer for values written in Futhark syntax. -# Used for reading stdin when compiling standalone programs with the -# Python code generator. 
- -import numpy as np -import string -import struct -import sys - -class ReaderInput: - def __init__(self, f): - self.f = f - self.lookahead_buffer = [] - - def get_char(self): +__kernel void mainDetailedziscan_stage2_77650(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126454_backing_aligned_0, + int64_t m_70861, int64_t n_70864, + int64_t stage1_num_groups_126423, + int32_t num_threads_126424, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_77764 (mainDetailedzisegscan_group_sizze_77644) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_126454_backing_0 = + (__local volatile + char *) scan_arr_mem_126454_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126449; + int32_t local_tid_126450; + int64_t group_sizze_126453; + int32_t wave_sizze_126452; + int32_t group_tid_126451; + + global_tid_126449 = get_global_id(0); + local_tid_126450 = get_local_id(0); + group_sizze_126453 = get_local_size(0); + wave_sizze_126452 = LOCKSTEP_WIDTH; + group_tid_126451 = get_group_id(0); + + int32_t phys_tid_77650; + + phys_tid_77650 = global_tid_126449; + + __local char *scan_arr_mem_126454; + + scan_arr_mem_126454 = (__local char *) scan_arr_mem_126454_backing_0; + + int64_t flat_idx_126456; + + flat_idx_126456 = (sext_i32_i64(local_tid_126450) + (int64_t) 1) * + (segscan_group_sizze_77764 * sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1; + + int64_t gtid_77641; + + gtid_77641 = squot64(flat_idx_126456, n_70864); + + int64_t gtid_77649; + + gtid_77649 = flat_idx_126456 - squot64(flat_idx_126456, n_70864) * n_70864; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_77641, m_70861) && slt64(gtid_77649, n_70864)) { + ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + ((__global int64_t *) mem_120201)[gtid_77641 * n_70864 + + 
gtid_77649]; + } else { + ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_77768; + int64_t x_77769; + int64_t x_126457; + int64_t x_126458; + bool ltid_in_bounds_126460; + + ltid_in_bounds_126460 = slt64(sext_i32_i64(local_tid_126450), + stage1_num_groups_126423); + + int32_t skip_threads_126461; + + // read input for in-block scan + { + if (ltid_in_bounds_126460) { + x_77769 = ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)]; + if ((local_tid_126450 - squot32(local_tid_126450, 32) * 32) == 0) { + x_77768 = x_77769; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126461 = 1; + while (slt32(skip_threads_126461, 32)) { + if (sle32(skip_threads_126461, local_tid_126450 - + squot32(local_tid_126450, 32) * 32) && + ltid_in_bounds_126460) { + // read operands + { + x_77768 = ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450) - + sext_i32_i64(skip_threads_126461)]; + } + // perform operation + { + bool inactive_126462 = + slt64(srem64((sext_i32_i64(local_tid_126450) + + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1, n_70864), + (sext_i32_i64(local_tid_126450) + (int64_t) 1) * + (segscan_group_sizze_77764 * sdiv_up64(m_70861 * + n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_126450 - + skip_threads_126461) + + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1)); + + if (inactive_126462) { + x_77768 = x_77769; + } + if (!inactive_126462) { + int64_t defunc_1_op_res_77770 = add64(x_77768, x_77769); + + x_77768 = defunc_1_op_res_77770; + } + } + } + if (sle32(wave_sizze_126452, skip_threads_126461)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126461, 
local_tid_126450 - + squot32(local_tid_126450, 32) * 32) && + ltid_in_bounds_126460) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + x_77768; + x_77769 = x_77768; + } + } + if (sle32(wave_sizze_126452, skip_threads_126461)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126461 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126450 - squot32(local_tid_126450, 32) * 32) == 31 && + ltid_in_bounds_126460) { + ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(squot32(local_tid_126450, + 32))] = + x_77768; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126463; + + // read input for in-block scan + { + if (squot32(local_tid_126450, 32) == 0 && ltid_in_bounds_126460) { + x_126458 = ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)]; + if ((local_tid_126450 - squot32(local_tid_126450, 32) * 32) == + 0) { + x_126457 = x_126458; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126463 = 1; + while (slt32(skip_threads_126463, 32)) { + if (sle32(skip_threads_126463, local_tid_126450 - + squot32(local_tid_126450, 32) * 32) && + (squot32(local_tid_126450, 32) == 0 && + ltid_in_bounds_126460)) { + // read operands + { + x_126457 = ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450) - + sext_i32_i64(skip_threads_126463)]; + } + // perform operation + { + bool inactive_126464 = + slt64(srem64((sext_i32_i64(local_tid_126450 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1, n_70864), + (sext_i32_i64(local_tid_126450 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + 
sext_i32_i64(num_threads_126424))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_126450 - + skip_threads_126463) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1)); + + if (inactive_126464) { + x_126457 = x_126458; + } + if (!inactive_126464) { + int64_t defunc_1_op_res_126459 = add64(x_126457, + x_126458); + + x_126457 = defunc_1_op_res_126459; + } + } + } + if (sle32(wave_sizze_126452, skip_threads_126463)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126463, local_tid_126450 - + squot32(local_tid_126450, 32) * 32) && + (squot32(local_tid_126450, 32) == 0 && + ltid_in_bounds_126460)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + x_126457; + x_126458 = x_126457; + } + } + if (sle32(wave_sizze_126452, skip_threads_126463)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126463 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126450, 32) == 0 || !ltid_in_bounds_126460)) { + // read operands + { + x_77769 = x_77768; + x_77768 = ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(squot32(local_tid_126450, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126465 = + slt64(srem64((sext_i32_i64(local_tid_126450) + + (int64_t) 1) * (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1, n_70864), + (sext_i32_i64(local_tid_126450) + (int64_t) 1) * + (segscan_group_sizze_77764 * sdiv_up64(m_70861 * + n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_126450, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1)); + + if (inactive_126465) { + x_77768 = x_77769; + } + if (!inactive_126465) 
{ + int64_t defunc_1_op_res_77770 = add64(x_77768, x_77769); + + x_77768 = defunc_1_op_res_77770; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + x_77768; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126450, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)] = + x_77769; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_77641, m_70861) && slt64(gtid_77649, n_70864)) { + ((__global int64_t *) mem_120201)[gtid_77641 * n_70864 + + gtid_77649] = ((__local + int64_t *) scan_arr_mem_126454)[sext_i32_i64(local_tid_126450)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_77764 +} +__kernel void mainDetailedziscan_stage2_86129(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_128511_backing_aligned_0, + int64_t m_70861, + int64_t Nmk_72261, + int64_t stage1_num_groups_128480, + int32_t num_threads_128481, + __global + unsigned char *mem_124061) +{ + #define segscan_group_sizze_86278 (mainDetailedzisegscan_group_sizze_86123) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_128511_backing_0 = + (__local volatile + char *) scan_arr_mem_128511_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128506; + int32_t local_tid_128507; + int64_t group_sizze_128510; + int32_t wave_sizze_128509; + int32_t group_tid_128508; + + global_tid_128506 = get_global_id(0); + local_tid_128507 = get_local_id(0); + group_sizze_128510 = get_local_size(0); + wave_sizze_128509 = LOCKSTEP_WIDTH; + group_tid_128508 = get_group_id(0); + + int32_t phys_tid_86129; + + phys_tid_86129 = global_tid_128506; + + __local char *scan_arr_mem_128511; + + scan_arr_mem_128511 = (__local char *) scan_arr_mem_128511_backing_0; + + int64_t 
flat_idx_128513; + + flat_idx_128513 = (sext_i32_i64(local_tid_128507) + (int64_t) 1) * + (segscan_group_sizze_86278 * sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1; + + int64_t gtid_86120; + + gtid_86120 = squot64(flat_idx_128513, Nmk_72261); + + int64_t gtid_86128; + + gtid_86128 = flat_idx_128513 - squot64(flat_idx_128513, Nmk_72261) * + Nmk_72261; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_86120, m_70861) && slt64(gtid_86128, Nmk_72261)) { + ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + ((__global double *) mem_124061)[gtid_86120 * Nmk_72261 + + gtid_86128]; + } else { + ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double x_86282; + double x_86283; + double x_128514; + double x_128515; + bool ltid_in_bounds_128517; + + ltid_in_bounds_128517 = slt64(sext_i32_i64(local_tid_128507), + stage1_num_groups_128480); + + int32_t skip_threads_128518; + + // read input for in-block scan + { + if (ltid_in_bounds_128517) { + x_86283 = ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)]; + if ((local_tid_128507 - squot32(local_tid_128507, 32) * 32) == 0) { + x_86282 = x_86283; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128518 = 1; + while (slt32(skip_threads_128518, 32)) { + if (sle32(skip_threads_128518, local_tid_128507 - + squot32(local_tid_128507, 32) * 32) && + ltid_in_bounds_128517) { + // read operands + { + x_86282 = ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507) - + sext_i32_i64(skip_threads_128518)]; + } + // perform operation + { + bool inactive_128519 = + slt64(srem64((sext_i32_i64(local_tid_128507) + + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1, Nmk_72261), + 
(sext_i32_i64(local_tid_128507) + (int64_t) 1) * + (segscan_group_sizze_86278 * sdiv_up64(m_70861 * + Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_128507 - + skip_threads_128518) + + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1)); + + if (inactive_128519) { + x_86282 = x_86283; + } + if (!inactive_128519) { + double defunc_1_op_res_86284 = x_86282 + x_86283; + + x_86282 = defunc_1_op_res_86284; + } + } + } + if (sle32(wave_sizze_128509, skip_threads_128518)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128518, local_tid_128507 - + squot32(local_tid_128507, 32) * 32) && + ltid_in_bounds_128517) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + x_86282; + x_86283 = x_86282; + } + } + if (sle32(wave_sizze_128509, skip_threads_128518)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128518 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128507 - squot32(local_tid_128507, 32) * 32) == 31 && + ltid_in_bounds_128517) { + ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(squot32(local_tid_128507, + 32))] = + x_86282; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128520; + + // read input for in-block scan + { + if (squot32(local_tid_128507, 32) == 0 && ltid_in_bounds_128517) { + x_128515 = ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)]; + if ((local_tid_128507 - squot32(local_tid_128507, 32) * 32) == + 0) { + x_128514 = x_128515; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128520 = 1; + while (slt32(skip_threads_128520, 32)) { + if (sle32(skip_threads_128520, local_tid_128507 - + 
squot32(local_tid_128507, 32) * 32) && + (squot32(local_tid_128507, 32) == 0 && + ltid_in_bounds_128517)) { + // read operands + { + x_128514 = ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507) - + sext_i32_i64(skip_threads_128520)]; + } + // perform operation + { + bool inactive_128521 = + slt64(srem64((sext_i32_i64(local_tid_128507 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1, Nmk_72261), + (sext_i32_i64(local_tid_128507 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_128507 - + skip_threads_128520) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1)); + + if (inactive_128521) { + x_128514 = x_128515; + } + if (!inactive_128521) { + double defunc_1_op_res_128516 = x_128514 + x_128515; + + x_128514 = defunc_1_op_res_128516; + } + } + } + if (sle32(wave_sizze_128509, skip_threads_128520)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128520, local_tid_128507 - + squot32(local_tid_128507, 32) * 32) && + (squot32(local_tid_128507, 32) == 0 && + ltid_in_bounds_128517)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + x_128514; + x_128515 = x_128514; + } + } + if (sle32(wave_sizze_128509, skip_threads_128520)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128520 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128507, 32) == 0 || !ltid_in_bounds_128517)) { + // read operands + { + x_86283 = x_86282; + x_86282 = ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(squot32(local_tid_128507, + 32)) - + (int64_t) 1]; + } + // perform 
operation + { + bool inactive_128522 = + slt64(srem64((sext_i32_i64(local_tid_128507) + + (int64_t) 1) * (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * + Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1, Nmk_72261), + (sext_i32_i64(local_tid_128507) + (int64_t) 1) * + (segscan_group_sizze_86278 * sdiv_up64(m_70861 * + Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_128507, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1)); + + if (inactive_128522) { + x_86282 = x_86283; + } + if (!inactive_128522) { + double defunc_1_op_res_86284 = x_86282 + x_86283; + + x_86282 = defunc_1_op_res_86284; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + x_86282; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128507, 32) == 0) { + ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)] = + x_86283; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_86120, m_70861) && slt64(gtid_86128, Nmk_72261)) { + ((__global double *) mem_124061)[gtid_86120 * Nmk_72261 + + gtid_86128] = ((__local + double *) scan_arr_mem_128511)[sext_i32_i64(local_tid_128507)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_86278 +} +__kernel void mainDetailedziscan_stage2_88651(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129300_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t stage1_num_groups_129269, + int32_t num_threads_129270, + __global + unsigned char *mem_124906) +{ + #define segscan_group_sizze_88668 (mainDetailedzisegscan_group_sizze_88645) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict 
scan_arr_mem_129300_backing_0 = + (__local volatile + char *) scan_arr_mem_129300_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129295; + int32_t local_tid_129296; + int64_t group_sizze_129299; + int32_t wave_sizze_129298; + int32_t group_tid_129297; + + global_tid_129295 = get_global_id(0); + local_tid_129296 = get_local_id(0); + group_sizze_129299 = get_local_size(0); + wave_sizze_129298 = LOCKSTEP_WIDTH; + group_tid_129297 = get_group_id(0); + + int32_t phys_tid_88651; + + phys_tid_88651 = global_tid_129295; + + __local char *scan_arr_mem_129300; + + scan_arr_mem_129300 = (__local char *) scan_arr_mem_129300_backing_0; + + int64_t flat_idx_129302; + + flat_idx_129302 = (sext_i32_i64(local_tid_129296) + (int64_t) 1) * + (segscan_group_sizze_88668 * sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1; + + int64_t gtid_88642; + + gtid_88642 = squot64(flat_idx_129302, N_70860); + + int64_t gtid_88650; + + gtid_88650 = flat_idx_129302 - squot64(flat_idx_129302, N_70860) * N_70860; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_88642, m_70861) && slt64(gtid_88650, N_70860)) { + ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + ((__global int64_t *) mem_124906)[gtid_88642 * N_70860 + + gtid_88650]; + } else { + ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_88673; + int64_t x_88674; + int64_t x_129303; + int64_t x_129304; + bool ltid_in_bounds_129306; + + ltid_in_bounds_129306 = slt64(sext_i32_i64(local_tid_129296), + stage1_num_groups_129269); + + int32_t skip_threads_129307; + + // read input for in-block scan + { + if (ltid_in_bounds_129306) { + x_88674 = ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)]; + if ((local_tid_129296 - squot32(local_tid_129296, 32) * 32) == 0) { + x_88673 = x_88674; + } + } 
+ } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129307 = 1; + while (slt32(skip_threads_129307, 32)) { + if (sle32(skip_threads_129307, local_tid_129296 - + squot32(local_tid_129296, 32) * 32) && + ltid_in_bounds_129306) { + // read operands + { + x_88673 = ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296) - + sext_i32_i64(skip_threads_129307)]; + } + // perform operation + { + bool inactive_129308 = + slt64(srem64((sext_i32_i64(local_tid_129296) + + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1, N_70860), + (sext_i32_i64(local_tid_129296) + (int64_t) 1) * + (segscan_group_sizze_88668 * sdiv_up64(m_70861 * + N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_129296 - + skip_threads_129307) + + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1)); + + if (inactive_129308) { + x_88673 = x_88674; + } + if (!inactive_129308) { + int64_t defunc_1_op_res_88675 = add64(x_88673, x_88674); + + x_88673 = defunc_1_op_res_88675; + } + } + } + if (sle32(wave_sizze_129298, skip_threads_129307)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129307, local_tid_129296 - + squot32(local_tid_129296, 32) * 32) && + ltid_in_bounds_129306) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + x_88673; + x_88674 = x_88673; + } + } + if (sle32(wave_sizze_129298, skip_threads_129307)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129307 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129296 - squot32(local_tid_129296, 32) * 32) == 31 && + ltid_in_bounds_129306) { + ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(squot32(local_tid_129296, + 32))] = + 
x_88673; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129309; + + // read input for in-block scan + { + if (squot32(local_tid_129296, 32) == 0 && ltid_in_bounds_129306) { + x_129304 = ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)]; + if ((local_tid_129296 - squot32(local_tid_129296, 32) * 32) == + 0) { + x_129303 = x_129304; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129309 = 1; + while (slt32(skip_threads_129309, 32)) { + if (sle32(skip_threads_129309, local_tid_129296 - + squot32(local_tid_129296, 32) * 32) && + (squot32(local_tid_129296, 32) == 0 && + ltid_in_bounds_129306)) { + // read operands + { + x_129303 = ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296) - + sext_i32_i64(skip_threads_129309)]; + } + // perform operation + { + bool inactive_129310 = + slt64(srem64((sext_i32_i64(local_tid_129296 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1, N_70860), + (sext_i32_i64(local_tid_129296 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129296 - + skip_threads_129309) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1)); + + if (inactive_129310) { + x_129303 = x_129304; + } + if (!inactive_129310) { + int64_t defunc_1_op_res_129305 = add64(x_129303, + x_129304); + + x_129303 = defunc_1_op_res_129305; + } + } + } + if (sle32(wave_sizze_129298, skip_threads_129309)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129309, local_tid_129296 - + squot32(local_tid_129296, 32) * 32) && + (squot32(local_tid_129296, 32) 
== 0 && + ltid_in_bounds_129306)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + x_129303; + x_129304 = x_129303; + } + } + if (sle32(wave_sizze_129298, skip_threads_129309)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129309 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129296, 32) == 0 || !ltid_in_bounds_129306)) { + // read operands + { + x_88674 = x_88673; + x_88673 = ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(squot32(local_tid_129296, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129311 = + slt64(srem64((sext_i32_i64(local_tid_129296) + + (int64_t) 1) * (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1, N_70860), + (sext_i32_i64(local_tid_129296) + (int64_t) 1) * + (segscan_group_sizze_88668 * sdiv_up64(m_70861 * + N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129296, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1)); + + if (inactive_129311) { + x_88673 = x_88674; + } + if (!inactive_129311) { + int64_t defunc_1_op_res_88675 = add64(x_88673, x_88674); + + x_88673 = defunc_1_op_res_88675; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + x_88673; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129296, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)] = + x_88674; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_88642, m_70861) && slt64(gtid_88650, N_70860)) { + ((__global int64_t *) mem_124906)[gtid_88642 * N_70860 + + 
gtid_88650] = ((__local + int64_t *) scan_arr_mem_129300)[sext_i32_i64(local_tid_129296)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_88668 +} +__kernel void mainDetailedziscan_stage2_89879(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129698_backing_aligned_0, + int64_t m_70861, + int64_t iota_arg_72776, + int64_t stage1_num_groups_129667, + int32_t num_threads_129668, + __global + unsigned char *mem_125097) +{ + #define segscan_group_sizze_89925 (mainDetailedzisegscan_group_sizze_89873) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129698_backing_0 = + (__local volatile + char *) scan_arr_mem_129698_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129693; + int32_t local_tid_129694; + int64_t group_sizze_129697; + int32_t wave_sizze_129696; + int32_t group_tid_129695; + + global_tid_129693 = get_global_id(0); + local_tid_129694 = get_local_id(0); + group_sizze_129697 = get_local_size(0); + wave_sizze_129696 = LOCKSTEP_WIDTH; + group_tid_129695 = get_group_id(0); + + int32_t phys_tid_89879; + + phys_tid_89879 = global_tid_129693; + + __local char *scan_arr_mem_129698; + + scan_arr_mem_129698 = (__local char *) scan_arr_mem_129698_backing_0; + + int64_t flat_idx_129700; + + flat_idx_129700 = (sext_i32_i64(local_tid_129694) + (int64_t) 1) * + (segscan_group_sizze_89925 * sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1; + + int64_t gtid_89870; + + gtid_89870 = squot64(flat_idx_129700, iota_arg_72776); + + int64_t gtid_89878; + + gtid_89878 = flat_idx_129700 - squot64(flat_idx_129700, iota_arg_72776) * + iota_arg_72776; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_89870, m_70861) && slt64(gtid_89878, iota_arg_72776)) { + ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + ((__global double *) 
mem_125097)[gtid_89870 * iota_arg_72776 + + gtid_89878]; + } else { + ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double x_89929; + double x_89930; + double x_129701; + double x_129702; + bool ltid_in_bounds_129704; + + ltid_in_bounds_129704 = slt64(sext_i32_i64(local_tid_129694), + stage1_num_groups_129667); + + int32_t skip_threads_129705; + + // read input for in-block scan + { + if (ltid_in_bounds_129704) { + x_89930 = ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)]; + if ((local_tid_129694 - squot32(local_tid_129694, 32) * 32) == 0) { + x_89929 = x_89930; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129705 = 1; + while (slt32(skip_threads_129705, 32)) { + if (sle32(skip_threads_129705, local_tid_129694 - + squot32(local_tid_129694, 32) * 32) && + ltid_in_bounds_129704) { + // read operands + { + x_89929 = ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694) - + sext_i32_i64(skip_threads_129705)]; + } + // perform operation + { + bool inactive_129706 = + slt64(srem64((sext_i32_i64(local_tid_129694) + + (int64_t) 1) * + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1, iota_arg_72776), + (sext_i32_i64(local_tid_129694) + (int64_t) 1) * + (segscan_group_sizze_89925 * sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_129694 - + skip_threads_129705) + + (int64_t) 1) * + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1)); + + if (inactive_129706) { + x_89929 = x_89930; + } + if (!inactive_129706) { + double defunc_1_op_res_89931 = x_89929 + x_89930; + + x_89929 = defunc_1_op_res_89931; + } + } + } + if (sle32(wave_sizze_129696, skip_threads_129705)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129705, local_tid_129694 - + squot32(local_tid_129694, 32) * 32) && + ltid_in_bounds_129704) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + x_89929; + x_89930 = x_89929; + } + } + if (sle32(wave_sizze_129696, skip_threads_129705)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129705 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129694 - squot32(local_tid_129694, 32) * 32) == 31 && + ltid_in_bounds_129704) { + ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(squot32(local_tid_129694, + 32))] = + x_89929; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129707; + + // read input for in-block scan + { + if (squot32(local_tid_129694, 32) == 0 && ltid_in_bounds_129704) { + x_129702 = ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)]; + if ((local_tid_129694 - squot32(local_tid_129694, 32) * 32) == + 0) { + x_129701 = x_129702; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129707 = 1; + while (slt32(skip_threads_129707, 32)) { + if (sle32(skip_threads_129707, local_tid_129694 - + squot32(local_tid_129694, 32) * 32) && + (squot32(local_tid_129694, 32) == 0 && + ltid_in_bounds_129704)) { + // read operands + { + x_129701 = ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694) - + sext_i32_i64(skip_threads_129707)]; + } + // perform operation + { + bool inactive_129708 = + slt64(srem64((sext_i32_i64(local_tid_129694 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1, iota_arg_72776), + (sext_i32_i64(local_tid_129694 * 32 + 32 - + 1) + (int64_t) 1) 
* + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129694 - + skip_threads_129707) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1)); + + if (inactive_129708) { + x_129701 = x_129702; + } + if (!inactive_129708) { + double defunc_1_op_res_129703 = x_129701 + x_129702; + + x_129701 = defunc_1_op_res_129703; + } + } + } + if (sle32(wave_sizze_129696, skip_threads_129707)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129707, local_tid_129694 - + squot32(local_tid_129694, 32) * 32) && + (squot32(local_tid_129694, 32) == 0 && + ltid_in_bounds_129704)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + x_129701; + x_129702 = x_129701; + } + } + if (sle32(wave_sizze_129696, skip_threads_129707)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129707 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129694, 32) == 0 || !ltid_in_bounds_129704)) { + // read operands + { + x_89930 = x_89929; + x_89929 = ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(squot32(local_tid_129694, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129709 = + slt64(srem64((sext_i32_i64(local_tid_129694) + + (int64_t) 1) * (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1, iota_arg_72776), + (sext_i32_i64(local_tid_129694) + (int64_t) 1) * + (segscan_group_sizze_89925 * sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129694, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * + iota_arg_72776, + 
sext_i32_i64(num_threads_129668))) - + (int64_t) 1)); + + if (inactive_129709) { + x_89929 = x_89930; + } + if (!inactive_129709) { + double defunc_1_op_res_89931 = x_89929 + x_89930; + + x_89929 = defunc_1_op_res_89931; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + x_89929; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129694, 32) == 0) { + ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)] = + x_89930; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_89870, m_70861) && slt64(gtid_89878, iota_arg_72776)) { + ((__global double *) mem_125097)[gtid_89870 * iota_arg_72776 + + gtid_89878] = ((__local + double *) scan_arr_mem_129698)[sext_i32_i64(local_tid_129694)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_89925 +} +__kernel void mainDetailedziscan_stage3_77650(__global int *global_failure, + int64_t m_70861, int64_t n_70864, + int64_t num_groups_77765, + int32_t num_threads_126424, + int32_t required_groups_126466, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_77764 (mainDetailedzisegscan_group_sizze_77644) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126467; + int32_t local_tid_126468; + int64_t group_sizze_126471; + int32_t wave_sizze_126470; + int32_t group_tid_126469; + + global_tid_126467 = get_global_id(0); + local_tid_126468 = get_local_id(0); + group_sizze_126471 = get_local_size(0); + wave_sizze_126470 = LOCKSTEP_WIDTH; + group_tid_126469 = get_group_id(0); + + int32_t phys_tid_77650; + + phys_tid_77650 = global_tid_126467; + + int32_t phys_group_id_126472; + + phys_group_id_126472 = get_group_id(0); + for (int32_t i_126473 = 0; i_126473 < sdiv_up32(required_groups_126466 - + 
phys_group_id_126472, + sext_i64_i32(num_groups_77765)); + i_126473++) { + int32_t virt_group_id_126474 = phys_group_id_126472 + i_126473 * + sext_i64_i32(num_groups_77765); + int64_t flat_idx_126475 = sext_i32_i64(virt_group_id_126474) * + segscan_group_sizze_77764 + sext_i32_i64(local_tid_126468); + int64_t gtid_77641 = squot64(flat_idx_126475, n_70864); + int64_t gtid_77649 = flat_idx_126475 - squot64(flat_idx_126475, + n_70864) * n_70864; + int64_t orig_group_126476 = squot64(flat_idx_126475, + segscan_group_sizze_77764 * + sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))); + int64_t carry_in_flat_idx_126477 = orig_group_126476 * + (segscan_group_sizze_77764 * sdiv_up64(m_70861 * n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1; + + if (slt64(gtid_77641, m_70861) && slt64(gtid_77649, n_70864)) { + if (!(orig_group_126476 == (int64_t) 0 || (flat_idx_126475 == + (orig_group_126476 + + (int64_t) 1) * + (segscan_group_sizze_77764 * + sdiv_up64(m_70861 * + n_70864, + sext_i32_i64(num_threads_126424))) - + (int64_t) 1 || + slt64(srem64(flat_idx_126475, + n_70864), + flat_idx_126475 - + carry_in_flat_idx_126477)))) { + int64_t x_77768; + int64_t x_77769; + + x_77768 = ((__global + int64_t *) mem_120201)[squot64(carry_in_flat_idx_126477, + n_70864) * n_70864 + + (carry_in_flat_idx_126477 - + squot64(carry_in_flat_idx_126477, + n_70864) * + n_70864)]; + x_77769 = ((__global int64_t *) mem_120201)[gtid_77641 * + n_70864 + + gtid_77649]; + + int64_t defunc_1_op_res_77770; + + defunc_1_op_res_77770 = add64(x_77768, x_77769); + x_77768 = defunc_1_op_res_77770; + ((__global int64_t *) mem_120201)[gtid_77641 * n_70864 + + gtid_77649] = x_77768; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_77764 +} +__kernel void mainDetailedziscan_stage3_86129(__global int *global_failure, + int64_t m_70861, + int64_t Nmk_72261, + int64_t num_groups_86279, + int32_t num_threads_128481, + 
int32_t required_groups_128523, + __global + unsigned char *mem_124061) +{ + #define segscan_group_sizze_86278 (mainDetailedzisegscan_group_sizze_86123) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128524; + int32_t local_tid_128525; + int64_t group_sizze_128528; + int32_t wave_sizze_128527; + int32_t group_tid_128526; + + global_tid_128524 = get_global_id(0); + local_tid_128525 = get_local_id(0); + group_sizze_128528 = get_local_size(0); + wave_sizze_128527 = LOCKSTEP_WIDTH; + group_tid_128526 = get_group_id(0); + + int32_t phys_tid_86129; + + phys_tid_86129 = global_tid_128524; + + int32_t phys_group_id_128529; + + phys_group_id_128529 = get_group_id(0); + for (int32_t i_128530 = 0; i_128530 < sdiv_up32(required_groups_128523 - + phys_group_id_128529, + sext_i64_i32(num_groups_86279)); + i_128530++) { + int32_t virt_group_id_128531 = phys_group_id_128529 + i_128530 * + sext_i64_i32(num_groups_86279); + int64_t flat_idx_128532 = sext_i32_i64(virt_group_id_128531) * + segscan_group_sizze_86278 + sext_i32_i64(local_tid_128525); + int64_t gtid_86120 = squot64(flat_idx_128532, Nmk_72261); + int64_t gtid_86128 = flat_idx_128532 - squot64(flat_idx_128532, + Nmk_72261) * Nmk_72261; + int64_t orig_group_128533 = squot64(flat_idx_128532, + segscan_group_sizze_86278 * + sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))); + int64_t carry_in_flat_idx_128534 = orig_group_128533 * + (segscan_group_sizze_86278 * sdiv_up64(m_70861 * Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1; + + if (slt64(gtid_86120, m_70861) && slt64(gtid_86128, Nmk_72261)) { + if (!(orig_group_128533 == (int64_t) 0 || (flat_idx_128532 == + (orig_group_128533 + + (int64_t) 1) * + (segscan_group_sizze_86278 * + sdiv_up64(m_70861 * + Nmk_72261, + sext_i32_i64(num_threads_128481))) - + (int64_t) 1 || + slt64(srem64(flat_idx_128532, + Nmk_72261), + flat_idx_128532 - + 
carry_in_flat_idx_128534)))) { + double x_86282; + double x_86283; + + x_86282 = ((__global + double *) mem_124061)[squot64(carry_in_flat_idx_128534, + Nmk_72261) * + Nmk_72261 + + (carry_in_flat_idx_128534 - + squot64(carry_in_flat_idx_128534, + Nmk_72261) * + Nmk_72261)]; + x_86283 = ((__global double *) mem_124061)[gtid_86120 * + Nmk_72261 + + gtid_86128]; + + double defunc_1_op_res_86284; + + defunc_1_op_res_86284 = x_86282 + x_86283; + x_86282 = defunc_1_op_res_86284; + ((__global double *) mem_124061)[gtid_86120 * Nmk_72261 + + gtid_86128] = x_86282; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_86278 +} +__kernel void mainDetailedziscan_stage3_88651(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t num_groups_88669, + int32_t num_threads_129270, + int32_t required_groups_129312, + __global + unsigned char *mem_124906) +{ + #define segscan_group_sizze_88668 (mainDetailedzisegscan_group_sizze_88645) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129313; + int32_t local_tid_129314; + int64_t group_sizze_129317; + int32_t wave_sizze_129316; + int32_t group_tid_129315; + + global_tid_129313 = get_global_id(0); + local_tid_129314 = get_local_id(0); + group_sizze_129317 = get_local_size(0); + wave_sizze_129316 = LOCKSTEP_WIDTH; + group_tid_129315 = get_group_id(0); + + int32_t phys_tid_88651; + + phys_tid_88651 = global_tid_129313; + + int32_t phys_group_id_129318; + + phys_group_id_129318 = get_group_id(0); + for (int32_t i_129319 = 0; i_129319 < sdiv_up32(required_groups_129312 - + phys_group_id_129318, + sext_i64_i32(num_groups_88669)); + i_129319++) { + int32_t virt_group_id_129320 = phys_group_id_129318 + i_129319 * + sext_i64_i32(num_groups_88669); + int64_t flat_idx_129321 = sext_i32_i64(virt_group_id_129320) * + segscan_group_sizze_88668 + 
sext_i32_i64(local_tid_129314); + int64_t gtid_88642 = squot64(flat_idx_129321, N_70860); + int64_t gtid_88650 = flat_idx_129321 - squot64(flat_idx_129321, + N_70860) * N_70860; + int64_t orig_group_129322 = squot64(flat_idx_129321, + segscan_group_sizze_88668 * + sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))); + int64_t carry_in_flat_idx_129323 = orig_group_129322 * + (segscan_group_sizze_88668 * sdiv_up64(m_70861 * N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1; + + if (slt64(gtid_88642, m_70861) && slt64(gtid_88650, N_70860)) { + if (!(orig_group_129322 == (int64_t) 0 || (flat_idx_129321 == + (orig_group_129322 + + (int64_t) 1) * + (segscan_group_sizze_88668 * + sdiv_up64(m_70861 * + N_70860, + sext_i32_i64(num_threads_129270))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129321, + N_70860), + flat_idx_129321 - + carry_in_flat_idx_129323)))) { + int64_t x_88673; + int64_t x_88674; + + x_88673 = ((__global + int64_t *) mem_124906)[squot64(carry_in_flat_idx_129323, + N_70860) * N_70860 + + (carry_in_flat_idx_129323 - + squot64(carry_in_flat_idx_129323, + N_70860) * + N_70860)]; + x_88674 = ((__global int64_t *) mem_124906)[gtid_88642 * + N_70860 + + gtid_88650]; + + int64_t defunc_1_op_res_88675; + + defunc_1_op_res_88675 = add64(x_88673, x_88674); + x_88673 = defunc_1_op_res_88675; + ((__global int64_t *) mem_124906)[gtid_88642 * N_70860 + + gtid_88650] = x_88673; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_88668 +} +__kernel void mainDetailedziscan_stage3_89879(__global int *global_failure, + int64_t m_70861, + int64_t iota_arg_72776, + int64_t num_groups_89926, + int32_t num_threads_129668, + int32_t required_groups_129710, + __global + unsigned char *mem_125097) +{ + #define segscan_group_sizze_89925 (mainDetailedzisegscan_group_sizze_89873) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 
0) + return; + + int32_t global_tid_129711; + int32_t local_tid_129712; + int64_t group_sizze_129715; + int32_t wave_sizze_129714; + int32_t group_tid_129713; + + global_tid_129711 = get_global_id(0); + local_tid_129712 = get_local_id(0); + group_sizze_129715 = get_local_size(0); + wave_sizze_129714 = LOCKSTEP_WIDTH; + group_tid_129713 = get_group_id(0); + + int32_t phys_tid_89879; + + phys_tid_89879 = global_tid_129711; + + int32_t phys_group_id_129716; + + phys_group_id_129716 = get_group_id(0); + for (int32_t i_129717 = 0; i_129717 < sdiv_up32(required_groups_129710 - + phys_group_id_129716, + sext_i64_i32(num_groups_89926)); + i_129717++) { + int32_t virt_group_id_129718 = phys_group_id_129716 + i_129717 * + sext_i64_i32(num_groups_89926); + int64_t flat_idx_129719 = sext_i32_i64(virt_group_id_129718) * + segscan_group_sizze_89925 + sext_i32_i64(local_tid_129712); + int64_t gtid_89870 = squot64(flat_idx_129719, iota_arg_72776); + int64_t gtid_89878 = flat_idx_129719 - squot64(flat_idx_129719, + iota_arg_72776) * + iota_arg_72776; + int64_t orig_group_129720 = squot64(flat_idx_129719, + segscan_group_sizze_89925 * + sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))); + int64_t carry_in_flat_idx_129721 = orig_group_129720 * + (segscan_group_sizze_89925 * sdiv_up64(m_70861 * iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1; + + if (slt64(gtid_89870, m_70861) && slt64(gtid_89878, iota_arg_72776)) { + if (!(orig_group_129720 == (int64_t) 0 || (flat_idx_129719 == + (orig_group_129720 + + (int64_t) 1) * + (segscan_group_sizze_89925 * + sdiv_up64(m_70861 * + iota_arg_72776, + sext_i32_i64(num_threads_129668))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129719, + iota_arg_72776), + flat_idx_129719 - + carry_in_flat_idx_129721)))) { + double x_89929; + double x_89930; + + x_89929 = ((__global + double *) mem_125097)[squot64(carry_in_flat_idx_129721, + iota_arg_72776) * + iota_arg_72776 + + (carry_in_flat_idx_129721 - + 
squot64(carry_in_flat_idx_129721, + iota_arg_72776) * + iota_arg_72776)]; + x_89930 = ((__global double *) mem_125097)[gtid_89870 * + iota_arg_72776 + + gtid_89878]; + + double defunc_1_op_res_89931; + + defunc_1_op_res_89931 = x_89929 + x_89930; + x_89929 = defunc_1_op_res_89931; + ((__global double *) mem_125097)[gtid_89870 * iota_arg_72776 + + gtid_89878] = x_89929; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_89925 +} +__kernel void mainDetailedzisegmap_77255(__global int *global_failure, + int64_t N_70860, double freq_70865, + int64_t k2p2zq_70876, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120112) +{ + #define segmap_group_sizze_77322 (mainDetailedzisegmap_group_sizze_77258) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126336; + int32_t local_tid_126337; + int64_t group_sizze_126340; + int32_t wave_sizze_126339; + int32_t group_tid_126338; + + global_tid_126336 = get_global_id(0); + local_tid_126337 = get_local_id(0); + group_sizze_126340 = get_local_size(0); + wave_sizze_126339 = LOCKSTEP_WIDTH; + group_tid_126338 = get_group_id(0); + + int32_t phys_tid_77255; + + phys_tid_77255 = global_tid_126336; + + int64_t gtid_77253; + + gtid_77253 = squot64(sext_i32_i64(group_tid_126338) * + segmap_group_sizze_77322 + + sext_i32_i64(local_tid_126337), N_70860); + + int64_t gtid_77254; + + gtid_77254 = sext_i32_i64(group_tid_126338) * segmap_group_sizze_77322 + + sext_i32_i64(local_tid_126337) - + squot64(sext_i32_i64(group_tid_126338) * segmap_group_sizze_77322 + + sext_i32_i64(local_tid_126337), N_70860) * N_70860; + if (slt64(gtid_77253, k2p2zq_70876) && slt64(gtid_77254, N_70860)) { + bool index_primexp_115010 = gtid_77253 == (int64_t) 0; + double defunc_0_f_res_77328; + + if (index_primexp_115010) { + defunc_0_f_res_77328 = 1.0; + } else { + int64_t 
x_77327 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_77254]; + bool cond_77329 = gtid_77253 == (int64_t) 1; + double defunc_0_f_res_f_res_77330; + + if (cond_77329) { + double i64_res_77331 = sitofp_i64_f64(x_77327); + + defunc_0_f_res_f_res_77330 = i64_res_77331; + } else { + int64_t i64_arg_77332 = sdiv64(gtid_77253, (int64_t) 2); + double i64_res_77333 = sitofp_i64_f64(i64_arg_77332); + double i64_res_77334 = sitofp_i64_f64(x_77327); + double x_77335 = 6.283185307179586 * i64_res_77333; + double x_77336 = i64_res_77334 * x_77335; + double angle_77337 = x_77336 / freq_70865; + int64_t x_77338 = smod64(gtid_77253, (int64_t) 2); + bool cond_77339 = x_77338 == (int64_t) 0; + double defunc_0_f_res_f_res_f_res_77340; + + if (cond_77339) { + double sin_res_77341; + + sin_res_77341 = futrts_sin64(angle_77337); + defunc_0_f_res_f_res_f_res_77340 = sin_res_77341; + } else { + double cos_res_77342; + + cos_res_77342 = futrts_cos64(angle_77337); + defunc_0_f_res_f_res_f_res_77340 = cos_res_77342; + } + defunc_0_f_res_f_res_77330 = defunc_0_f_res_f_res_f_res_77340; + } + defunc_0_f_res_77328 = defunc_0_f_res_f_res_77330; + } + ((__global double *) mem_120112)[gtid_77253 * N_70860 + gtid_77254] = + defunc_0_f_res_77328; + } + + error_0: + return; + #undef segmap_group_sizze_77322 +} +__kernel void mainDetailedzisegmap_77355(__global int *global_failure, + int64_t N_70860, double freq_70865, + int64_t k2p2zq_70876, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120116) +{ + #define segmap_group_sizze_77418 (mainDetailedzisegmap_group_sizze_77358) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126341; + int32_t local_tid_126342; + int64_t group_sizze_126345; + int32_t wave_sizze_126344; + int32_t group_tid_126343; + + global_tid_126341 = get_global_id(0); + local_tid_126342 = get_local_id(0); + group_sizze_126345 = 
get_local_size(0); + wave_sizze_126344 = LOCKSTEP_WIDTH; + group_tid_126343 = get_group_id(0); + + int32_t phys_tid_77355; + + phys_tid_77355 = global_tid_126341; + + int64_t gtid_77353; + + gtid_77353 = squot64(sext_i32_i64(group_tid_126343) * + segmap_group_sizze_77418 + + sext_i32_i64(local_tid_126342), N_70860); + + int64_t gtid_77354; + + gtid_77354 = sext_i32_i64(group_tid_126343) * segmap_group_sizze_77418 + + sext_i32_i64(local_tid_126342) - + squot64(sext_i32_i64(group_tid_126343) * segmap_group_sizze_77418 + + sext_i32_i64(local_tid_126342), N_70860) * N_70860; + if (slt64(gtid_77353, k2p2zq_70876) && slt64(gtid_77354, N_70860)) { + bool index_primexp_115017 = gtid_77353 == (int64_t) 0; + double defunc_0_f_res_77424; + + if (index_primexp_115017) { + defunc_0_f_res_77424 = 1.0; + } else { + int64_t x_77423 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_77354]; + int64_t i_77425 = add64((int64_t) 1, gtid_77353); + int64_t i64_arg_77426 = sdiv64(i_77425, (int64_t) 2); + double i64_res_77427 = sitofp_i64_f64(i64_arg_77426); + double i64_res_77428 = sitofp_i64_f64(x_77423); + double x_77429 = 6.283185307179586 * i64_res_77427; + double x_77430 = i64_res_77428 * x_77429; + double angle_77431 = x_77430 / freq_70865; + int64_t x_77432 = smod64(i_77425, (int64_t) 2); + bool cond_77433 = x_77432 == (int64_t) 0; + double defunc_0_f_res_f_res_77434; + + if (cond_77433) { + double sin_res_77435; + + sin_res_77435 = futrts_sin64(angle_77431); + defunc_0_f_res_f_res_77434 = sin_res_77435; + } else { + double cos_res_77436; + + cos_res_77436 = futrts_cos64(angle_77431); + defunc_0_f_res_f_res_77434 = cos_res_77436; + } + defunc_0_f_res_77424 = defunc_0_f_res_f_res_77434; + } + ((__global double *) mem_120116)[gtid_77353 * N_70860 + gtid_77354] = + defunc_0_f_res_77424; + } + + error_0: + return; + #undef segmap_group_sizze_77418 +} +__kernel void mainDetailedzisegmap_77448(__global int *global_failure, + int64_t N_70860, int64_t k2p2zq_70876, + double 
defunc_0_f_res_70939, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124) +{ + #define segmap_group_sizze_77469 (mainDetailedzisegmap_group_sizze_77451) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126346; + int32_t local_tid_126347; + int64_t group_sizze_126350; + int32_t wave_sizze_126349; + int32_t group_tid_126348; + + global_tid_126346 = get_global_id(0); + local_tid_126347 = get_local_id(0); + group_sizze_126350 = get_local_size(0); + wave_sizze_126349 = LOCKSTEP_WIDTH; + group_tid_126348 = get_group_id(0); + + int32_t phys_tid_77448; + + phys_tid_77448 = global_tid_126346; + + int64_t gtid_77446; + + gtid_77446 = squot64(sext_i32_i64(group_tid_126348) * + segmap_group_sizze_77469 + + sext_i32_i64(local_tid_126347), k2p2zq_70876); + + int64_t gtid_77447; + + gtid_77447 = sext_i32_i64(group_tid_126348) * segmap_group_sizze_77469 + + sext_i32_i64(local_tid_126347) - + squot64(sext_i32_i64(group_tid_126348) * segmap_group_sizze_77469 + + sext_i32_i64(local_tid_126347), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_77446, N_70860) && slt64(gtid_77447, k2p2zq_70876)) { + double x_77472 = ((__global double *) mem_120120)[gtid_77446 * + k2p2zq_70876 + + gtid_77447]; + double defunc_0_f_res_77473 = defunc_0_f_res_70939 + x_77472; + + ((__global double *) mem_120124)[gtid_77446 * k2p2zq_70876 + + gtid_77447] = defunc_0_f_res_77473; + } + + error_0: + return; + #undef segmap_group_sizze_77469 +} +__kernel void mainDetailedzisegmap_77572(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t m_70956, + __global + unsigned char *images_mem_120108, + __global unsigned char *mem_120201, + __global unsigned char *mem_120206, + __global unsigned char *mem_120209) +{ + #define segmap_group_sizze_77806 (mainDetailedzisegmap_group_sizze_77575) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int 
block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126501; + int32_t local_tid_126502; + int64_t group_sizze_126505; + int32_t wave_sizze_126504; + int32_t group_tid_126503; + + global_tid_126501 = get_global_id(0); + local_tid_126502 = get_local_id(0); + group_sizze_126505 = get_local_size(0); + wave_sizze_126504 = LOCKSTEP_WIDTH; + group_tid_126503 = get_group_id(0); + + int32_t phys_tid_77572; + + phys_tid_77572 = global_tid_126501; + + int64_t gtid_77570; + + gtid_77570 = squot64(sext_i32_i64(group_tid_126503) * + segmap_group_sizze_77806 + + sext_i32_i64(local_tid_126502), n_70864); + + int64_t gtid_77571; + + gtid_77571 = sext_i32_i64(group_tid_126503) * segmap_group_sizze_77806 + + sext_i32_i64(local_tid_126502) - + squot64(sext_i32_i64(group_tid_126503) * segmap_group_sizze_77806 + + sext_i32_i64(local_tid_126502), n_70864) * n_70864; + if (slt64(gtid_77570, m_70861) && slt64(gtid_77571, n_70864)) { + int64_t binop_y_115030 = (int64_t) -1 * gtid_77571; + int64_t slice_115031 = m_70956 + binop_y_115030; + double x_77810 = ((__global double *) images_mem_120108)[gtid_77570 * + N_70860 + + slice_115031]; + bool defunc_0_f_res_77813; + + defunc_0_f_res_77813 = futrts_isnan64(x_77810); + + bool defunc_0_g_res_77814 = !defunc_0_f_res_77813; + int64_t defunc_1_f_res_77815; + + if (defunc_0_g_res_77814) { + int64_t x_77811 = ((__global int64_t *) mem_120201)[gtid_77570 * + n_70864 + + gtid_77571]; + int64_t defunc_1_f_res_t_res_77816 = sub64(x_77811, (int64_t) 1); + + defunc_1_f_res_77815 = defunc_1_f_res_t_res_77816; + } else { + defunc_1_f_res_77815 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_77570) && slt64(gtid_77570, m_70861)) && + (sle64((int64_t) 0, defunc_1_f_res_77815) && + slt64(defunc_1_f_res_77815, n_70864))) { + ((__global int64_t *) mem_120209)[gtid_77570 * n_70864 + + defunc_1_f_res_77815] = + gtid_77571; + } + if ((sle64((int64_t) 0, gtid_77570) && slt64(gtid_77570, m_70861)) && + (sle64((int64_t) 0, 
defunc_1_f_res_77815) && + slt64(defunc_1_f_res_77815, n_70864))) { + ((__global double *) mem_120206)[gtid_77570 * n_70864 + + defunc_1_f_res_77815] = x_77810; + } + } + + error_0: + return; + #undef segmap_group_sizze_77806 +} +__kernel void mainDetailedzisegmap_77850(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t k2p2zq_70876, + int64_t m_70956, + int64_t defunc_2_reduce_res_70985, + int64_t num_groups_77953, + int64_t num_threads_125639, __global + unsigned char *defunc_3_map_res_mem_120232, + __global unsigned char *mem_120235, + __global unsigned char *mem_120238, + __global unsigned char *mem_120246, + __global unsigned char *mem_125145) +{ + #define segmap_group_sizze_77952 (mainDetailedzisegmap_group_sizze_77853) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126539; + int32_t local_tid_126540; + int64_t group_sizze_126543; + int32_t wave_sizze_126542; + int32_t group_tid_126541; + + global_tid_126539 = get_global_id(0); + local_tid_126540 = get_local_id(0); + group_sizze_126543 = get_local_size(0); + wave_sizze_126542 = LOCKSTEP_WIDTH; + group_tid_126541 = get_group_id(0); + + int32_t phys_tid_77850; + + phys_tid_77850 = global_tid_126539; + + int32_t phys_group_id_126544; + + phys_group_id_126544 = get_group_id(0); + for (int32_t i_126545 = 0; i_126545 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * defunc_2_reduce_res_70985, + segmap_group_sizze_77952)) - + phys_group_id_126544, sext_i64_i32(num_groups_77953)); + i_126545++) { + int32_t virt_group_id_126546 = phys_group_id_126544 + i_126545 * + sext_i64_i32(num_groups_77953); + int64_t gtid_77848 = 
squot64(sext_i32_i64(virt_group_id_126546) * + segmap_group_sizze_77952 + + sext_i32_i64(local_tid_126540), + defunc_2_reduce_res_70985); + int64_t gtid_77849 = sext_i32_i64(virt_group_id_126546) * + segmap_group_sizze_77952 + sext_i32_i64(local_tid_126540) - + squot64(sext_i32_i64(virt_group_id_126546) * + segmap_group_sizze_77952 + + sext_i32_i64(local_tid_126540), + defunc_2_reduce_res_70985) * defunc_2_reduce_res_70985; + + if (slt64(gtid_77848, m_70861) && slt64(gtid_77849, + defunc_2_reduce_res_70985)) { + int64_t x_77956 = ((__global + int64_t *) defunc_3_map_res_mem_120232)[gtid_77848 * + n_70864 + + gtid_77849]; + bool cond_77957 = sle64((int64_t) 0, x_77956); + + if (cond_77957) { + bool y_77959 = slt64(x_77956, n_70864); + bool bounds_check_77960 = cond_77957 && y_77959; + bool empty_slice_77961 = k2p2zq_70876 == (int64_t) 0; + int64_t m_77962 = sub64(k2p2zq_70876, (int64_t) 1); + bool zzero_leq_i_p_m_t_s_77963 = sle64((int64_t) 0, m_77962); + bool i_p_m_t_s_leq_w_77964 = slt64(m_77962, k2p2zq_70876); + bool i_lte_j_77965 = sle64((int64_t) 0, k2p2zq_70876); + bool y_77966 = zzero_leq_i_p_m_t_s_77963 && + i_p_m_t_s_leq_w_77964; + bool y_77967 = i_lte_j_77965 && y_77966; + bool ok_or_empty_77968 = empty_slice_77961 || y_77967; + bool index_ok_77969 = bounds_check_77960 && ok_or_empty_77968; + bool index_certs_77970; + + if (!index_ok_77969) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 209) == -1) { + global_failure_args[0] = x_77956; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = n_70864; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t binop_y_77971 = (int64_t) -1 * x_77956; + int64_t slice_77972 = m_70956 + binop_y_77971; + + for (int64_t i_126547 = 0; i_126547 < k2p2zq_70876; + i_126547++) { + ((__global double *) mem_125145)[phys_tid_77850 + i_126547 * + num_threads_125639] = + ((__global double *) mem_120235)[slice_77972 + + i_126547 * N_70860]; + } + } else 
{ + for (int64_t i_126548 = 0; i_126548 < k2p2zq_70876; + i_126548++) { + ((__global double *) mem_120238)[phys_tid_77850 + i_126548 * + num_threads_125639] = NAN; + } + for (int64_t i_126549 = 0; i_126549 < k2p2zq_70876; + i_126549++) { + ((__global double *) mem_125145)[phys_tid_77850 + i_126549 * + num_threads_125639] = + ((__global double *) mem_120238)[phys_tid_77850 + + i_126549 * + num_threads_125639]; + } + } + for (int64_t i_126550 = 0; i_126550 < k2p2zq_70876; i_126550++) { + ((__global double *) mem_120246)[i_126550 * + (defunc_2_reduce_res_70985 * + m_70861) + gtid_77848 * + defunc_2_reduce_res_70985 + + gtid_77849] = ((__global + double *) mem_125145)[phys_tid_77850 + + i_126550 * + num_threads_125639]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_77952 +} +__kernel void mainDetailedzisegmap_77986(__global int *global_failure, + int64_t k2p2zq_70876, __global + unsigned char *mem_120252) +{ + #define segmap_group_sizze_78011 (mainDetailedzisegmap_group_sizze_77989) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126561; + int32_t local_tid_126562; + int64_t group_sizze_126565; + int32_t wave_sizze_126564; + int32_t group_tid_126563; + + global_tid_126561 = get_global_id(0); + local_tid_126562 = get_local_id(0); + group_sizze_126565 = get_local_size(0); + wave_sizze_126564 = LOCKSTEP_WIDTH; + group_tid_126563 = get_group_id(0); + + int32_t phys_tid_77986; + + phys_tid_77986 = global_tid_126561; + + int64_t gtid_77984; + + gtid_77984 = squot64(sext_i32_i64(group_tid_126563) * + segmap_group_sizze_78011 + + sext_i32_i64(local_tid_126562), k2p2zq_70876); + + int64_t gtid_77985; + + gtid_77985 = sext_i32_i64(group_tid_126563) * segmap_group_sizze_78011 + + sext_i32_i64(local_tid_126562) - + squot64(sext_i32_i64(group_tid_126563) * segmap_group_sizze_78011 + + 
sext_i32_i64(local_tid_126562), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_77984, k2p2zq_70876) && slt64(gtid_77985, k2p2zq_70876)) { + bool cond_78016 = gtid_77985 == gtid_77984; + double defunc_0_f_res_78017; + + if (cond_78016) { + defunc_0_f_res_78017 = 1.0; + } else { + defunc_0_f_res_78017 = 0.0; + } + ((__global double *) mem_120252)[gtid_77984 * k2p2zq_70876 + + gtid_77985] = defunc_0_f_res_78017; + } + + error_0: + return; + #undef segmap_group_sizze_78011 +} +__kernel void mainDetailedzisegmap_78117(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_70948, unsigned char y_70952, + unsigned char ok_or_empty_70954, + int64_t min_res_71066, int64_t k_71067, + int64_t num_groups_78573, + int64_t binop_x_120251, + int64_t num_threads_125643, __global + unsigned char *mem_120252, __global + unsigned char *mem_120257, __global + unsigned char *mem_120261, __global + unsigned char *mem_120265, __global + unsigned char *mem_120268, __global + unsigned char *mem_120271, __global + unsigned char *mem_120273, __global + unsigned char *mem_120608, __global + unsigned char *mem_120649, __global + unsigned char *mem_120661, __global + unsigned char *mem_120690, __global + unsigned char *mem_120763, __global + unsigned char *mem_120778, __global + unsigned char *mem_120790, __global + unsigned char *mem_120801, __global + unsigned char *mem_120821, __global + unsigned char *mem_120824, __global + unsigned char *mem_120878, __global + unsigned char *mem_120881, __global + unsigned char *mem_120883, __global + unsigned char *mem_125150, __global + unsigned char *mem_125152, __global + unsigned char *mem_125160, __global + unsigned char *mem_125421, __global + unsigned char *mem_125429, __global + unsigned char *mem_125431, __global + unsigned char *mem_125491, __global + unsigned char *double_buffer_mem_125535, + __global + unsigned char *double_buffer_mem_125536, + 
__global + unsigned char *double_buffer_mem_125537, + __global + unsigned char *double_buffer_mem_125548) +{ + #define segmap_group_sizze_78572 (mainDetailedzisegmap_group_sizze_78119) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126589; + int32_t local_tid_126590; + int64_t group_sizze_126593; + int32_t wave_sizze_126592; + int32_t group_tid_126591; + + global_tid_126589 = get_global_id(0); + local_tid_126590 = get_local_id(0); + group_sizze_126593 = get_local_size(0); + wave_sizze_126592 = LOCKSTEP_WIDTH; + group_tid_126591 = get_group_id(0); + + int32_t phys_tid_78117; + + phys_tid_78117 = global_tid_126589; + + int32_t phys_group_id_126594; + + phys_group_id_126594 = get_group_id(0); + for (int32_t i_126595 = 0; i_126595 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_78572)) - + phys_group_id_126594, sext_i64_i32(num_groups_78573)); + i_126595++) { + int32_t virt_group_id_126596 = phys_group_id_126594 + i_126595 * + sext_i64_i32(num_groups_78573); + int64_t gtid_78116 = sext_i32_i64(virt_group_id_126596) * + segmap_group_sizze_78572 + sext_i32_i64(local_tid_126590); + + if (slt64(gtid_78116, m_70861)) { + for (int64_t i_126597 = 0; i_126597 < k2p2zq_70876; i_126597++) { + ((__global double *) mem_120268)[phys_tid_78117 + i_126597 * + num_threads_125643] = 0.0; + } + for (int64_t i_126598 = 0; i_126598 < (int64_t) 2; i_126598++) { + for (int64_t i_126599 = 0; i_126599 < k2p2zq_70876; + i_126599++) { + ((__global double *) mem_120271)[phys_tid_78117 + + (i_126598 * + (num_threads_125643 * + k2p2zq_70876) + + i_126599 * + num_threads_125643)] = + 0.0; + } + } + for (int64_t i_126600 = 0; i_126600 < k2p2zq_70876; i_126600++) { + int64_t x_126601 = (int64_t) 0 + i_126600 * (int64_t) 1; + + 
((__global int64_t *) mem_120273)[phys_tid_78117 + i_126600 * + num_threads_125643] = + x_126601; + } + for (int64_t j_78585 = 0; j_78585 < k2p2zq_70876; j_78585++) { + bool index_certs_78588; + + if (!ok_or_empty_70954) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 210) == -1) { + global_failure_args[0] = j_78585; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_78590; + double redout_119641 = 0.0; + + for (int64_t i_119642 = 0; i_119642 < k2p2zq_70876; + i_119642++) { + double x_78594 = ((__global double *) mem_120265)[i_119642 * + (k2p2zq_70876 * + m_70861) + + gtid_78116 * + k2p2zq_70876 + + j_78585]; + double defunc_1_f_res_78595 = x_78594 * x_78594; + double defunc_1_op_res_78593 = defunc_1_f_res_78595 + + redout_119641; + double redout_tmp_126604 = defunc_1_op_res_78593; + + redout_119641 = redout_tmp_126604; + } + defunc_2_reduce_res_78590 = redout_119641; + + double sqrt_res_78596; + + sqrt_res_78596 = futrts_sqrt64(defunc_2_reduce_res_78590); + ((__global double *) mem_120268)[phys_tid_78117 + j_78585 * + num_threads_125643] = + sqrt_res_78596; + ((__global double *) mem_120271)[phys_tid_78117 + j_78585 * + num_threads_125643] = + sqrt_res_78596; + + bool zeze_res_78599 = sqrt_res_78596 == 0.0; + double lw_val_78600; + + if (zeze_res_78599) { + lw_val_78600 = 1.0; + } else { + lw_val_78600 = sqrt_res_78596; + } + ((__global double *) mem_120271)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + j_78585 * + num_threads_125643)] = + lw_val_78600; + } + for (int64_t i_126605 = 0; i_126605 < k2p2zq_70876; i_126605++) { + for (int64_t i_126606 = 0; i_126606 < k2p2zq_70876; + i_126606++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_126605 * + (num_threads_125643 * + k2p2zq_70876) + + i_126606 * + num_threads_125643)] = + ((__global double *) 
mem_120261)[gtid_78116 + + (i_126605 * (m_70861 * + k2p2zq_70876) + + i_126606 * m_70861)]; + } + } + for (int64_t i_126607 = 0; i_126607 < k2p2zq_70876; i_126607++) { + ((__global double *) double_buffer_mem_125536)[phys_tid_78117 + + i_126607 * + num_threads_125643] = + ((__global double *) mem_120268)[phys_tid_78117 + i_126607 * + num_threads_125643]; + } + for (int64_t i_126608 = 0; i_126608 < (int64_t) 2; i_126608++) { + for (int64_t i_126609 = 0; i_126609 < k2p2zq_70876; + i_126609++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (i_126608 * + (num_threads_125643 * + k2p2zq_70876) + + i_126609 * + num_threads_125643)] = + ((__global double *) mem_120271)[phys_tid_78117 + + (i_126608 * + (num_threads_125643 * + k2p2zq_70876) + + i_126609 * + num_threads_125643)]; + } + } + + int64_t dqrdc2_res_78606; + int64_t k_78612 = k_71067; + + for (int64_t l_78607 = 0; l_78607 < min_res_71066; l_78607++) { + int64_t x_78613 = add64((int64_t) 1, l_78607); + bool cond_78614 = slt64(x_78613, k_78612); + bool loop_cond_78615; + + if (cond_78614) { + bool y_78616 = slt64(l_78607, k2p2zq_70876); + bool index_certs_78617; + + if (!y_78616) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 211) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_78607; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_78618 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + l_78607 * + num_threads_125643)]; + double zt_res_78619 = 1.0e-7 * zt_arg_78618; + bool index_certs_78620; + + if (!y_78616) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 212) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_78621 = ((__global + double *) 
double_buffer_mem_125536)[phys_tid_78117 + + l_78607 * + num_threads_125643]; + bool zl_res_78622 = zl_arg_78621 < zt_res_78619; + + loop_cond_78615 = zl_res_78622; + } else { + loop_cond_78615 = 0; + } + + bool y_78623 = slt64(l_78607, k2p2zq_70876); + int64_t upper_bound_78624 = sub64(k2p2zq_70876, x_78613); + bool loop_not_taken_78625 = !loop_cond_78615; + bool protect_assert_disj_78626 = y_78623 || + loop_not_taken_78625; + bool index_certs_78627; + + if (!protect_assert_disj_78626) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 213) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_78628; + + if (!protect_assert_disj_78626) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 214) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_78607; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_78629; + + if (!protect_assert_disj_78626) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 215) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_78607; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_78630 = y_70952 || + loop_not_taken_78625; + bool index_certs_78631; + + if (!protect_assert_disj_78630) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 216) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_78632; + + if (!protect_assert_disj_78630) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 217) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + 
global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_78633; + + if (!protect_assert_disj_78630) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 218) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_78634; + int64_t loopres_78639; + bool loop_while_78640; + int64_t k_78645; + + loop_while_78640 = loop_cond_78615; + k_78645 = k_78612; + while (loop_while_78640) { + for (int64_t i_78647 = 0; i_78647 < k2p2zq_70876; + i_78647++) { + bool index_certs_78649; + + if (!y_78623) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 219) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = i_78647; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_78650 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + i_78647 * + num_threads_125643)]; + + for (int64_t j0_78652 = 0; j0_78652 < upper_bound_78624; + j0_78652++) { + int64_t j_78654 = add64(x_78613, j0_78652); + bool x_78655 = sle64((int64_t) 0, j_78654); + bool y_78656 = slt64(j_78654, k2p2zq_70876); + bool bounds_check_78657 = x_78655 && y_78656; + bool index_certs_78658; + + if (!bounds_check_78657) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 220) == + -1) { + global_failure_args[0] = j_78654; + global_failure_args[1] = i_78647; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_78659 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (j_78654 * + (num_threads_125643 * + k2p2zq_70876) + + i_78647 * + num_threads_125643)]; + int64_t 
i_78660 = sub64(j_78654, (int64_t) 1); + bool x_78661 = sle64((int64_t) 0, i_78660); + bool y_78662 = slt64(i_78660, k2p2zq_70876); + bool bounds_check_78663 = x_78661 && y_78662; + bool index_certs_78664; + + if (!bounds_check_78663) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 221) == + -1) { + global_failure_args[0] = i_78660; + global_failure_args[1] = i_78647; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_78660 * + (num_threads_125643 * + k2p2zq_70876) + + i_78647 * + num_threads_125643)] = + lw_val_78659; + } + + bool index_certs_78666; + + if (!y_70952) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 222) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = i_78647; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (m_70948 * + (num_threads_125643 * + k2p2zq_70876) + + i_78647 * + num_threads_125643)] = + t_78650; + } + + int64_t i_78668 = ((__global + int64_t *) mem_120273)[phys_tid_78117 + + l_78607 * + num_threads_125643]; + double t_78669 = ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + l_78607 * + num_threads_125643]; + double tt_78670 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + l_78607 * + num_threads_125643]; + double ttt_78671 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + l_78607 * + num_threads_125643)]; + + for (int64_t j0_78675 = 0; j0_78675 < upper_bound_78624; + j0_78675++) { + int64_t j_78679 = add64(x_78613, j0_78675); + bool x_78680 = sle64((int64_t) 0, j_78679); + bool y_78681 = slt64(j_78679, k2p2zq_70876); + bool bounds_check_78682 = x_78680 && y_78681; + bool 
index_certs_78683; + + if (!bounds_check_78682) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 223) == -1) { + global_failure_args[0] = j_78679; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_78684 = ((__global + int64_t *) mem_120273)[phys_tid_78117 + + j_78679 * + num_threads_125643]; + int64_t i_78685 = sub64(j_78679, (int64_t) 1); + bool x_78686 = sle64((int64_t) 0, i_78685); + bool y_78687 = slt64(i_78685, k2p2zq_70876); + bool bounds_check_78688 = x_78686 && y_78687; + bool index_certs_78689; + + if (!bounds_check_78688) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 224) == -1) { + global_failure_args[0] = i_78685; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_120273)[phys_tid_78117 + + i_78685 * + num_threads_125643] = + lw_val_78684; + + double lw_val_78691 = ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + j_78679 * + num_threads_125643]; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + i_78685 * + num_threads_125643] = + lw_val_78691; + + bool index_certs_78693; + + if (!bounds_check_78682) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 225) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_78679; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_78694 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + j_78679 * + num_threads_125643]; + bool index_certs_78695; + + if (!bounds_check_78688) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 226) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_78685; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + 
((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + i_78685 * + num_threads_125643] = + lw_val_78694; + + bool index_certs_78697; + + if (!bounds_check_78682) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 227) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_78679; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_78698 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + j_78679 * + num_threads_125643)]; + bool index_certs_78699; + + if (!bounds_check_78688) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 228) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_78685; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + i_78685 * + num_threads_125643)] = + lw_val_78698; + } + ((__global int64_t *) mem_120273)[phys_tid_78117 + m_70948 * + num_threads_125643] = + i_78668; + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + m_70948 * + num_threads_125643] = + t_78669; + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + m_70948 * + num_threads_125643] = + tt_78670; + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + m_70948 * + num_threads_125643)] = + ttt_78671; + + int64_t k_78705 = sub64(k_78645, (int64_t) 1); + bool cond_78706 = slt64(x_78613, k_78705); + bool loop_cond_78707; + + if (cond_78706) { + bool index_certs_78708; + + if (!y_78623) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 229) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_78607; + global_failure_args[2] = (int64_t) 
2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_78709 = ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (num_threads_125643 * + k2p2zq_70876 + + l_78607 * + num_threads_125643)]; + double zt_res_78710 = 1.0e-7 * zt_arg_78709; + bool index_certs_78711; + + if (!y_78623) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 230) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_78712 = ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + l_78607 * + num_threads_125643]; + bool zl_res_78713 = zl_arg_78712 < zt_res_78710; + + loop_cond_78707 = zl_res_78713; + } else { + loop_cond_78707 = 0; + } + + bool loop_while_tmp_126615 = loop_cond_78707; + int64_t k_tmp_126620 = k_78705; + + loop_while_78640 = loop_while_tmp_126615; + k_78645 = k_tmp_126620; + } + loopres_78634 = loop_while_78640; + loopres_78639 = k_78645; + + bool cond_78714 = x_78613 == k2p2zq_70876; + int64_t j_m_i_78715 = sub64(k2p2zq_70876, l_78607); + bool empty_slice_78719 = j_m_i_78715 == (int64_t) 0; + int64_t m_78720 = sub64(j_m_i_78715, (int64_t) 1); + int64_t i_p_m_t_s_78721 = add64(l_78607, m_78720); + bool zzero_leq_i_p_m_t_s_78722 = sle64((int64_t) 0, + i_p_m_t_s_78721); + bool i_p_m_t_s_leq_w_78723 = slt64(i_p_m_t_s_78721, + k2p2zq_70876); + bool i_lte_j_78724 = sle64(l_78607, k2p2zq_70876); + bool y_78725 = zzero_leq_i_p_m_t_s_78722 && + i_p_m_t_s_leq_w_78723; + bool y_78726 = i_lte_j_78724 && y_78725; + bool ok_or_empty_78727 = empty_slice_78719 || y_78726; + bool index_ok_78728 = y_78623 && ok_or_empty_78727; + + if (cond_78714) { + for (int64_t i_126626 = 0; i_126626 < k2p2zq_70876; + i_126626++) { + ((__global double *) mem_125431)[phys_tid_78117 + + i_126626 * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + i_126626 * + 
num_threads_125643]; + } + for (int64_t i_126627 = 0; i_126627 < (int64_t) 2; + i_126627++) { + for (int64_t i_126628 = 0; i_126628 < k2p2zq_70876; + i_126628++) { + ((__global double *) mem_125429)[phys_tid_78117 + + (i_126627 * + (num_threads_125643 * + k2p2zq_70876) + + i_126628 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (i_126627 * + (num_threads_125643 * + k2p2zq_70876) + + i_126628 * + num_threads_125643)]; + } + } + for (int64_t i_126629 = 0; i_126629 < k2p2zq_70876; + i_126629++) { + for (int64_t i_126630 = 0; i_126630 < k2p2zq_70876; + i_126630++) { + ((__global double *) mem_125491)[phys_tid_78117 + + (i_126629 * + (num_threads_125643 * + k2p2zq_70876) + + i_126630 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_126629 * + (num_threads_125643 * + k2p2zq_70876) + + i_126630 * + num_threads_125643)]; + } + } + } else { + bool index_certs_78729; + + if (!index_ok_78728) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 231) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = l_78607; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_78731; + double redout_119643 = 0.0; + + for (int64_t i_119644 = 0; i_119644 < j_m_i_78715; + i_119644++) { + int64_t slice_119985 = l_78607 + i_119644; + double x_78735 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + slice_119985 * + num_threads_125643)]; + double defunc_1_f_res_78736 = x_78735 * x_78735; + double defunc_1_op_res_78734 = defunc_1_f_res_78736 + + redout_119643; + double redout_tmp_126631 = defunc_1_op_res_78734; + + redout_119643 = redout_tmp_126631; + } + defunc_2_reduce_res_78731 = redout_119643; + + double sqrt_res_78737; + + sqrt_res_78737 = futrts_sqrt64(defunc_2_reduce_res_78731); 
+ + bool zeze_res_78738 = sqrt_res_78737 == 0.0; + + if (zeze_res_78738) { + for (int64_t i_126632 = 0; i_126632 < k2p2zq_70876; + i_126632++) { + ((__global double *) mem_125152)[phys_tid_78117 + + i_126632 * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + i_126632 * + num_threads_125643]; + } + for (int64_t i_126633 = 0; i_126633 < (int64_t) 2; + i_126633++) { + for (int64_t i_126634 = 0; i_126634 < k2p2zq_70876; + i_126634++) { + ((__global + double *) mem_125150)[phys_tid_78117 + + (i_126633 * + (num_threads_125643 * + k2p2zq_70876) + + i_126634 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (i_126633 * + (num_threads_125643 * + k2p2zq_70876) + + i_126634 * + num_threads_125643)]; + } + } + for (int64_t i_126635 = 0; i_126635 < k2p2zq_70876; + i_126635++) { + for (int64_t i_126636 = 0; i_126636 < k2p2zq_70876; + i_126636++) { + ((__global + double *) mem_125421)[phys_tid_78117 + + (i_126635 * + (num_threads_125643 * + k2p2zq_70876) + + i_126636 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_126635 * + (num_threads_125643 * + k2p2zq_70876) + + i_126636 * + num_threads_125643)]; + } + } + } else { + bool index_ok_78742 = y_78623 && y_78623; + bool index_certs_78743; + + if (!index_ok_78742) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 232) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = l_78607; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_78744 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)]; + bool zeze_res_78745 = znze_arg_78744 == 0.0; + bool znze_res_78746 = !zeze_res_78745; + double nrmxl_78747; + + if (znze_res_78746) { + double abs_res_78748 = 
fabs(sqrt_res_78737); + double sgn_res_78749 = fsignum32(znze_arg_78744); + double zt_res_78750 = abs_res_78748 * sgn_res_78749; + + nrmxl_78747 = zt_res_78750; + } else { + nrmxl_78747 = sqrt_res_78737; + } + for (int64_t i0_78752 = 0; i0_78752 < j_m_i_78715; + i0_78752++) { + int64_t i_78754 = add64(l_78607, i0_78752); + bool x_78755 = sle64((int64_t) 0, i_78754); + bool y_78756 = slt64(i_78754, k2p2zq_70876); + bool bounds_check_78757 = x_78755 && y_78756; + bool index_ok_78758 = y_78623 && bounds_check_78757; + bool index_certs_78759; + + if (!index_ok_78758) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 233) == + -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = i_78754; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_78760 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + i_78754 * + num_threads_125643)]; + double lw_val_78761 = x_78760 / nrmxl_78747; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + i_78754 * + num_threads_125643)] = + lw_val_78761; + } + + double zp_arg_78763 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)]; + double zp_res_78764 = 1.0 + zp_arg_78763; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)] = + zp_res_78764; + + bool bounds_invalid_upwards_78766 = slt64(k2p2zq_70876, + x_78613); + bool valid_78767 = !bounds_invalid_upwards_78766; + bool range_valid_c_78768; + + if (!valid_78767) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 234) == -1) { + global_failure_args[0] = x_78613; + global_failure_args[1] = k2p2zq_70876; + ; + 
} + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_78769 = slt64((int64_t) 0, + upper_bound_78624); + bool loop_not_taken_78770 = !loop_nonempty_78769; + bool protect_assert_disj_78771 = index_ok_78742 || + loop_not_taken_78770; + bool index_certs_78772; + + if (!protect_assert_disj_78771) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 235) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = l_78607; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_78776 = 0; i_78776 < upper_bound_78624; + i_78776++) { + int64_t index_primexp_78780 = add64(x_78613, + i_78776); + bool x_78781 = sle64((int64_t) 0, + index_primexp_78780); + bool y_78782 = slt64(index_primexp_78780, + k2p2zq_70876); + bool bounds_check_78783 = x_78781 && y_78782; + double t_78784; + double t_78786 = 0.0; + + for (int64_t i0_78785 = 0; i0_78785 < j_m_i_78715; + i0_78785++) { + int64_t i_78787 = add64(l_78607, i0_78785); + bool x_78788 = sle64((int64_t) 0, i_78787); + bool y_78789 = slt64(i_78787, k2p2zq_70876); + bool bounds_check_78790 = x_78788 && y_78789; + bool index_ok_78791 = y_78623 && + bounds_check_78790; + bool index_certs_78792; + + if (!index_ok_78791) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 236) == + -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = i_78787; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_78793 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + i_78787 * + num_threads_125643)]; + bool index_ok_78794 = bounds_check_78783 && + bounds_check_78790; + bool index_certs_78795; + + if (!index_ok_78794) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 237) == + -1) { + global_failure_args[0] = 
+ index_primexp_78780; + global_failure_args[1] = i_78787; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_78796 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (index_primexp_78780 * + (num_threads_125643 * + k2p2zq_70876) + + i_78787 * + num_threads_125643)]; + double y_78797 = x_78793 * y_78796; + double loopres_78798 = t_78786 - y_78797; + double t_tmp_126641 = loopres_78798; + + t_78786 = t_tmp_126641; + } + t_78784 = t_78786; + + double y_78799 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)]; + double t_78800 = t_78784 / y_78799; + + for (int64_t i0_78802 = 0; i0_78802 < j_m_i_78715; + i0_78802++) { + int64_t i_78804 = add64(l_78607, i0_78802); + bool x_78805 = sle64((int64_t) 0, i_78804); + bool y_78806 = slt64(i_78804, k2p2zq_70876); + bool bounds_check_78807 = x_78805 && y_78806; + bool index_ok_78808 = bounds_check_78783 && + bounds_check_78807; + bool index_certs_78809; + + if (!index_ok_78808) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 238) == + -1) { + global_failure_args[0] = + index_primexp_78780; + global_failure_args[1] = i_78804; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_78810 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (index_primexp_78780 * + (num_threads_125643 * + k2p2zq_70876) + + i_78804 * + num_threads_125643)]; + bool index_ok_78811 = y_78623 && + bounds_check_78807; + bool index_certs_78812; + + if (!index_ok_78811) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 239) == + -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = i_78804; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + 
; + } + local_failure = true; + goto error_0; + } + } + + double y_78813 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + i_78804 * + num_threads_125643)]; + double y_78814 = t_78800 * y_78813; + double lw_val_78815 = x_78810 + y_78814; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (index_primexp_78780 * + (num_threads_125643 * + k2p2zq_70876) + + i_78804 * + num_threads_125643)] = + lw_val_78815; + } + + bool index_certs_78817; + + if (!bounds_check_78783) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 240) == + -1) { + global_failure_args[0] = + index_primexp_78780; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_78818 = ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + index_primexp_78780 * + num_threads_125643]; + bool zeze_res_78819 = zeze_arg_78818 == 0.0; + + if (!zeze_res_78819) { + bool index_ok_78822 = y_78623 && + bounds_check_78783; + bool index_certs_78823; + + if (!index_ok_78822) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 241) == + -1) { + global_failure_args[0] = + index_primexp_78780; + global_failure_args[1] = l_78607; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_78824 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (index_primexp_78780 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)]; + double abs_res_78825 = fabs(abs_arg_78824); + double zs_res_78826 = abs_res_78825 / + zeze_arg_78818; + double ztzt_res_78827 = fpow64(zs_res_78826, + 2.0); + double zm_res_78828 = 1.0 - ztzt_res_78827; + double max_res_78829 = fmax64(0.0, + zm_res_78828); + double abs_res_78830 = fabs(max_res_78829); + bool zgze_res_78831 = 1.0e-6 <= abs_res_78830; + + if (zgze_res_78831) { + 
double sqrt_res_78834; + + sqrt_res_78834 = + futrts_sqrt64(max_res_78829); + + double zt_res_78835 = zeze_arg_78818 * + sqrt_res_78834; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + index_primexp_78780 * + num_threads_125643] = + zt_res_78835; + } else { + bool empty_slice_78837 = + upper_bound_78624 == (int64_t) 0; + int64_t m_78838 = sub64(upper_bound_78624, + (int64_t) 1); + int64_t i_p_m_t_s_78839 = add64(x_78613, + m_78838); + bool zzero_leq_i_p_m_t_s_78840 = + sle64((int64_t) 0, i_p_m_t_s_78839); + bool i_p_m_t_s_leq_w_78841 = + slt64(i_p_m_t_s_78839, k2p2zq_70876); + bool zzero_lte_i_78842 = sle64((int64_t) 0, + x_78613); + bool i_lte_j_78843 = sle64(x_78613, + k2p2zq_70876); + bool y_78844 = i_p_m_t_s_leq_w_78841 && + zzero_lte_i_78842; + bool y_78845 = zzero_leq_i_p_m_t_s_78840 && + y_78844; + bool y_78846 = i_lte_j_78843 && y_78845; + bool forwards_ok_78847 = + zzero_lte_i_78842 && y_78846; + bool ok_or_empty_78848 = + empty_slice_78837 || forwards_ok_78847; + bool index_ok_78849 = bounds_check_78783 && + ok_or_empty_78848; + bool index_certs_78850; + + if (!index_ok_78849) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 242) == + -1) { + global_failure_args[0] = + index_primexp_78780; + global_failure_args[1] = + x_78613; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_78852; + double redout_119645 = 0.0; + + for (int64_t i_119646 = 0; i_119646 < + upper_bound_78624; i_119646++) { + int64_t slice_119986 = x_78613 + + i_119646; + double x_78856 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (index_primexp_78780 * + (num_threads_125643 * + k2p2zq_70876) + + slice_119986 * + num_threads_125643)]; + double defunc_1_f_res_78857 = x_78856 * + x_78856; + double defunc_1_op_res_78855 = + defunc_1_f_res_78857 + + redout_119645; + double redout_tmp_126643 = + 
defunc_1_op_res_78855; + + redout_119645 = redout_tmp_126643; + } + defunc_2_reduce_res_78852 = redout_119645; + + double sqrt_res_78858; + + sqrt_res_78858 = + futrts_sqrt64(defunc_2_reduce_res_78852); + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + index_primexp_78780 * + num_threads_125643] = + sqrt_res_78858; + + bool index_certs_78860; + + if (!bounds_check_78783) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 243) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_78780; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126644 = 0; i_126644 < + (int64_t) 1; i_126644++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (index_primexp_78780 + + i_126644) * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + num_threads_125643 * + index_primexp_78780 + + i_126644 * + num_threads_125643]; + } + } + } + } + + bool index_certs_78863; + + if (!y_78623) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 244) == -1) { + global_failure_args[0] = l_78607; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126645 = 0; i_126645 < (int64_t) 1; + i_126645++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + (l_78607 + + i_126645) * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + num_threads_125643 * + l_78607 + + i_126645 * + num_threads_125643]; + } + + double zt_res_78866 = -1.0 * nrmxl_78747; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (l_78607 * + (num_threads_125643 * + k2p2zq_70876) + + l_78607 * + num_threads_125643)] = + zt_res_78866; + for (int64_t i_126646 = 0; i_126646 < k2p2zq_70876; + 
i_126646++) { + ((__global double *) mem_125152)[phys_tid_78117 + + i_126646 * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + i_126646 * + num_threads_125643]; + } + for (int64_t i_126647 = 0; i_126647 < (int64_t) 2; + i_126647++) { + for (int64_t i_126648 = 0; i_126648 < k2p2zq_70876; + i_126648++) { + ((__global + double *) mem_125150)[phys_tid_78117 + + (i_126647 * + (num_threads_125643 * + k2p2zq_70876) + + i_126648 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (i_126647 * + (num_threads_125643 * + k2p2zq_70876) + + i_126648 * + num_threads_125643)]; + } + } + for (int64_t i_126649 = 0; i_126649 < k2p2zq_70876; + i_126649++) { + for (int64_t i_126650 = 0; i_126650 < k2p2zq_70876; + i_126650++) { + ((__global + double *) mem_125421)[phys_tid_78117 + + (i_126649 * + (num_threads_125643 * + k2p2zq_70876) + + i_126650 * + num_threads_125643)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_126649 * + (num_threads_125643 * + k2p2zq_70876) + + i_126650 * + num_threads_125643)]; + } + } + } + for (int64_t i_126651 = 0; i_126651 < k2p2zq_70876; + i_126651++) { + ((__global double *) mem_125431)[phys_tid_78117 + + i_126651 * + num_threads_125643] = + ((__global double *) mem_125152)[phys_tid_78117 + + i_126651 * + num_threads_125643]; + } + for (int64_t i_126652 = 0; i_126652 < (int64_t) 2; + i_126652++) { + for (int64_t i_126653 = 0; i_126653 < k2p2zq_70876; + i_126653++) { + ((__global double *) mem_125429)[phys_tid_78117 + + (i_126652 * + (num_threads_125643 * + k2p2zq_70876) + + i_126653 * + num_threads_125643)] = + ((__global + double *) mem_125150)[phys_tid_78117 + + (i_126652 * + (num_threads_125643 * + k2p2zq_70876) + + i_126653 * + num_threads_125643)]; + } + } + for (int64_t i_126654 = 0; i_126654 < k2p2zq_70876; + i_126654++) { + for (int64_t i_126655 = 0; i_126655 < k2p2zq_70876; + i_126655++) { + ((__global double *) 
mem_125491)[phys_tid_78117 + + (i_126654 * + (num_threads_125643 * + k2p2zq_70876) + + i_126655 * + num_threads_125643)] = + ((__global + double *) mem_125421)[phys_tid_78117 + + (i_126654 * + (num_threads_125643 * + k2p2zq_70876) + + i_126655 * + num_threads_125643)]; + } + } + } + for (int64_t i_126656 = 0; i_126656 < k2p2zq_70876; + i_126656++) { + for (int64_t i_126657 = 0; i_126657 < k2p2zq_70876; + i_126657++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_126656 * + (num_threads_125643 * + k2p2zq_70876) + + i_126657 * + num_threads_125643)] = + ((__global double *) mem_125491)[phys_tid_78117 + + (i_126656 * + (num_threads_125643 * + k2p2zq_70876) + + i_126657 * + num_threads_125643)]; + } + } + for (int64_t i_126658 = 0; i_126658 < k2p2zq_70876; + i_126658++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + i_126658 * + num_threads_125643] = + ((__global double *) mem_125431)[phys_tid_78117 + + i_126658 * + num_threads_125643]; + } + for (int64_t i_126659 = 0; i_126659 < (int64_t) 2; i_126659++) { + for (int64_t i_126660 = 0; i_126660 < k2p2zq_70876; + i_126660++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_78117 + + (i_126659 * + (num_threads_125643 * + k2p2zq_70876) + + i_126660 * + num_threads_125643)] = + ((__global double *) mem_125429)[phys_tid_78117 + + (i_126659 * + (num_threads_125643 * + k2p2zq_70876) + + i_126660 * + num_threads_125643)]; + } + } + + int64_t k_tmp_126614 = loopres_78639; + + k_78612 = k_tmp_126614; + } + dqrdc2_res_78606 = k_78612; + + int64_t min_arg_78868 = sub64(dqrdc2_res_78606, (int64_t) 1); + int64_t min_res_78869 = smin64(k2p2zq_70876, min_arg_78868); + + for (int64_t i_119649 = 0; i_119649 < k2p2zq_70876; i_119649++) { + int64_t x_78873 = add64((int64_t) 1, i_119649); + bool cond_f_res_78874 = slt64(min_res_78869, x_78873); + + for (int64_t i_119653 = 0; i_119653 < k2p2zq_70876; + i_119653++) { + int64_t x_78878 = add64((int64_t) 1, i_119653); + bool 
cond_78879 = slt64(min_res_78869, x_78878); + bool x_78880 = !cond_78879; + bool y_78881 = cond_f_res_78874 && x_78880; + bool cond_78882 = cond_78879 || y_78881; + double defunc_1_f_res_78883; + + if (cond_78882) { + defunc_1_f_res_78883 = NAN; + } else { + double x_78877 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (i_119649 * + (num_threads_125643 * + k2p2zq_70876) + + i_119653 * + num_threads_125643)]; + + defunc_1_f_res_78883 = x_78877; + } + ((__global double *) mem_120608)[phys_tid_78117 + + (i_119649 * + (num_threads_125643 * + k2p2zq_70876) + + i_119653 * + num_threads_125643)] = + defunc_1_f_res_78883; + } + } + for (int64_t i_126663 = 0; i_126663 < k2p2zq_70876; i_126663++) { + ((__global double *) mem_120661)[phys_tid_78117 + i_126663 * + num_threads_125643] = 0.0; + } + for (int64_t i_119657 = 0; i_119657 < k2p2zq_70876; i_119657++) { + for (int64_t i_126665 = 0; i_126665 < k2p2zq_70876; + i_126665++) { + ((__global double *) mem_120649)[phys_tid_78117 + + (i_119657 * + (num_threads_125643 * + k2p2zq_70876) + + i_126665 * + num_threads_125643)] = + ((__global double *) mem_120661)[phys_tid_78117 + + i_126665 * + num_threads_125643]; + } + for (int64_t i_78889 = 0; i_78889 < k2p2zq_70876; i_78889++) { + int64_t x_78891 = sub64(k2p2zq_70876, i_78889); + int64_t i_78892 = sub64(x_78891, (int64_t) 1); + bool x_78893 = sle64((int64_t) 0, i_78892); + bool y_78894 = slt64(i_78892, k2p2zq_70876); + bool bounds_check_78895 = x_78893 && y_78894; + int64_t j_m_i_78896 = sub64(k2p2zq_70876, x_78891); + bool empty_slice_78897 = j_m_i_78896 == (int64_t) 0; + int64_t m_78898 = sub64(j_m_i_78896, (int64_t) 1); + int64_t i_p_m_t_s_78899 = add64(x_78891, m_78898); + bool zzero_leq_i_p_m_t_s_78900 = sle64((int64_t) 0, + i_p_m_t_s_78899); + bool i_p_m_t_s_leq_w_78901 = slt64(i_p_m_t_s_78899, + k2p2zq_70876); + bool zzero_lte_i_78902 = sle64((int64_t) 0, x_78891); + bool i_lte_j_78903 = sle64(x_78891, k2p2zq_70876); + bool y_78904 = 
i_p_m_t_s_leq_w_78901 && zzero_lte_i_78902; + bool y_78905 = zzero_leq_i_p_m_t_s_78900 && y_78904; + bool y_78906 = i_lte_j_78903 && y_78905; + bool forwards_ok_78907 = zzero_lte_i_78902 && y_78906; + bool ok_or_empty_78908 = empty_slice_78897 || + forwards_ok_78907; + bool index_ok_78909 = bounds_check_78895 && + ok_or_empty_78908; + bool index_certs_78910; + + if (!index_ok_78909) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 245) == -1) { + global_failure_args[0] = i_78892; + global_failure_args[1] = x_78891; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + global_failure_args[4] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_78911; + + if (!ok_or_empty_78908) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 246) == -1) { + global_failure_args[0] = x_78891; + global_failure_args[1] = k2p2zq_70876; + global_failure_args[2] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_78914; + double redout_119659 = 0.0; + + for (int64_t i_119660 = 0; i_119660 < j_m_i_78896; + i_119660++) { + int64_t slice_119992 = x_78891 + i_119660; + double x_78919 = ((__global + double *) mem_120608)[phys_tid_78117 + + (slice_119992 * + (num_threads_125643 * + k2p2zq_70876) + + i_78892 * + num_threads_125643)]; + bool isnan_res_78920; + + isnan_res_78920 = futrts_isnan64(x_78919); + + double defunc_1_f_res_78921; + + if (isnan_res_78920) { + defunc_1_f_res_78921 = 0.0; + } else { + double x_78918 = ((__global + double *) mem_120649)[phys_tid_78117 + + (i_119657 * + (num_threads_125643 * + k2p2zq_70876) + + slice_119992 * + num_threads_125643)]; + double defunc_1_f_res_f_res_78922 = x_78918 * + x_78919; + + defunc_1_f_res_78921 = defunc_1_f_res_f_res_78922; + } + + double defunc_1_op_res_78917 = defunc_1_f_res_78921 + + redout_119659; + double redout_tmp_126667 = defunc_1_op_res_78917; + + redout_119659 = redout_tmp_126667; + } + 
defunc_2_reduce_res_78914 = redout_119659; + + bool index_ok_78923 = bounds_check_78895 && + bounds_check_78895; + bool index_certs_78924; + + if (!index_ok_78923) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 247) == -1) { + global_failure_args[0] = i_78892; + global_failure_args[1] = i_78892; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zs_arg_78925 = ((__global + double *) mem_120608)[phys_tid_78117 + + (i_78892 * + (num_threads_125643 * + k2p2zq_70876) + + i_78892 * + num_threads_125643)]; + bool index_certs_78926; + + if (!bounds_check_78895) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 248) == -1) { + global_failure_args[0] = i_78892; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zm_arg_78927 = ((__global + double *) mem_120252)[i_119657 * + k2p2zq_70876 + + i_78892]; + double zm_res_78928 = zm_arg_78927 - + defunc_2_reduce_res_78914; + double zs_res_78929 = zm_res_78928 / zs_arg_78925; + + ((__global double *) mem_120649)[phys_tid_78117 + + (i_119657 * + (num_threads_125643 * + k2p2zq_70876) + i_78892 * + num_threads_125643)] = + zs_res_78929; + } + } + for (int64_t i_119663 = 0; i_119663 < k2p2zq_70876; i_119663++) { + for (int64_t i_119667 = 0; i_119667 < k2p2zq_70876; + i_119667++) { + double defunc_2_reduce_res_78936; + double redout_119669 = 0.0; + + for (int64_t i_119670 = 0; i_119670 < k2p2zq_70876; + i_119670++) { + double x_78940 = ((__global + double *) mem_120649)[phys_tid_78117 + + (i_119670 * + (num_threads_125643 * + k2p2zq_70876) + + i_119663 * + num_threads_125643)]; + double x_78941 = ((__global + double *) mem_120649)[phys_tid_78117 + + (i_119670 * + (num_threads_125643 * + k2p2zq_70876) + + i_119667 * + num_threads_125643)]; + double defunc_1_f_res_78942 = x_78940 * x_78941; + double defunc_1_op_res_78939 = defunc_1_f_res_78942 + + redout_119669; + 
double redout_tmp_126670 = defunc_1_op_res_78939; + + redout_119669 = redout_tmp_126670; + } + defunc_2_reduce_res_78936 = redout_119669; + ((__global double *) mem_120690)[phys_tid_78117 + + (i_119663 * + (num_threads_125643 * + k2p2zq_70876) + + i_119667 * + num_threads_125643)] = + defunc_2_reduce_res_78936; + } + } + + int64_t min_res_78943 = smin64(m_70948, min_res_78869); + + for (int64_t i_126671 = 0; i_126671 < k2p2zq_70876; i_126671++) { + ((__global double *) double_buffer_mem_125548)[phys_tid_78117 + + i_126671 * + num_threads_125643] = + ((__global double *) mem_120257)[gtid_78116 + i_126671 * + m_70861]; + } + for (int64_t j_78945 = 0; j_78945 < min_res_78943; j_78945++) { + bool y_78947 = slt64(j_78945, k2p2zq_70876); + bool index_certs_78948; + + if (!y_78947) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 249) == -1) { + global_failure_args[0] = j_78945; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_78949 = ((__global + double *) double_buffer_mem_125536)[phys_tid_78117 + + j_78945 * + num_threads_125643]; + bool zeze_res_78950 = zeze_arg_78949 == 0.0; + + if (zeze_res_78950) { + for (int64_t i_126673 = 0; i_126673 < k2p2zq_70876; + i_126673++) { + ((__global double *) mem_125160)[phys_tid_78117 + + i_126673 * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_126673 * + num_threads_125643]; + } + } else { + double y_78952 = ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + j_78945 * + num_threads_125643]; + double negate_arg_78953 = zeze_arg_78949 * y_78952; + double t_78954 = 0.0 - negate_arg_78953; + int64_t x_78955 = sub64(k2p2zq_70876, j_78945); + int64_t upper_bound_78956 = sub64(x_78955, (int64_t) 1); + double t_78957; + double t_78959 = t_78954; + + for (int64_t i0_78958 = 0; i0_78958 < upper_bound_78956; + i0_78958++) { + int64_t x_78960 = add64(j_78945, i0_78958); + int64_t i_78961 = 
add64((int64_t) 1, x_78960); + bool x_78962 = sle64((int64_t) 0, i_78961); + bool y_78963 = slt64(i_78961, k2p2zq_70876); + bool bounds_check_78964 = x_78962 && y_78963; + bool index_ok_78965 = y_78947 && bounds_check_78964; + bool index_certs_78966; + + if (!index_ok_78965) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 250) == -1) { + global_failure_args[0] = j_78945; + global_failure_args[1] = i_78961; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_78967 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (j_78945 * + (num_threads_125643 * + k2p2zq_70876) + + i_78961 * + num_threads_125643)]; + bool index_certs_78968; + + if (!bounds_check_78964) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 251) == -1) { + global_failure_args[0] = i_78961; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_78969 = ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_78961 * + num_threads_125643]; + double y_78970 = x_78967 * y_78969; + double loopres_78971 = t_78959 - y_78970; + double t_tmp_126674 = loopres_78971; + + t_78959 = t_tmp_126674; + } + t_78957 = t_78959; + + double t_78972 = t_78957 / zeze_arg_78949; + double y_78973 = zeze_arg_78949 * t_78972; + double lw_val_78974 = y_78952 + y_78973; + + ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + j_78945 * + num_threads_125643] = + lw_val_78974; + for (int64_t i0_78977 = 0; i0_78977 < upper_bound_78956; + i0_78977++) { + int64_t x_78979 = add64(j_78945, i0_78977); + int64_t i_78980 = add64((int64_t) 1, x_78979); + bool x_78981 = sle64((int64_t) 0, i_78980); + bool y_78982 = slt64(i_78980, k2p2zq_70876); + bool bounds_check_78983 = x_78981 && y_78982; + bool index_certs_78984; + + if (!bounds_check_78983) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 252) == -1) { + 
global_failure_args[0] = i_78980; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_78985 = ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_78980 * + num_threads_125643]; + bool index_ok_78986 = y_78947 && bounds_check_78983; + bool index_certs_78987; + + if (!index_ok_78986) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 253) == -1) { + global_failure_args[0] = j_78945; + global_failure_args[1] = i_78980; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_78988 = ((__global + double *) double_buffer_mem_125535)[phys_tid_78117 + + (j_78945 * + (num_threads_125643 * + k2p2zq_70876) + + i_78980 * + num_threads_125643)]; + double y_78989 = t_78972 * y_78988; + double lw_val_78990 = x_78985 + y_78989; + + ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_78980 * + num_threads_125643] = + lw_val_78990; + } + for (int64_t i_126676 = 0; i_126676 < k2p2zq_70876; + i_126676++) { + ((__global double *) mem_125160)[phys_tid_78117 + + i_126676 * + num_threads_125643] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_126676 * + num_threads_125643]; + } + } + for (int64_t i_126677 = 0; i_126677 < k2p2zq_70876; + i_126677++) { + ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_126677 * + num_threads_125643] = + ((__global double *) mem_125160)[phys_tid_78117 + + i_126677 * + num_threads_125643]; + } + } + for (int64_t i_126678 = 0; i_126678 < k2p2zq_70876; i_126678++) { + ((__global double *) mem_120763)[phys_tid_78117 + i_126678 * + num_threads_125643] = 0.0; + } + for (int64_t write_iter_119671 = 0; write_iter_119671 < + k2p2zq_70876; write_iter_119671++) { + int64_t write_iv_119674 = ((__global + int64_t *) mem_120273)[phys_tid_78117 + + write_iter_119671 * + num_threads_125643]; + double defunc_2_reduce_res_78996; + 
double redout_119681 = 0.0; + + for (int64_t i_119682 = 0; i_119682 < k2p2zq_70876; + i_119682++) { + double x_79000 = ((__global + double *) double_buffer_mem_125548)[phys_tid_78117 + + i_119682 * + num_threads_125643]; + double x_79001 = ((__global + double *) mem_120649)[phys_tid_78117 + + (i_119682 * + (num_threads_125643 * + k2p2zq_70876) + + write_iter_119671 * + num_threads_125643)]; + double defunc_1_f_res_79002 = x_79000 * x_79001; + double defunc_1_op_res_78999 = defunc_1_f_res_79002 + + redout_119681; + double redout_tmp_126680 = defunc_1_op_res_78999; + + redout_119681 = redout_tmp_126680; + } + defunc_2_reduce_res_78996 = redout_119681; + + bool less_than_zzero_119675 = slt64(write_iv_119674, + (int64_t) 0); + bool greater_than_sizze_119676 = sle64(k2p2zq_70876, + write_iv_119674); + bool outside_bounds_dim_119677 = less_than_zzero_119675 || + greater_than_sizze_119676; + + if (!outside_bounds_dim_119677) { + ((__global double *) mem_120763)[phys_tid_78117 + + write_iv_119674 * + num_threads_125643] = + defunc_2_reduce_res_78996; + } + } + for (int64_t i_119685 = 0; i_119685 < k2p2zq_70876; i_119685++) { + int64_t x_79005 = ((__global + int64_t *) mem_120273)[phys_tid_78117 + + i_119685 * + num_threads_125643]; + + for (int64_t i_126682 = 0; i_126682 < k2p2zq_70876; + i_126682++) { + ((__global int64_t *) mem_120790)[phys_tid_78117 + + i_126682 * + num_threads_125643] = + x_79005; + } + for (int64_t i_126683 = 0; i_126683 < k2p2zq_70876; + i_126683++) { + ((__global int64_t *) mem_120778)[phys_tid_78117 + + (i_119685 * + (num_threads_125643 * + k2p2zq_70876) + + i_126683 * + num_threads_125643)] = + ((__global int64_t *) mem_120790)[phys_tid_78117 + + i_126683 * + num_threads_125643]; + } + } + for (int64_t i_126684 = 0; i_126684 < k2p2zq_70876; i_126684++) { + for (int64_t i_126685 = 0; i_126685 < k2p2zq_70876; + i_126685++) { + ((__global double *) mem_120801)[phys_tid_78117 + + (i_126684 * + (num_threads_125643 * + k2p2zq_70876) + + i_126685 * + 
num_threads_125643)] = + 0.0; + } + } + for (int64_t write_iter_119687 = 0; write_iter_119687 < + binop_x_120251; write_iter_119687++) { + int64_t new_index_119993 = squot64(write_iter_119687, + k2p2zq_70876); + int64_t binop_y_119995 = k2p2zq_70876 * new_index_119993; + int64_t new_index_119996 = write_iter_119687 - binop_y_119995; + int64_t write_iv_119689 = ((__global + int64_t *) mem_120778)[phys_tid_78117 + + (new_index_119993 * + (num_threads_125643 * + k2p2zq_70876) + + new_index_119996 * + num_threads_125643)]; + int64_t write_iv_119690 = ((__global + int64_t *) mem_120273)[phys_tid_78117 + + new_index_119996 * + num_threads_125643]; + bool less_than_zzero_119692 = slt64(write_iv_119689, + (int64_t) 0); + bool greater_than_sizze_119693 = sle64(k2p2zq_70876, + write_iv_119689); + bool outside_bounds_dim_119694 = less_than_zzero_119692 || + greater_than_sizze_119693; + bool less_than_zzero_119695 = slt64(write_iv_119690, + (int64_t) 0); + bool greater_than_sizze_119696 = sle64(k2p2zq_70876, + write_iv_119690); + bool outside_bounds_dim_119697 = less_than_zzero_119695 || + greater_than_sizze_119696; + bool outside_bounds_119699 = outside_bounds_dim_119694 || + outside_bounds_dim_119697; + + if (!outside_bounds_119699) { + for (int64_t i_126687 = 0; i_126687 < (int64_t) 1; + i_126687++) { + ((__global double *) mem_120801)[phys_tid_78117 + + (write_iv_119689 * + (num_threads_125643 * + k2p2zq_70876) + + (write_iv_119690 + + i_126687) * + num_threads_125643)] = + ((__global double *) mem_120690)[phys_tid_78117 + + new_index_119993 * + (num_threads_125643 * + k2p2zq_70876) + + num_threads_125643 * + new_index_119996 + + i_126687 * + num_threads_125643]; + } + } + } + for (int64_t i_119706 = 0; i_119706 < k2p2zq_70876; i_119706++) { + double x_79018 = ((__global + double *) mem_120763)[phys_tid_78117 + + i_119706 * + num_threads_125643]; + + for (int64_t i_119711 = 0; i_119711 < k2p2zq_70876; + i_119711++) { + double x_79020 = ((__global + double *) 
mem_120801)[phys_tid_78117 + + (i_119706 * + (num_threads_125643 * + k2p2zq_70876) + + i_119711 * + num_threads_125643)]; + bool isnan_res_79021; + + isnan_res_79021 = futrts_isnan64(x_79020); + + double defunc_0_f_res_79022; + + if (isnan_res_79021) { + defunc_0_f_res_79022 = 0.0; + } else { + defunc_0_f_res_79022 = x_79020; + } + ((__global double *) mem_120824)[phys_tid_78117 + + (i_119706 * + (num_threads_125643 * + k2p2zq_70876) + + i_119711 * + num_threads_125643)] = + defunc_0_f_res_79022; + } + + bool isnan_res_79023; + + isnan_res_79023 = futrts_isnan64(x_79018); + + double defunc_0_f_res_79024; + + if (isnan_res_79023) { + defunc_0_f_res_79024 = 0.0; + } else { + defunc_0_f_res_79024 = x_79018; + } + ((__global double *) mem_120821)[phys_tid_78117 + i_119706 * + num_threads_125643] = + defunc_0_f_res_79024; + } + for (int64_t i_126691 = 0; i_126691 < k2p2zq_70876; i_126691++) { + for (int64_t i_126692 = 0; i_126692 < k2p2zq_70876; + i_126692++) { + ((__global double *) mem_120878)[i_126691 * (m_70861 * + k2p2zq_70876) + + i_126692 * m_70861 + + gtid_78116] = ((__global + double *) mem_120824)[phys_tid_78117 + + (i_126691 * + (num_threads_125643 * + k2p2zq_70876) + + i_126692 * + num_threads_125643)]; + } + } + for (int64_t i_126693 = 0; i_126693 < k2p2zq_70876; i_126693++) { + ((__global double *) mem_120881)[i_126693 * m_70861 + + gtid_78116] = ((__global + double *) mem_120821)[phys_tid_78117 + + i_126693 * + num_threads_125643]; + } + ((__global int64_t *) mem_120883)[gtid_78116] = min_res_78869; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_78572 +} +__kernel void mainDetailedzisegmap_79039(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121898, + __global unsigned char *mem_121919) +{ + #define segmap_group_sizze_81446 (mainDetailedzisegmap_group_sizze_79042) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int 
block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127188; + int32_t local_tid_127189; + int64_t group_sizze_127192; + int32_t wave_sizze_127191; + int32_t group_tid_127190; + + global_tid_127188 = get_global_id(0); + local_tid_127189 = get_local_id(0); + group_sizze_127192 = get_local_size(0); + wave_sizze_127191 = LOCKSTEP_WIDTH; + group_tid_127190 = get_group_id(0); + + int32_t phys_tid_79039; + + phys_tid_79039 = global_tid_127188; + + int64_t gtid_79037; + + gtid_79037 = squot64(sext_i32_i64(group_tid_127190) * + segmap_group_sizze_81446 + + sext_i32_i64(local_tid_127189), k2p2zq_70876); + + int64_t gtid_79038; + + gtid_79038 = sext_i32_i64(group_tid_127190) * segmap_group_sizze_81446 + + sext_i32_i64(local_tid_127189) - + squot64(sext_i32_i64(group_tid_127190) * segmap_group_sizze_81446 + + sext_i32_i64(local_tid_127189), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_79037, m_70861) && slt64(gtid_79038, k2p2zq_70876)) { + double x_81449 = ((__global double *) mem_121898)[gtid_79037 * + k2p2zq_70876 + + gtid_79038]; + bool isnan_res_81450; + + isnan_res_81450 = futrts_isnan64(x_81449); + + double defunc_0_f_res_81451; + + if (isnan_res_81450) { + defunc_0_f_res_81451 = 0.0; + } else { + defunc_0_f_res_81451 = x_81449; + } + ((__global double *) mem_121919)[gtid_79037 * k2p2zq_70876 + + gtid_79038] = defunc_0_f_res_81451; + } + + error_0: + return; + #undef segmap_group_sizze_81446 +} +__kernel void mainDetailedzisegmap_79061(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121906, + __global unsigned char *mem_121915) +{ + #define segmap_group_sizze_81437 (mainDetailedzisegmap_group_sizze_79065) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127183; + int32_t local_tid_127184; + int64_t group_sizze_127187; + int32_t wave_sizze_127186; + int32_t group_tid_127185; + + 
global_tid_127183 = get_global_id(0); + local_tid_127184 = get_local_id(0); + group_sizze_127187 = get_local_size(0); + wave_sizze_127186 = LOCKSTEP_WIDTH; + group_tid_127185 = get_group_id(0); + + int32_t phys_tid_79061; + + phys_tid_79061 = global_tid_127183; + + int64_t gtid_79058; + + gtid_79058 = squot64(sext_i32_i64(group_tid_127185) * + segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184), k2p2zq_70876 * + k2p2zq_70876); + + int64_t gtid_79059; + + gtid_79059 = squot64(sext_i32_i64(group_tid_127185) * + segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184) - + squot64(sext_i32_i64(group_tid_127185) * + segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + + int64_t gtid_79060; + + gtid_79060 = sext_i32_i64(group_tid_127185) * segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184) - + squot64(sext_i32_i64(group_tid_127185) * segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184), k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - squot64(sext_i32_i64(group_tid_127185) * + segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184) - + squot64(sext_i32_i64(group_tid_127185) * + segmap_group_sizze_81437 + + sext_i32_i64(local_tid_127184), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + if ((slt64(gtid_79058, m_70861) && slt64(gtid_79059, k2p2zq_70876)) && + slt64(gtid_79060, k2p2zq_70876)) { + double x_81440 = ((__global double *) mem_121906)[gtid_79058 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79059 * + k2p2zq_70876 + + gtid_79060]; + bool isnan_res_81441; + + isnan_res_81441 = futrts_isnan64(x_81440); + + double defunc_0_f_res_81442; + + if (isnan_res_81441) { + defunc_0_f_res_81442 = 0.0; + } else { + defunc_0_f_res_81442 = x_81440; + } + ((__global double *) mem_121915)[gtid_79058 * (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79059 * k2p2zq_70876 + + gtid_79060] = 
defunc_0_f_res_81442; + } + + error_0: + return; + #undef segmap_group_sizze_81437 +} +__kernel void mainDetailedzisegmap_79109(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t binop_x_120251, __global + unsigned char *defunc_3_map_res_r_mem_121847, + __global unsigned char *mem_121906, + __global unsigned char *mem_121909) +{ + #define segmap_group_sizze_81418 (mainDetailedzisegmap_group_sizze_79112) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127178; + int32_t local_tid_127179; + int64_t group_sizze_127182; + int32_t wave_sizze_127181; + int32_t group_tid_127180; + + global_tid_127178 = get_global_id(0); + local_tid_127179 = get_local_id(0); + group_sizze_127182 = get_local_size(0); + wave_sizze_127181 = LOCKSTEP_WIDTH; + group_tid_127180 = get_group_id(0); + + int32_t phys_tid_79109; + + phys_tid_79109 = global_tid_127178; + + int64_t gtid_79107; + + gtid_79107 = squot64(sext_i32_i64(group_tid_127180) * + segmap_group_sizze_81418 + + sext_i32_i64(local_tid_127179), binop_x_120251); + + int64_t gtid_79108; + + gtid_79108 = sext_i32_i64(group_tid_127180) * segmap_group_sizze_81418 + + sext_i32_i64(local_tid_127179) - + squot64(sext_i32_i64(group_tid_127180) * segmap_group_sizze_81418 + + sext_i32_i64(local_tid_127179), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_79107, m_70861) && slt64(gtid_79108, binop_x_120251)) { + int64_t binop_x_115049 = gtid_79107 * binop_x_120251; + int64_t binop_x_115050 = gtid_79108 + binop_x_115049; + int64_t new_index_115052 = squot64(binop_x_115050, binop_x_120251); + int64_t binop_y_115060 = new_index_115052 * binop_x_120251; + int64_t binop_x_115061 = binop_x_115050 - binop_y_115060; + int64_t new_index_115062 = squot64(binop_x_115061, k2p2zq_70876); + int64_t write_index_81421 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_70876 + + new_index_115062]; + int64_t 
binop_y_115117 = k2p2zq_70876 * new_index_115062; + int64_t new_index_115118 = binop_x_115061 - binop_y_115117; + int64_t write_index_81422 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_70876 + + new_index_115118]; + double write_value_81423 = ((__global + double *) defunc_3_map_res_r_mem_121847)[new_index_115052 * + (k2p2zq_70876 * + k2p2zq_70876) + + new_index_115062 * + k2p2zq_70876 + + new_index_115118]; + + if (((sle64((int64_t) 0, gtid_79107) && slt64(gtid_79107, m_70861)) && + (sle64((int64_t) 0, write_index_81421) && slt64(write_index_81421, + k2p2zq_70876))) && + (sle64((int64_t) 0, write_index_81422) && slt64(write_index_81422, + k2p2zq_70876))) { + ((__global double *) mem_121906)[gtid_79107 * (k2p2zq_70876 * + k2p2zq_70876) + + write_index_81421 * k2p2zq_70876 + + write_index_81422] = + write_value_81423; + } + } + + error_0: + return; + #undef segmap_group_sizze_81418 +} +__kernel void mainDetailedzisegmap_79266(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t binop_x_120251, __global + unsigned char *mem_121341, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned char *mem_121898, + __global unsigned char *mem_121901) +{ + #define segmap_group_sizze_81347 (mainDetailedzisegmap_group_sizze_79269) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127172; + int32_t local_tid_127173; + int64_t group_sizze_127176; + int32_t wave_sizze_127175; + int32_t group_tid_127174; + + global_tid_127172 = get_global_id(0); + local_tid_127173 = get_local_id(0); + group_sizze_127176 = get_local_size(0); + wave_sizze_127175 = LOCKSTEP_WIDTH; + group_tid_127174 = get_group_id(0); + + int32_t phys_tid_79266; + + phys_tid_79266 = global_tid_127172; + + int64_t gtid_79264; + + gtid_79264 = squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_81347 + + sext_i32_i64(local_tid_127173), 
k2p2zq_70876); + + int64_t gtid_79265; + + gtid_79265 = sext_i32_i64(group_tid_127174) * segmap_group_sizze_81347 + + sext_i32_i64(local_tid_127173) - + squot64(sext_i32_i64(group_tid_127174) * segmap_group_sizze_81347 + + sext_i32_i64(local_tid_127173), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_79264, m_70861) && slt64(gtid_79265, k2p2zq_70876)) { + int64_t write_index_81352 = ((__global + int64_t *) mem_121341)[gtid_79265 * + m_70861 + + gtid_79264]; + double defunc_2_reduce_res_81353; + double redout_119740 = 0.0; + + for (int64_t i_119741 = 0; i_119741 < k2p2zq_70876; i_119741++) { + double x_81357 = ((__global double *) mem_121901)[gtid_79264 * + k2p2zq_70876 + + i_119741]; + double x_81358 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_79264 * + binop_x_120251 + + i_119741 * + k2p2zq_70876 + + gtid_79265]; + double defunc_1_f_res_81359 = x_81357 * x_81358; + double defunc_1_op_res_81356 = defunc_1_f_res_81359 + redout_119740; + double redout_tmp_127177 = defunc_1_op_res_81356; + + redout_119740 = redout_tmp_127177; + } + defunc_2_reduce_res_81353 = redout_119740; + if ((sle64((int64_t) 0, gtid_79264) && slt64(gtid_79264, m_70861)) && + (sle64((int64_t) 0, write_index_81352) && slt64(write_index_81352, + k2p2zq_70876))) { + ((__global double *) mem_121898)[gtid_79264 * k2p2zq_70876 + + write_index_81352] = + defunc_2_reduce_res_81353; + } + } + + error_0: + return; + #undef segmap_group_sizze_81347 +} +__kernel void mainDetailedzisegmap_79309(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_70948, + int64_t num_groups_81278, + int64_t num_threads_125700, __global + unsigned char *mem_121338, __global + unsigned char *mem_121343, __global + unsigned char *mem_121850, __global + unsigned char *mem_121858, __global + unsigned char *mem_121895, __global + unsigned char *mem_125243, __global + unsigned char *double_buffer_mem_125565) +{ + 
#define segmap_group_sizze_81277 (mainDetailedzisegmap_group_sizze_79311) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127156; + int32_t local_tid_127157; + int64_t group_sizze_127160; + int32_t wave_sizze_127159; + int32_t group_tid_127158; + + global_tid_127156 = get_global_id(0); + local_tid_127157 = get_local_id(0); + group_sizze_127160 = get_local_size(0); + wave_sizze_127159 = LOCKSTEP_WIDTH; + group_tid_127158 = get_group_id(0); + + int32_t phys_tid_79309; + + phys_tid_79309 = global_tid_127156; + + int32_t phys_group_id_127161; + + phys_group_id_127161 = get_group_id(0); + for (int32_t i_127162 = 0; i_127162 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_81277)) - + phys_group_id_127161, sext_i64_i32(num_groups_81278)); + i_127162++) { + int32_t virt_group_id_127163 = phys_group_id_127161 + i_127162 * + sext_i64_i32(num_groups_81278); + int64_t gtid_79308 = sext_i32_i64(virt_group_id_127163) * + segmap_group_sizze_81277 + sext_i32_i64(local_tid_127157); + + if (slt64(gtid_79308, m_70861)) { + int64_t min_res_81284 = ((__global + int64_t *) mem_121343)[gtid_79308]; + int64_t min_res_81285 = smin64(m_70948, min_res_81284); + + for (int64_t i_127164 = 0; i_127164 < k2p2zq_70876; i_127164++) { + ((__global double *) double_buffer_mem_125565)[phys_tid_79309 + + i_127164 * + num_threads_125700] = + ((__global double *) mem_121850)[gtid_79308 + i_127164 * + m_70861]; + } + for (int64_t j_81287 = 0; j_81287 < min_res_81285; j_81287++) { + bool y_81289 = slt64(j_81287, k2p2zq_70876); + bool index_certs_81290; + + if (!y_81289) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 292) == -1) { + global_failure_args[0] = j_81287; + global_failure_args[1] = k2p2zq_70876; + ; + } + 
local_failure = true; + goto error_0; + } + } + + double zeze_arg_81291 = ((__global + double *) mem_121338)[j_81287 * + m_70861 + + gtid_79308]; + bool zeze_res_81292 = zeze_arg_81291 == 0.0; + + if (zeze_res_81292) { + for (int64_t i_127166 = 0; i_127166 < k2p2zq_70876; + i_127166++) { + ((__global double *) mem_125243)[phys_tid_79309 + + i_127166 * + num_threads_125700] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_127166 * + num_threads_125700]; + } + } else { + double y_81294 = ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + j_81287 * + num_threads_125700]; + double negate_arg_81295 = zeze_arg_81291 * y_81294; + double t_81296 = 0.0 - negate_arg_81295; + int64_t x_81297 = sub64(k2p2zq_70876, j_81287); + int64_t upper_bound_81298 = sub64(x_81297, (int64_t) 1); + double t_81299; + double t_81301 = t_81296; + + for (int64_t i0_81300 = 0; i0_81300 < upper_bound_81298; + i0_81300++) { + int64_t x_81302 = add64(j_81287, i0_81300); + int64_t i_81303 = add64((int64_t) 1, x_81302); + bool x_81304 = sle64((int64_t) 0, i_81303); + bool y_81305 = slt64(i_81303, k2p2zq_70876); + bool bounds_check_81306 = x_81304 && y_81305; + bool index_ok_81307 = y_81289 && bounds_check_81306; + bool index_certs_81308; + + if (!index_ok_81307) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 293) == -1) { + global_failure_args[0] = j_81287; + global_failure_args[1] = i_81303; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_81309 = ((__global + double *) mem_121858)[i_81303 * + (m_70861 * + k2p2zq_70876) + + j_81287 * + m_70861 + + gtid_79308]; + bool index_certs_81310; + + if (!bounds_check_81306) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 294) == -1) { + global_failure_args[0] = i_81303; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_81311 = 
((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_81303 * + num_threads_125700]; + double y_81312 = x_81309 * y_81311; + double loopres_81313 = t_81301 - y_81312; + double t_tmp_127167 = loopres_81313; + + t_81301 = t_tmp_127167; + } + t_81299 = t_81301; + + double t_81314 = t_81299 / zeze_arg_81291; + double y_81315 = zeze_arg_81291 * t_81314; + double lw_val_81316 = y_81294 + y_81315; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + j_81287 * + num_threads_125700] = + lw_val_81316; + for (int64_t i0_81319 = 0; i0_81319 < upper_bound_81298; + i0_81319++) { + int64_t x_81321 = add64(j_81287, i0_81319); + int64_t i_81322 = add64((int64_t) 1, x_81321); + bool x_81323 = sle64((int64_t) 0, i_81322); + bool y_81324 = slt64(i_81322, k2p2zq_70876); + bool bounds_check_81325 = x_81323 && y_81324; + bool index_certs_81326; + + if (!bounds_check_81325) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 295) == -1) { + global_failure_args[0] = i_81322; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_81327 = ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_81322 * + num_threads_125700]; + bool index_ok_81328 = y_81289 && bounds_check_81325; + bool index_certs_81329; + + if (!index_ok_81328) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 296) == -1) { + global_failure_args[0] = j_81287; + global_failure_args[1] = i_81322; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_81330 = ((__global + double *) mem_121858)[i_81322 * + (m_70861 * + k2p2zq_70876) + + j_81287 * + m_70861 + + gtid_79308]; + double y_81331 = t_81314 * y_81330; + double lw_val_81332 = x_81327 + y_81331; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_81322 * + num_threads_125700] = + lw_val_81332; + } + for (int64_t i_127169 = 0; i_127169 < 
k2p2zq_70876; + i_127169++) { + ((__global double *) mem_125243)[phys_tid_79309 + + i_127169 * + num_threads_125700] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_127169 * + num_threads_125700]; + } + } + for (int64_t i_127170 = 0; i_127170 < k2p2zq_70876; + i_127170++) { + ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_127170 * + num_threads_125700] = + ((__global double *) mem_125243)[phys_tid_79309 + + i_127170 * + num_threads_125700]; + } + } + for (int64_t i_127171 = 0; i_127171 < k2p2zq_70876; i_127171++) { + ((__global double *) mem_121895)[i_127171 * m_70861 + + gtid_79308] = ((__global + double *) double_buffer_mem_125565)[phys_tid_79309 + + i_127171 * + num_threads_125700]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_81277 +} +__kernel void mainDetailedzisegmap_79371(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t num_groups_81219, + int64_t binop_x_120251, + int64_t num_threads_125696, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned char *mem_121613, + __global unsigned char *mem_121616, + __global unsigned char *mem_121632) +{ + #define segmap_group_sizze_81218 (mainDetailedzisegmap_group_sizze_79374) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127013; + int32_t local_tid_127014; + int64_t group_sizze_127017; + int32_t wave_sizze_127016; + int32_t group_tid_127015; + + global_tid_127013 = get_global_id(0); + local_tid_127014 = get_local_id(0); + group_sizze_127017 = get_local_size(0); + wave_sizze_127016 = LOCKSTEP_WIDTH; + group_tid_127015 = get_group_id(0); + + int32_t phys_tid_79371; + + phys_tid_79371 = global_tid_127013; + + int32_t phys_group_id_127018; + + phys_group_id_127018 = get_group_id(0); + for (int32_t i_127019 = 0; i_127019 < + 
sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + segmap_group_sizze_81218)) - + phys_group_id_127018, sext_i64_i32(num_groups_81219)); + i_127019++) { + int32_t virt_group_id_127020 = phys_group_id_127018 + i_127019 * + sext_i64_i32(num_groups_81219); + int64_t gtid_79369 = squot64(sext_i32_i64(virt_group_id_127020) * + segmap_group_sizze_81218 + + sext_i32_i64(local_tid_127014), + k2p2zq_70876); + int64_t gtid_79370 = sext_i32_i64(virt_group_id_127020) * + segmap_group_sizze_81218 + sext_i32_i64(local_tid_127014) - + squot64(sext_i32_i64(virt_group_id_127020) * + segmap_group_sizze_81218 + + sext_i32_i64(local_tid_127014), k2p2zq_70876) * + k2p2zq_70876; + + if (slt64(gtid_79369, m_70861) && slt64(gtid_79370, k2p2zq_70876)) { + for (int64_t i_119736 = 0; i_119736 < k2p2zq_70876; i_119736++) { + double defunc_2_reduce_res_81230; + double redout_119738 = 0.0; + + for (int64_t i_119739 = 0; i_119739 < k2p2zq_70876; + i_119739++) { + double x_81234 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_79369 * + binop_x_120251 + + i_119739 * + k2p2zq_70876 + + gtid_79370]; + double x_81235 = ((__global + double *) mem_121613)[gtid_79369 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_119736 * + k2p2zq_70876 + + i_119739]; + double defunc_1_f_res_81236 = x_81234 * x_81235; + double defunc_1_op_res_81233 = defunc_1_f_res_81236 + + redout_119738; + double redout_tmp_127022 = defunc_1_op_res_81233; + + redout_119738 = redout_tmp_127022; + } + defunc_2_reduce_res_81230 = redout_119738; + ((__global double *) mem_121616)[phys_tid_79371 + i_119736 * + num_threads_125696] = + defunc_2_reduce_res_81230; + } + for (int64_t i_127023 = 0; i_127023 < k2p2zq_70876; i_127023++) { + ((__global double *) mem_121632)[i_127023 * (k2p2zq_70876 * + m_70861) + + gtid_79369 * k2p2zq_70876 + + gtid_79370] = ((__global + double *) mem_121616)[phys_tid_79371 + + i_127023 * + num_threads_125696]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; 
+ #undef segmap_group_sizze_81218 +} +__kernel void mainDetailedzisegmap_79587(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t x_81093, int64_t i_81094, + int64_t j_m_i_81098, + int64_t num_groups_81126, + int64_t num_threads_125688, __global + unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121458, __global + unsigned char *mem_121476, __global + unsigned char *mem_121480, __global + unsigned char *mem_121492, __global + unsigned char *mem_121504) +{ + #define segmap_group_sizze_81125 (mainDetailedzisegmap_group_sizze_79589) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126910; + int32_t local_tid_126911; + int64_t group_sizze_126914; + int32_t wave_sizze_126913; + int32_t group_tid_126912; + + global_tid_126910 = get_global_id(0); + local_tid_126911 = get_local_id(0); + group_sizze_126914 = get_local_size(0); + wave_sizze_126913 = LOCKSTEP_WIDTH; + group_tid_126912 = get_group_id(0); + + int32_t phys_tid_79587; + + phys_tid_79587 = global_tid_126910; + + int32_t phys_group_id_126915; + + phys_group_id_126915 = get_group_id(0); + for (int32_t i_126916 = 0; i_126916 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_81125)) - + phys_group_id_126915, sext_i64_i32(num_groups_81126)); + i_126916++) { + int32_t virt_group_id_126917 = phys_group_id_126915 + i_126916 * + sext_i64_i32(num_groups_81126); + int64_t gtid_79586 = sext_i32_i64(virt_group_id_126917) * + segmap_group_sizze_81125 + sext_i32_i64(local_tid_126911); + + if (slt64(gtid_79586, m_70861)) { + double defunc_3_map_res_r_transformed_row_81131 = ((__global + double *) mem_121351)[gtid_79586 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_81094 * + k2p2zq_70876 + + i_81094]; + + for (int64_t i_119725 = 0; i_119725 < k2p2zq_70876; i_119725++) { + for (int64_t i_126919 = 0; i_126919 < k2p2zq_70876; + i_126919++) { + 
((__global double *) mem_121492)[phys_tid_79587 + i_126919 * + num_threads_125688] = + ((__global double *) mem_121476)[i_119725 * (m_70861 * + k2p2zq_70876) + + gtid_79586 + i_126919 * + m_70861]; + } + + double defunc_2_map_res_transformed_row_81136 = ((__global + double *) mem_120252)[i_119725 * + k2p2zq_70876 + + i_81094]; + double defunc_2_reduce_res_81137; + double redout_119728 = 0.0; + + for (int64_t i_119729 = 0; i_119729 < j_m_i_81098; i_119729++) { + int64_t slice_120011 = x_81093 + i_119729; + double x_81142 = ((__global + double *) mem_121458)[slice_120011 * + (k2p2zq_70876 * + m_70861) + + gtid_79586 * + k2p2zq_70876 + + i_81094]; + bool isnan_res_81143; + + isnan_res_81143 = futrts_isnan64(x_81142); + + double defunc_1_f_res_81144; + + if (isnan_res_81143) { + defunc_1_f_res_81144 = 0.0; + } else { + double x_81141 = ((__global + double *) mem_121476)[i_119725 * + (m_70861 * + k2p2zq_70876) + + slice_120011 * + m_70861 + + gtid_79586]; + double defunc_1_f_res_f_res_81145 = x_81141 * x_81142; + + defunc_1_f_res_81144 = defunc_1_f_res_f_res_81145; + } + + double defunc_1_op_res_81140 = defunc_1_f_res_81144 + + redout_119728; + double redout_tmp_126920 = defunc_1_op_res_81140; + + redout_119728 = redout_tmp_126920; + } + defunc_2_reduce_res_81137 = redout_119728; + + double zm_res_81146 = defunc_2_map_res_transformed_row_81136 - + defunc_2_reduce_res_81137; + double zs_res_81147 = zm_res_81146 / + defunc_3_map_res_r_transformed_row_81131; + + ((__global double *) mem_121492)[phys_tid_79587 + i_81094 * + num_threads_125688] = + zs_res_81147; + for (int64_t i_126921 = 0; i_126921 < k2p2zq_70876; + i_126921++) { + ((__global double *) mem_121480)[phys_tid_79587 + + (i_119725 * + (num_threads_125688 * + k2p2zq_70876) + + i_126921 * + num_threads_125688)] = + ((__global double *) mem_121492)[phys_tid_79587 + + i_126921 * + num_threads_125688]; + } + } + for (int64_t i_126922 = 0; i_126922 < k2p2zq_70876; i_126922++) { + for (int64_t i_126923 = 0; i_126923 < 
k2p2zq_70876; + i_126923++) { + ((__global double *) mem_121504)[i_126922 * (m_70861 * + k2p2zq_70876) + + i_126923 * m_70861 + + gtid_79586] = ((__global + double *) mem_121480)[phys_tid_79587 + + (i_126922 * + (num_threads_125688 * + k2p2zq_70876) + + i_126923 * + num_threads_125688)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_81125 +} +__kernel void mainDetailedzisegmap_79675(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t i_81094, + int64_t binop_x_120251, __global + unsigned char *mem_param_121469, + __global unsigned char *mem_121559) +{ + #define segmap_group_sizze_81207 (mainDetailedzisegmap_group_sizze_79679) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127005; + int32_t local_tid_127006; + int64_t group_sizze_127009; + int32_t wave_sizze_127008; + int32_t group_tid_127007; + + global_tid_127005 = get_global_id(0); + local_tid_127006 = get_local_id(0); + group_sizze_127009 = get_local_size(0); + wave_sizze_127008 = LOCKSTEP_WIDTH; + group_tid_127007 = get_group_id(0); + + int32_t phys_tid_79675; + + phys_tid_79675 = global_tid_127005; + + int64_t gtid_79672; + + gtid_79672 = squot64(sext_i32_i64(group_tid_127007) * + segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006), k2p2zq_70876); + + int64_t gtid_79673; + + gtid_79673 = sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006) - + squot64(sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006), k2p2zq_70876) * k2p2zq_70876; + + int64_t gtid_79674; + + gtid_79674 = sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006) - + squot64(sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006), k2p2zq_70876) * k2p2zq_70876 - + 
(sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006) - + squot64(sext_i32_i64(group_tid_127007) * segmap_group_sizze_81207 + + sext_i32_i64(local_tid_127006), k2p2zq_70876) * k2p2zq_70876); + if ((slt64(gtid_79672, m_70861) && slt64(gtid_79673, k2p2zq_70876)) && + slt64(gtid_79674, (int64_t) 1)) { + double zs_res_81210 = ((__global double *) mem_121559)[gtid_79672 * + k2p2zq_70876 + + gtid_79673]; + + if (((sle64((int64_t) 0, gtid_79672) && slt64(gtid_79672, m_70861)) && + (sle64((int64_t) 0, gtid_79673) && slt64(gtid_79673, + k2p2zq_70876))) && + (sle64((int64_t) 0, i_81094) && slt64(i_81094, k2p2zq_70876))) { + ((__global double *) mem_param_121469)[gtid_79672 * binop_x_120251 + + gtid_79673 * k2p2zq_70876 + + i_81094] = zs_res_81210; + } + } + + error_0: + return; + #undef segmap_group_sizze_81207 +} +__kernel void mainDetailedzisegmap_79687(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t i_81094, __global + unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121555, __global + unsigned char *mem_121559) +{ + #define segmap_group_sizze_81196 (mainDetailedzisegmap_group_sizze_79690) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127000; + int32_t local_tid_127001; + int64_t group_sizze_127004; + int32_t wave_sizze_127003; + int32_t group_tid_127002; + + global_tid_127000 = get_global_id(0); + local_tid_127001 = get_local_id(0); + group_sizze_127004 = get_local_size(0); + wave_sizze_127003 = LOCKSTEP_WIDTH; + group_tid_127002 = get_group_id(0); + + int32_t phys_tid_79687; + + phys_tid_79687 = global_tid_127000; + + int64_t gtid_79685; + + gtid_79685 = squot64(sext_i32_i64(group_tid_127002) * + segmap_group_sizze_81196 + + sext_i32_i64(local_tid_127001), k2p2zq_70876); + + int64_t gtid_79686; + + gtid_79686 = sext_i32_i64(group_tid_127002) * 
segmap_group_sizze_81196 + + sext_i32_i64(local_tid_127001) - + squot64(sext_i32_i64(group_tid_127002) * segmap_group_sizze_81196 + + sext_i32_i64(local_tid_127001), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_79685, m_70861) && slt64(gtid_79686, k2p2zq_70876)) { + double defunc_3_map_res_r_transformed_row_81199 = ((__global + double *) mem_121351)[gtid_79685 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_81094 * + k2p2zq_70876 + + i_81094]; + double defunc_2_map_res_transformed_row_81200 = ((__global + double *) mem_120252)[gtid_79686 * + k2p2zq_70876 + + i_81094]; + double defunc_2_reduce_res_81201 = ((__global + double *) mem_121555)[gtid_79685 * + k2p2zq_70876 + + gtid_79686]; + double zm_res_81202 = defunc_2_map_res_transformed_row_81200 - + defunc_2_reduce_res_81201; + double zs_res_81203 = zm_res_81202 / + defunc_3_map_res_r_transformed_row_81199; + + ((__global double *) mem_121559)[gtid_79685 * k2p2zq_70876 + + gtid_79686] = zs_res_81203; + } + + error_0: + return; + #undef segmap_group_sizze_81196 +} +__kernel void mainDetailedzisegmap_80013(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121335, + __global unsigned char *mem_121343, + __global unsigned char *mem_121346, + __global unsigned char *mem_121351) +{ + #define segmap_group_sizze_81003 (mainDetailedzisegmap_group_sizze_80017) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126877; + int32_t local_tid_126878; + int64_t group_sizze_126881; + int32_t wave_sizze_126880; + int32_t group_tid_126879; + + global_tid_126877 = get_global_id(0); + local_tid_126878 = get_local_id(0); + group_sizze_126881 = get_local_size(0); + wave_sizze_126880 = LOCKSTEP_WIDTH; + group_tid_126879 = get_group_id(0); + + int32_t phys_tid_80013; + + phys_tid_80013 = global_tid_126877; + + int64_t gtid_80010; + + gtid_80010 = squot64(sext_i32_i64(group_tid_126879) * + 
segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878), k2p2zq_70876 * + k2p2zq_70876); + + int64_t gtid_80011; + + gtid_80011 = squot64(sext_i32_i64(group_tid_126879) * + segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878) - + squot64(sext_i32_i64(group_tid_126879) * + segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + + int64_t gtid_80012; + + gtid_80012 = sext_i32_i64(group_tid_126879) * segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878) - + squot64(sext_i32_i64(group_tid_126879) * segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878), k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - squot64(sext_i32_i64(group_tid_126879) * + segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878) - + squot64(sext_i32_i64(group_tid_126879) * + segmap_group_sizze_81003 + + sext_i32_i64(local_tid_126878), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + if ((slt64(gtid_80010, m_70861) && slt64(gtid_80011, k2p2zq_70876)) && + slt64(gtid_80012, k2p2zq_70876)) { + int64_t min_res_81006 = ((__global int64_t *) mem_121343)[gtid_80010]; + bool cond_f_res_81007 = ((__global bool *) mem_121346)[gtid_80010 * + k2p2zq_70876 + + gtid_80011]; + int64_t x_81010 = add64((int64_t) 1, gtid_80012); + bool cond_81011 = slt64(min_res_81006, x_81010); + bool x_81012 = !cond_81011; + bool y_81013 = cond_f_res_81007 && x_81012; + bool cond_81014 = cond_81011 || y_81013; + double defunc_1_f_res_81015; + + if (cond_81014) { + defunc_1_f_res_81015 = NAN; + } else { + double x_81009 = ((__global double *) mem_121335)[gtid_80011 * + (m_70861 * + k2p2zq_70876) + + gtid_80012 * + m_70861 + + gtid_80010]; + + defunc_1_f_res_81015 = x_81009; + } + ((__global double *) mem_121351)[gtid_80010 * (k2p2zq_70876 * + k2p2zq_70876) + + gtid_80011 * k2p2zq_70876 + + gtid_80012] = defunc_1_f_res_81015; + } + + error_0: + return; + 
#undef segmap_group_sizze_81003 +} +__kernel void mainDetailedzisegmap_80048(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_121343, + __global unsigned char *mem_121346) +{ + #define segmap_group_sizze_80988 (mainDetailedzisegmap_group_sizze_80051) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126872; + int32_t local_tid_126873; + int64_t group_sizze_126876; + int32_t wave_sizze_126875; + int32_t group_tid_126874; + + global_tid_126872 = get_global_id(0); + local_tid_126873 = get_local_id(0); + group_sizze_126876 = get_local_size(0); + wave_sizze_126875 = LOCKSTEP_WIDTH; + group_tid_126874 = get_group_id(0); + + int32_t phys_tid_80048; + + phys_tid_80048 = global_tid_126872; + + int64_t gtid_80046; + + gtid_80046 = squot64(sext_i32_i64(group_tid_126874) * + segmap_group_sizze_80988 + + sext_i32_i64(local_tid_126873), k2p2zq_70876); + + int64_t gtid_80047; + + gtid_80047 = sext_i32_i64(group_tid_126874) * segmap_group_sizze_80988 + + sext_i32_i64(local_tid_126873) - + squot64(sext_i32_i64(group_tid_126874) * segmap_group_sizze_80988 + + sext_i32_i64(local_tid_126873), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_80046, m_70861) && slt64(gtid_80047, k2p2zq_70876)) { + int64_t min_res_80991 = ((__global int64_t *) mem_121343)[gtid_80046]; + int64_t x_80993 = add64((int64_t) 1, gtid_80047); + bool cond_f_res_80994 = slt64(min_res_80991, x_80993); + + ((__global bool *) mem_121346)[gtid_80046 * k2p2zq_70876 + gtid_80047] = + cond_f_res_80994; + } + + error_0: + return; + #undef segmap_group_sizze_80988 +} +__kernel void mainDetailedzisegmap_80093(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_70948, unsigned char y_70952, + int64_t min_res_71066, int64_t k_71067, + int64_t num_groups_80703, + int64_t 
num_threads_125673, __global + unsigned char *mem_120248, __global + unsigned char *mem_121001, __global + unsigned char *mem_121004, __global + unsigned char *mem_121008, __global + unsigned char *mem_121011, __global + unsigned char *mem_121335, __global + unsigned char *mem_121338, __global + unsigned char *mem_121341, __global + unsigned char *mem_121343, __global + unsigned char *mem_125167, __global + unsigned char *mem_125169, __global + unsigned char *mem_125438, __global + unsigned char *mem_125446, __global + unsigned char *mem_125448, __global + unsigned char *mem_125498, __global + unsigned char *double_buffer_mem_125552, + __global + unsigned char *double_buffer_mem_125553, + __global + unsigned char *double_buffer_mem_125554) +{ + #define segmap_group_sizze_80702 (mainDetailedzisegmap_group_sizze_80095) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126803; + int32_t local_tid_126804; + int64_t group_sizze_126807; + int32_t wave_sizze_126806; + int32_t group_tid_126805; + + global_tid_126803 = get_global_id(0); + local_tid_126804 = get_local_id(0); + group_sizze_126807 = get_local_size(0); + wave_sizze_126806 = LOCKSTEP_WIDTH; + group_tid_126805 = get_group_id(0); + + int32_t phys_tid_80093; + + phys_tid_80093 = global_tid_126803; + + int32_t phys_group_id_126808; + + phys_group_id_126808 = get_group_id(0); + for (int32_t i_126809 = 0; i_126809 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_80702)) - + phys_group_id_126808, sext_i64_i32(num_groups_80703)); + i_126809++) { + int32_t virt_group_id_126810 = phys_group_id_126808 + i_126809 * + sext_i64_i32(num_groups_80703); + int64_t gtid_80092 = sext_i32_i64(virt_group_id_126810) * + segmap_group_sizze_80702 + 
sext_i32_i64(local_tid_126804); + + if (slt64(gtid_80092, m_70861)) { + for (int64_t i_126811 = 0; i_126811 < k2p2zq_70876; i_126811++) { + ((__global int64_t *) mem_121011)[phys_tid_80093 + i_126811 * + num_threads_125673] = + ((__global int64_t *) mem_120248)[i_126811]; + } + for (int64_t i_126812 = 0; i_126812 < k2p2zq_70876; i_126812++) { + for (int64_t i_126813 = 0; i_126813 < k2p2zq_70876; + i_126813++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126812 * + (num_threads_125673 * + k2p2zq_70876) + + i_126813 * + num_threads_125673)] = + ((__global double *) mem_121001)[gtid_80092 + + (i_126812 * (m_70861 * + k2p2zq_70876) + + i_126813 * m_70861)]; + } + } + for (int64_t i_126814 = 0; i_126814 < k2p2zq_70876; i_126814++) { + ((__global double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126814 * + num_threads_125673] = + ((__global double *) mem_121004)[gtid_80092 + i_126814 * + m_70861]; + } + for (int64_t i_126815 = 0; i_126815 < (int64_t) 2; i_126815++) { + for (int64_t i_126816 = 0; i_126816 < k2p2zq_70876; + i_126816++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (i_126815 * + (num_threads_125673 * + k2p2zq_70876) + + i_126816 * + num_threads_125673)] = + ((__global double *) mem_121008)[gtid_80092 + + (i_126815 * (m_70861 * + k2p2zq_70876) + + i_126816 * m_70861)]; + } + } + + int64_t dqrdc2_res_80717; + int64_t k_80723 = k_71067; + + for (int64_t l_80718 = 0; l_80718 < min_res_71066; l_80718++) { + int64_t x_80724 = add64((int64_t) 1, l_80718); + bool cond_80725 = slt64(x_80724, k_80723); + bool loop_cond_80726; + + if (cond_80725) { + bool y_80727 = slt64(l_80718, k2p2zq_70876); + bool index_certs_80728; + + if (!y_80727) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 254) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_80718; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto 
error_0; + } + } + + double zt_arg_80729 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + l_80718 * + num_threads_125673)]; + double zt_res_80730 = 1.0e-7 * zt_arg_80729; + bool index_certs_80731; + + if (!y_80727) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 255) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_80732 = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + l_80718 * + num_threads_125673]; + bool zl_res_80733 = zl_arg_80732 < zt_res_80730; + + loop_cond_80726 = zl_res_80733; + } else { + loop_cond_80726 = 0; + } + + bool y_80734 = slt64(l_80718, k2p2zq_70876); + int64_t upper_bound_80735 = sub64(k2p2zq_70876, x_80724); + bool loop_not_taken_80736 = !loop_cond_80726; + bool protect_assert_disj_80737 = y_80734 || + loop_not_taken_80736; + bool index_certs_80738; + + if (!protect_assert_disj_80737) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 256) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_80739; + + if (!protect_assert_disj_80737) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 257) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_80718; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_80740; + + if (!protect_assert_disj_80737) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 258) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_80718; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_80741 = y_70952 || + 
loop_not_taken_80736; + bool index_certs_80742; + + if (!protect_assert_disj_80741) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 259) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_80743; + + if (!protect_assert_disj_80741) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 260) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_80744; + + if (!protect_assert_disj_80741) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 261) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_80745; + int64_t loopres_80750; + bool loop_while_80751; + int64_t k_80756; + + loop_while_80751 = loop_cond_80726; + k_80756 = k_80723; + while (loop_while_80751) { + for (int64_t i_80758 = 0; i_80758 < k2p2zq_70876; + i_80758++) { + bool index_certs_80760; + + if (!y_80734) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 262) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = i_80758; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_80761 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + i_80758 * + num_threads_125673)]; + + for (int64_t j0_80763 = 0; j0_80763 < upper_bound_80735; + j0_80763++) { + int64_t j_80765 = add64(x_80724, j0_80763); + bool x_80766 = sle64((int64_t) 0, j_80765); + bool y_80767 = slt64(j_80765, k2p2zq_70876); + bool bounds_check_80768 = 
x_80766 && y_80767; + bool index_certs_80769; + + if (!bounds_check_80768) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 263) == + -1) { + global_failure_args[0] = j_80765; + global_failure_args[1] = i_80758; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_80770 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (j_80765 * + (num_threads_125673 * + k2p2zq_70876) + + i_80758 * + num_threads_125673)]; + int64_t i_80771 = sub64(j_80765, (int64_t) 1); + bool x_80772 = sle64((int64_t) 0, i_80771); + bool y_80773 = slt64(i_80771, k2p2zq_70876); + bool bounds_check_80774 = x_80772 && y_80773; + bool index_certs_80775; + + if (!bounds_check_80774) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 264) == + -1) { + global_failure_args[0] = i_80771; + global_failure_args[1] = i_80758; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_80771 * + (num_threads_125673 * + k2p2zq_70876) + + i_80758 * + num_threads_125673)] = + lw_val_80770; + } + + bool index_certs_80777; + + if (!y_70952) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 265) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = i_80758; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (m_70948 * + (num_threads_125673 * + k2p2zq_70876) + + i_80758 * + num_threads_125673)] = + t_80761; + } + + int64_t i_80779 = ((__global + int64_t *) mem_121011)[phys_tid_80093 + + l_80718 * + num_threads_125673]; + double t_80780 = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + l_80718 * + num_threads_125673]; + double 
tt_80781 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + l_80718 * + num_threads_125673]; + double ttt_80782 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + l_80718 * + num_threads_125673)]; + + for (int64_t j0_80786 = 0; j0_80786 < upper_bound_80735; + j0_80786++) { + int64_t j_80790 = add64(x_80724, j0_80786); + bool x_80791 = sle64((int64_t) 0, j_80790); + bool y_80792 = slt64(j_80790, k2p2zq_70876); + bool bounds_check_80793 = x_80791 && y_80792; + bool index_certs_80794; + + if (!bounds_check_80793) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 266) == -1) { + global_failure_args[0] = j_80790; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_80795 = ((__global + int64_t *) mem_121011)[phys_tid_80093 + + j_80790 * + num_threads_125673]; + int64_t i_80796 = sub64(j_80790, (int64_t) 1); + bool x_80797 = sle64((int64_t) 0, i_80796); + bool y_80798 = slt64(i_80796, k2p2zq_70876); + bool bounds_check_80799 = x_80797 && y_80798; + bool index_certs_80800; + + if (!bounds_check_80799) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 267) == -1) { + global_failure_args[0] = i_80796; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_121011)[phys_tid_80093 + + i_80796 * + num_threads_125673] = + lw_val_80795; + + double lw_val_80802 = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + j_80790 * + num_threads_125673]; + + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_80796 * + num_threads_125673] = + lw_val_80802; + + bool index_certs_80804; + + if (!bounds_check_80793) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 268) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_80790; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = 
k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_80805 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + j_80790 * + num_threads_125673]; + bool index_certs_80806; + + if (!bounds_check_80799) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 269) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_80796; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + i_80796 * + num_threads_125673] = + lw_val_80805; + + bool index_certs_80808; + + if (!bounds_check_80793) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 270) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_80790; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_80809 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + j_80790 * + num_threads_125673)]; + bool index_certs_80810; + + if (!bounds_check_80799) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 271) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_80796; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + i_80796 * + num_threads_125673)] = + lw_val_80809; + } + ((__global int64_t *) mem_121011)[phys_tid_80093 + m_70948 * + num_threads_125673] = + i_80779; + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + m_70948 * + num_threads_125673] = + t_80780; + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + m_70948 * + num_threads_125673] = + 
tt_80781; + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + m_70948 * + num_threads_125673)] = + ttt_80782; + + int64_t k_80816 = sub64(k_80756, (int64_t) 1); + bool cond_80817 = slt64(x_80724, k_80816); + bool loop_cond_80818; + + if (cond_80817) { + bool index_certs_80819; + + if (!y_80734) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 272) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_80718; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_80820 = ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (num_threads_125673 * + k2p2zq_70876 + + l_80718 * + num_threads_125673)]; + double zt_res_80821 = 1.0e-7 * zt_arg_80820; + bool index_certs_80822; + + if (!y_80734) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 273) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_80823 = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + l_80718 * + num_threads_125673]; + bool zl_res_80824 = zl_arg_80823 < zt_res_80821; + + loop_cond_80818 = zl_res_80824; + } else { + loop_cond_80818 = 0; + } + + bool loop_while_tmp_126822 = loop_cond_80818; + int64_t k_tmp_126827 = k_80816; + + loop_while_80751 = loop_while_tmp_126822; + k_80756 = k_tmp_126827; + } + loopres_80745 = loop_while_80751; + loopres_80750 = k_80756; + + bool cond_80825 = x_80724 == k2p2zq_70876; + int64_t j_m_i_80826 = sub64(k2p2zq_70876, l_80718); + bool empty_slice_80830 = j_m_i_80826 == (int64_t) 0; + int64_t m_80831 = sub64(j_m_i_80826, (int64_t) 1); + int64_t i_p_m_t_s_80832 = add64(l_80718, m_80831); + bool zzero_leq_i_p_m_t_s_80833 = sle64((int64_t) 0, + i_p_m_t_s_80832); + bool i_p_m_t_s_leq_w_80834 = slt64(i_p_m_t_s_80832, + k2p2zq_70876); + bool 
i_lte_j_80835 = sle64(l_80718, k2p2zq_70876); + bool y_80836 = zzero_leq_i_p_m_t_s_80833 && + i_p_m_t_s_leq_w_80834; + bool y_80837 = i_lte_j_80835 && y_80836; + bool ok_or_empty_80838 = empty_slice_80830 || y_80837; + bool index_ok_80839 = y_80734 && ok_or_empty_80838; + + if (cond_80825) { + for (int64_t i_126833 = 0; i_126833 < k2p2zq_70876; + i_126833++) { + ((__global double *) mem_125448)[phys_tid_80093 + + i_126833 * + num_threads_125673] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126833 * + num_threads_125673]; + } + for (int64_t i_126834 = 0; i_126834 < (int64_t) 2; + i_126834++) { + for (int64_t i_126835 = 0; i_126835 < k2p2zq_70876; + i_126835++) { + ((__global double *) mem_125446)[phys_tid_80093 + + (i_126834 * + (num_threads_125673 * + k2p2zq_70876) + + i_126835 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (i_126834 * + (num_threads_125673 * + k2p2zq_70876) + + i_126835 * + num_threads_125673)]; + } + } + for (int64_t i_126836 = 0; i_126836 < k2p2zq_70876; + i_126836++) { + for (int64_t i_126837 = 0; i_126837 < k2p2zq_70876; + i_126837++) { + ((__global double *) mem_125498)[phys_tid_80093 + + (i_126836 * + (num_threads_125673 * + k2p2zq_70876) + + i_126837 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126836 * + (num_threads_125673 * + k2p2zq_70876) + + i_126837 * + num_threads_125673)]; + } + } + } else { + bool index_certs_80840; + + if (!index_ok_80839) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 274) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = l_80718; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_80842; + double redout_119715 = 0.0; + + for (int64_t i_119716 = 0; i_119716 < j_m_i_80826; + i_119716++) { + int64_t slice_120006 = l_80718 + i_119716; 
+ double x_80846 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + slice_120006 * + num_threads_125673)]; + double defunc_1_f_res_80847 = x_80846 * x_80846; + double defunc_1_op_res_80845 = defunc_1_f_res_80847 + + redout_119715; + double redout_tmp_126838 = defunc_1_op_res_80845; + + redout_119715 = redout_tmp_126838; + } + defunc_2_reduce_res_80842 = redout_119715; + + double sqrt_res_80848; + + sqrt_res_80848 = futrts_sqrt64(defunc_2_reduce_res_80842); + + bool zeze_res_80849 = sqrt_res_80848 == 0.0; + + if (zeze_res_80849) { + for (int64_t i_126839 = 0; i_126839 < k2p2zq_70876; + i_126839++) { + ((__global double *) mem_125169)[phys_tid_80093 + + i_126839 * + num_threads_125673] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126839 * + num_threads_125673]; + } + for (int64_t i_126840 = 0; i_126840 < (int64_t) 2; + i_126840++) { + for (int64_t i_126841 = 0; i_126841 < k2p2zq_70876; + i_126841++) { + ((__global + double *) mem_125167)[phys_tid_80093 + + (i_126840 * + (num_threads_125673 * + k2p2zq_70876) + + i_126841 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (i_126840 * + (num_threads_125673 * + k2p2zq_70876) + + i_126841 * + num_threads_125673)]; + } + } + for (int64_t i_126842 = 0; i_126842 < k2p2zq_70876; + i_126842++) { + for (int64_t i_126843 = 0; i_126843 < k2p2zq_70876; + i_126843++) { + ((__global + double *) mem_125438)[phys_tid_80093 + + (i_126842 * + (num_threads_125673 * + k2p2zq_70876) + + i_126843 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126842 * + (num_threads_125673 * + k2p2zq_70876) + + i_126843 * + num_threads_125673)]; + } + } + } else { + bool index_ok_80853 = y_80734 && y_80734; + bool index_certs_80854; + + if (!index_ok_80853) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 275) == -1) { + global_failure_args[0] = 
l_80718; + global_failure_args[1] = l_80718; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_80855 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)]; + bool zeze_res_80856 = znze_arg_80855 == 0.0; + bool znze_res_80857 = !zeze_res_80856; + double nrmxl_80858; + + if (znze_res_80857) { + double abs_res_80859 = fabs(sqrt_res_80848); + double sgn_res_80860 = fsignum32(znze_arg_80855); + double zt_res_80861 = abs_res_80859 * sgn_res_80860; + + nrmxl_80858 = zt_res_80861; + } else { + nrmxl_80858 = sqrt_res_80848; + } + for (int64_t i0_80863 = 0; i0_80863 < j_m_i_80826; + i0_80863++) { + int64_t i_80865 = add64(l_80718, i0_80863); + bool x_80866 = sle64((int64_t) 0, i_80865); + bool y_80867 = slt64(i_80865, k2p2zq_70876); + bool bounds_check_80868 = x_80866 && y_80867; + bool index_ok_80869 = y_80734 && bounds_check_80868; + bool index_certs_80870; + + if (!index_ok_80869) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 276) == + -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = i_80865; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_80871 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + i_80865 * + num_threads_125673)]; + double lw_val_80872 = x_80871 / nrmxl_80858; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + i_80865 * + num_threads_125673)] = + lw_val_80872; + } + + double zp_arg_80874 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)]; + double zp_res_80875 = 
1.0 + zp_arg_80874; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)] = + zp_res_80875; + + bool bounds_invalid_upwards_80877 = slt64(k2p2zq_70876, + x_80724); + bool valid_80878 = !bounds_invalid_upwards_80877; + bool range_valid_c_80879; + + if (!valid_80878) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 277) == -1) { + global_failure_args[0] = x_80724; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_80880 = slt64((int64_t) 0, + upper_bound_80735); + bool loop_not_taken_80881 = !loop_nonempty_80880; + bool protect_assert_disj_80882 = index_ok_80853 || + loop_not_taken_80881; + bool index_certs_80883; + + if (!protect_assert_disj_80882) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 278) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = l_80718; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_80887 = 0; i_80887 < upper_bound_80735; + i_80887++) { + int64_t index_primexp_80891 = add64(x_80724, + i_80887); + bool x_80892 = sle64((int64_t) 0, + index_primexp_80891); + bool y_80893 = slt64(index_primexp_80891, + k2p2zq_70876); + bool bounds_check_80894 = x_80892 && y_80893; + double t_80895; + double t_80897 = 0.0; + + for (int64_t i0_80896 = 0; i0_80896 < j_m_i_80826; + i0_80896++) { + int64_t i_80898 = add64(l_80718, i0_80896); + bool x_80899 = sle64((int64_t) 0, i_80898); + bool y_80900 = slt64(i_80898, k2p2zq_70876); + bool bounds_check_80901 = x_80899 && y_80900; + bool index_ok_80902 = y_80734 && + bounds_check_80901; + bool index_certs_80903; + + if (!index_ok_80902) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 279) == + -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = i_80898; + 
global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_80904 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + i_80898 * + num_threads_125673)]; + bool index_ok_80905 = bounds_check_80894 && + bounds_check_80901; + bool index_certs_80906; + + if (!index_ok_80905) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 280) == + -1) { + global_failure_args[0] = + index_primexp_80891; + global_failure_args[1] = i_80898; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_80907 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (index_primexp_80891 * + (num_threads_125673 * + k2p2zq_70876) + + i_80898 * + num_threads_125673)]; + double y_80908 = x_80904 * y_80907; + double loopres_80909 = t_80897 - y_80908; + double t_tmp_126848 = loopres_80909; + + t_80897 = t_tmp_126848; + } + t_80895 = t_80897; + + double y_80910 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)]; + double t_80911 = t_80895 / y_80910; + + for (int64_t i0_80913 = 0; i0_80913 < j_m_i_80826; + i0_80913++) { + int64_t i_80915 = add64(l_80718, i0_80913); + bool x_80916 = sle64((int64_t) 0, i_80915); + bool y_80917 = slt64(i_80915, k2p2zq_70876); + bool bounds_check_80918 = x_80916 && y_80917; + bool index_ok_80919 = bounds_check_80894 && + bounds_check_80918; + bool index_certs_80920; + + if (!index_ok_80919) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 281) == + -1) { + global_failure_args[0] = + index_primexp_80891; + global_failure_args[1] = i_80915; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + 
} + } + + double x_80921 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (index_primexp_80891 * + (num_threads_125673 * + k2p2zq_70876) + + i_80915 * + num_threads_125673)]; + bool index_ok_80922 = y_80734 && + bounds_check_80918; + bool index_certs_80923; + + if (!index_ok_80922) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 282) == + -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = i_80915; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_80924 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + i_80915 * + num_threads_125673)]; + double y_80925 = t_80911 * y_80924; + double lw_val_80926 = x_80921 + y_80925; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (index_primexp_80891 * + (num_threads_125673 * + k2p2zq_70876) + + i_80915 * + num_threads_125673)] = + lw_val_80926; + } + + bool index_certs_80928; + + if (!bounds_check_80894) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 283) == + -1) { + global_failure_args[0] = + index_primexp_80891; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_80929 = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + index_primexp_80891 * + num_threads_125673]; + bool zeze_res_80930 = zeze_arg_80929 == 0.0; + + if (!zeze_res_80930) { + bool index_ok_80933 = y_80734 && + bounds_check_80894; + bool index_certs_80934; + + if (!index_ok_80933) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 284) == + -1) { + global_failure_args[0] = + index_primexp_80891; + global_failure_args[1] = l_80718; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_80935 = ((__global + 
double *) double_buffer_mem_125552)[phys_tid_80093 + + (index_primexp_80891 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)]; + double abs_res_80936 = fabs(abs_arg_80935); + double zs_res_80937 = abs_res_80936 / + zeze_arg_80929; + double ztzt_res_80938 = fpow64(zs_res_80937, + 2.0); + double zm_res_80939 = 1.0 - ztzt_res_80938; + double max_res_80940 = fmax64(0.0, + zm_res_80939); + double abs_res_80941 = fabs(max_res_80940); + bool zgze_res_80942 = 1.0e-6 <= abs_res_80941; + + if (zgze_res_80942) { + double sqrt_res_80945; + + sqrt_res_80945 = + futrts_sqrt64(max_res_80940); + + double zt_res_80946 = zeze_arg_80929 * + sqrt_res_80945; + + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + index_primexp_80891 * + num_threads_125673] = + zt_res_80946; + } else { + bool empty_slice_80948 = + upper_bound_80735 == (int64_t) 0; + int64_t m_80949 = sub64(upper_bound_80735, + (int64_t) 1); + int64_t i_p_m_t_s_80950 = add64(x_80724, + m_80949); + bool zzero_leq_i_p_m_t_s_80951 = + sle64((int64_t) 0, i_p_m_t_s_80950); + bool i_p_m_t_s_leq_w_80952 = + slt64(i_p_m_t_s_80950, k2p2zq_70876); + bool zzero_lte_i_80953 = sle64((int64_t) 0, + x_80724); + bool i_lte_j_80954 = sle64(x_80724, + k2p2zq_70876); + bool y_80955 = i_p_m_t_s_leq_w_80952 && + zzero_lte_i_80953; + bool y_80956 = zzero_leq_i_p_m_t_s_80951 && + y_80955; + bool y_80957 = i_lte_j_80954 && y_80956; + bool forwards_ok_80958 = + zzero_lte_i_80953 && y_80957; + bool ok_or_empty_80959 = + empty_slice_80948 || forwards_ok_80958; + bool index_ok_80960 = bounds_check_80894 && + ok_or_empty_80959; + bool index_certs_80961; + + if (!index_ok_80960) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 285) == + -1) { + global_failure_args[0] = + index_primexp_80891; + global_failure_args[1] = + x_80724; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double 
defunc_2_reduce_res_80963; + double redout_119717 = 0.0; + + for (int64_t i_119718 = 0; i_119718 < + upper_bound_80735; i_119718++) { + int64_t slice_120007 = x_80724 + + i_119718; + double x_80967 = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (index_primexp_80891 * + (num_threads_125673 * + k2p2zq_70876) + + slice_120007 * + num_threads_125673)]; + double defunc_1_f_res_80968 = x_80967 * + x_80967; + double defunc_1_op_res_80966 = + defunc_1_f_res_80968 + + redout_119717; + double redout_tmp_126850 = + defunc_1_op_res_80966; + + redout_119717 = redout_tmp_126850; + } + defunc_2_reduce_res_80963 = redout_119717; + + double sqrt_res_80969; + + sqrt_res_80969 = + futrts_sqrt64(defunc_2_reduce_res_80963); + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + index_primexp_80891 * + num_threads_125673] = + sqrt_res_80969; + + bool index_certs_80971; + + if (!bounds_check_80894) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 286) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_80891; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126851 = 0; i_126851 < + (int64_t) 1; i_126851++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (index_primexp_80891 + + i_126851) * + num_threads_125673] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + num_threads_125673 * + index_primexp_80891 + + i_126851 * + num_threads_125673]; + } + } + } + } + + bool index_certs_80974; + + if (!y_80734) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 287) == -1) { + global_failure_args[0] = l_80718; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126852 = 0; i_126852 < (int64_t) 1; + i_126852++) { + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + 
(l_80718 + + i_126852) * + num_threads_125673] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + num_threads_125673 * + l_80718 + + i_126852 * + num_threads_125673]; + } + + double zt_res_80977 = -1.0 * nrmxl_80858; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (l_80718 * + (num_threads_125673 * + k2p2zq_70876) + + l_80718 * + num_threads_125673)] = + zt_res_80977; + for (int64_t i_126853 = 0; i_126853 < k2p2zq_70876; + i_126853++) { + ((__global double *) mem_125169)[phys_tid_80093 + + i_126853 * + num_threads_125673] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126853 * + num_threads_125673]; + } + for (int64_t i_126854 = 0; i_126854 < (int64_t) 2; + i_126854++) { + for (int64_t i_126855 = 0; i_126855 < k2p2zq_70876; + i_126855++) { + ((__global + double *) mem_125167)[phys_tid_80093 + + (i_126854 * + (num_threads_125673 * + k2p2zq_70876) + + i_126855 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (i_126854 * + (num_threads_125673 * + k2p2zq_70876) + + i_126855 * + num_threads_125673)]; + } + } + for (int64_t i_126856 = 0; i_126856 < k2p2zq_70876; + i_126856++) { + for (int64_t i_126857 = 0; i_126857 < k2p2zq_70876; + i_126857++) { + ((__global + double *) mem_125438)[phys_tid_80093 + + (i_126856 * + (num_threads_125673 * + k2p2zq_70876) + + i_126857 * + num_threads_125673)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126856 * + (num_threads_125673 * + k2p2zq_70876) + + i_126857 * + num_threads_125673)]; + } + } + } + for (int64_t i_126858 = 0; i_126858 < k2p2zq_70876; + i_126858++) { + ((__global double *) mem_125448)[phys_tid_80093 + + i_126858 * + num_threads_125673] = + ((__global double *) mem_125169)[phys_tid_80093 + + i_126858 * + num_threads_125673]; + } + for (int64_t i_126859 = 0; i_126859 < (int64_t) 2; + i_126859++) { + for (int64_t i_126860 = 
0; i_126860 < k2p2zq_70876; + i_126860++) { + ((__global double *) mem_125446)[phys_tid_80093 + + (i_126859 * + (num_threads_125673 * + k2p2zq_70876) + + i_126860 * + num_threads_125673)] = + ((__global + double *) mem_125167)[phys_tid_80093 + + (i_126859 * + (num_threads_125673 * + k2p2zq_70876) + + i_126860 * + num_threads_125673)]; + } + } + for (int64_t i_126861 = 0; i_126861 < k2p2zq_70876; + i_126861++) { + for (int64_t i_126862 = 0; i_126862 < k2p2zq_70876; + i_126862++) { + ((__global double *) mem_125498)[phys_tid_80093 + + (i_126861 * + (num_threads_125673 * + k2p2zq_70876) + + i_126862 * + num_threads_125673)] = + ((__global + double *) mem_125438)[phys_tid_80093 + + (i_126861 * + (num_threads_125673 * + k2p2zq_70876) + + i_126862 * + num_threads_125673)]; + } + } + } + for (int64_t i_126863 = 0; i_126863 < k2p2zq_70876; + i_126863++) { + for (int64_t i_126864 = 0; i_126864 < k2p2zq_70876; + i_126864++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126863 * + (num_threads_125673 * + k2p2zq_70876) + + i_126864 * + num_threads_125673)] = + ((__global double *) mem_125498)[phys_tid_80093 + + (i_126863 * + (num_threads_125673 * + k2p2zq_70876) + + i_126864 * + num_threads_125673)]; + } + } + for (int64_t i_126865 = 0; i_126865 < k2p2zq_70876; + i_126865++) { + ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126865 * + num_threads_125673] = + ((__global double *) mem_125448)[phys_tid_80093 + + i_126865 * + num_threads_125673]; + } + for (int64_t i_126866 = 0; i_126866 < (int64_t) 2; i_126866++) { + for (int64_t i_126867 = 0; i_126867 < k2p2zq_70876; + i_126867++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_80093 + + (i_126866 * + (num_threads_125673 * + k2p2zq_70876) + + i_126867 * + num_threads_125673)] = + ((__global double *) mem_125446)[phys_tid_80093 + + (i_126866 * + (num_threads_125673 * + k2p2zq_70876) + + i_126867 * + num_threads_125673)]; + } + } + + int64_t k_tmp_126821 = 
loopres_80750; + + k_80723 = k_tmp_126821; + } + dqrdc2_res_80717 = k_80723; + + int64_t min_arg_80979 = sub64(dqrdc2_res_80717, (int64_t) 1); + int64_t min_res_80980 = smin64(k2p2zq_70876, min_arg_80979); + + for (int64_t i_126868 = 0; i_126868 < k2p2zq_70876; i_126868++) { + for (int64_t i_126869 = 0; i_126869 < k2p2zq_70876; + i_126869++) { + ((__global double *) mem_121335)[i_126868 * (m_70861 * + k2p2zq_70876) + + i_126869 * m_70861 + + gtid_80092] = ((__global + double *) double_buffer_mem_125552)[phys_tid_80093 + + (i_126868 * + (num_threads_125673 * + k2p2zq_70876) + + i_126869 * + num_threads_125673)]; + } + } + for (int64_t i_126870 = 0; i_126870 < k2p2zq_70876; i_126870++) { + ((__global double *) mem_121338)[i_126870 * m_70861 + + gtid_80092] = ((__global + double *) double_buffer_mem_125553)[phys_tid_80093 + + i_126870 * + num_threads_125673]; + } + for (int64_t i_126871 = 0; i_126871 < k2p2zq_70876; i_126871++) { + ((__global int64_t *) mem_121341)[i_126871 * m_70861 + + gtid_80092] = ((__global + int64_t *) mem_121011)[phys_tid_80093 + + i_126871 * + num_threads_125673]; + } + ((__global int64_t *) mem_121343)[gtid_80092] = min_res_80980; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_80702 +} +__kernel void mainDetailedzisegmap_80381(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t j_80634, + int64_t num_groups_80645, __global + unsigned char *mem_120894, __global + unsigned char *mem_120923, __global + unsigned char *mem_120927, __global + unsigned char *mem_120931, __global + unsigned char *mem_120935) +{ + #define segmap_group_sizze_80644 (mainDetailedzisegmap_group_sizze_80383) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126703; + int32_t local_tid_126704; + int64_t group_sizze_126707; + int32_t wave_sizze_126706; + int32_t group_tid_126705; + + 
global_tid_126703 = get_global_id(0); + local_tid_126704 = get_local_id(0); + group_sizze_126707 = get_local_size(0); + wave_sizze_126706 = LOCKSTEP_WIDTH; + group_tid_126705 = get_group_id(0); + + int32_t phys_tid_80381; + + phys_tid_80381 = global_tid_126703; + + int32_t phys_group_id_126708; + + phys_group_id_126708 = get_group_id(0); + for (int32_t i_126709 = 0; i_126709 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_80644)) - + phys_group_id_126708, sext_i64_i32(num_groups_80645)); + i_126709++) { + int32_t virt_group_id_126710 = phys_group_id_126708 + i_126709 * + sext_i64_i32(num_groups_80645); + int64_t gtid_80380 = sext_i32_i64(virt_group_id_126710) * + segmap_group_sizze_80644 + sext_i32_i64(local_tid_126704); + + if (slt64(gtid_80380, m_70861)) { + double defunc_2_reduce_res_80652; + double redout_119713 = 0.0; + + for (int64_t i_119714 = 0; i_119714 < k2p2zq_70876; i_119714++) { + double x_80656 = ((__global double *) mem_120894)[i_119714 * + (k2p2zq_70876 * + m_70861) + + gtid_80380 * + k2p2zq_70876 + + j_80634]; + double defunc_1_f_res_80657 = x_80656 * x_80656; + double defunc_1_op_res_80655 = defunc_1_f_res_80657 + + redout_119713; + double redout_tmp_126711 = defunc_1_op_res_80655; + + redout_119713 = redout_tmp_126711; + } + defunc_2_reduce_res_80652 = redout_119713; + + double sqrt_res_80658; + + sqrt_res_80658 = futrts_sqrt64(defunc_2_reduce_res_80652); + ((__global double *) mem_120923)[gtid_80380 + j_80634 * m_70861] = + sqrt_res_80658; + ((__global double *) mem_120927)[gtid_80380 + j_80634 * m_70861] = + sqrt_res_80658; + + bool zeze_res_80661 = sqrt_res_80658 == 0.0; + double lw_val_80662; + + if (zeze_res_80661) { + lw_val_80662 = 1.0; + } else { + lw_val_80662 = sqrt_res_80658; + } + ((__global double *) mem_120927)[gtid_80380 + (m_70861 * + k2p2zq_70876 + + j_80634 * m_70861)] = + lw_val_80662; + for (int64_t i_126712 = 0; i_126712 < k2p2zq_70876; i_126712++) { + ((__global double *) mem_120931)[i_126712 * m_70861 + + 
gtid_80380] = ((__global + double *) mem_120923)[gtid_80380 + + i_126712 * + m_70861]; + } + for (int64_t i_126713 = 0; i_126713 < (int64_t) 2; i_126713++) { + for (int64_t i_126714 = 0; i_126714 < k2p2zq_70876; + i_126714++) { + ((__global double *) mem_120935)[i_126713 * (m_70861 * + k2p2zq_70876) + + i_126714 * m_70861 + + gtid_80380] = ((__global + double *) mem_120927)[gtid_80380 + + (i_126713 * + (m_70861 * + k2p2zq_70876) + + i_126714 * + m_70861)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_80644 +} +__kernel void mainDetailedzisegmap_80426(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t j_80634, + int64_t num_groups_80690, + int64_t num_threads_115425, + int64_t per_chunk_115432, __global + unsigned char *mem_120941, __global + unsigned char *mem_120946, __global + unsigned char *mem_120951, __global + unsigned char *mem_120956) +{ + #define segmap_group_sizze_80689 (mainDetailedzisegmap_group_sizze_80428) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126786; + int32_t local_tid_126787; + int64_t group_sizze_126790; + int32_t wave_sizze_126789; + int32_t group_tid_126788; + + global_tid_126786 = get_global_id(0); + local_tid_126787 = get_local_id(0); + group_sizze_126790 = get_local_size(0); + wave_sizze_126789 = LOCKSTEP_WIDTH; + group_tid_126788 = get_group_id(0); + + int32_t phys_tid_80426; + + phys_tid_80426 = global_tid_126786; + + int32_t phys_group_id_126791; + + phys_group_id_126791 = get_group_id(0); + for (int32_t i_126792 = 0; i_126792 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_80689)) - + phys_group_id_126791, sext_i64_i32(num_groups_80690)); + i_126792++) { + int32_t virt_group_id_126793 = phys_group_id_126791 + i_126792 * + sext_i64_i32(num_groups_80690); + int64_t gtid_80425 = 
sext_i32_i64(virt_group_id_126793) * + segmap_group_sizze_80689 + sext_i32_i64(local_tid_126787); + + if (slt64(gtid_80425, m_70861)) { + double sqrt_res_80694 = ((__global + double *) mem_120941)[gtid_80425]; + + for (int64_t i_126794 = 0; i_126794 < (int64_t) 1; i_126794++) { + ((__global double *) mem_120946)[gtid_80425 + (j_80634 + + i_126794) * + m_70861] = ((__global + double *) mem_120951)[(gtid_80425 + + i_126794 - + squot64(gtid_80425 + + i_126794, + per_chunk_115432) * + per_chunk_115432) * + num_threads_115425 + + squot64(gtid_80425 + + i_126794, + per_chunk_115432)]; + } + + bool zeze_res_80696 = sqrt_res_80694 == 0.0; + double lw_val_80697; + + if (zeze_res_80696) { + lw_val_80697 = 1.0; + } else { + lw_val_80697 = sqrt_res_80694; + } + ((__global double *) mem_120946)[gtid_80425 + (m_70861 * + k2p2zq_70876 + + j_80634 * m_70861)] = + lw_val_80697; + for (int64_t i_126795 = 0; i_126795 < (int64_t) 2; i_126795++) { + for (int64_t i_126796 = 0; i_126796 < k2p2zq_70876; + i_126796++) { + ((__global double *) mem_120956)[i_126795 * (m_70861 * + k2p2zq_70876) + + i_126796 * m_70861 + + gtid_80425] = ((__global + double *) mem_120946)[gtid_80425 + + (i_126795 * + (m_70861 * + k2p2zq_70876) + + i_126796 * + m_70861)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_80689 +} +__kernel void mainDetailedzisegmap_80441(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t j_80634, __global + unsigned char *mem_param_120902, + __global unsigned char *mem_120941) +{ + #define segmap_group_sizze_80684 (mainDetailedzisegmap_group_sizze_80444) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126780; + int32_t local_tid_126781; + int64_t group_sizze_126784; + int32_t wave_sizze_126783; + int32_t group_tid_126782; + + global_tid_126780 = get_global_id(0); + 
local_tid_126781 = get_local_id(0); + group_sizze_126784 = get_local_size(0); + wave_sizze_126783 = LOCKSTEP_WIDTH; + group_tid_126782 = get_group_id(0); + + int32_t phys_tid_80441; + + phys_tid_80441 = global_tid_126780; + + int64_t gtid_80439; + + gtid_80439 = sext_i32_i64(group_tid_126782) * segmap_group_sizze_80684 + + sext_i32_i64(local_tid_126781); + + int64_t gtid_80440; + + gtid_80440 = sext_i32_i64(group_tid_126782) * segmap_group_sizze_80684 + + sext_i32_i64(local_tid_126781) - (sext_i32_i64(group_tid_126782) * + segmap_group_sizze_80684 + + sext_i32_i64(local_tid_126781)); + if (slt64(gtid_80439, m_70861) && slt64(gtid_80440, (int64_t) 1)) { + double sqrt_res_80687 = ((__global double *) mem_120941)[gtid_80439]; + + if ((sle64((int64_t) 0, gtid_80439) && slt64(gtid_80439, m_70861)) && + (sle64((int64_t) 0, j_80634) && slt64(j_80634, k2p2zq_70876))) { + ((__global double *) mem_param_120902)[gtid_80439 * k2p2zq_70876 + + j_80634] = sqrt_res_80687; + } + } + + error_0: + return; + #undef segmap_group_sizze_80684 +} +__kernel void mainDetailedzisegmap_80450(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_120938, __global + unsigned char *mem_120941) +{ + #define segmap_group_sizze_80677 (mainDetailedzisegmap_group_sizze_80452) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126775; + int32_t local_tid_126776; + int64_t group_sizze_126779; + int32_t wave_sizze_126778; + int32_t group_tid_126777; + + global_tid_126775 = get_global_id(0); + local_tid_126776 = get_local_id(0); + group_sizze_126779 = get_local_size(0); + wave_sizze_126778 = LOCKSTEP_WIDTH; + group_tid_126777 = get_group_id(0); + + int32_t phys_tid_80450; + + phys_tid_80450 = global_tid_126775; + + int64_t gtid_80449; + + gtid_80449 = sext_i32_i64(group_tid_126777) * segmap_group_sizze_80677 + + sext_i32_i64(local_tid_126776); + if (slt64(gtid_80449, m_70861)) { + 
double defunc_2_reduce_res_80680 = ((__global + double *) mem_120938)[gtid_80449]; + double sqrt_res_80681; + + sqrt_res_80681 = futrts_sqrt64(defunc_2_reduce_res_80680); + ((__global double *) mem_120941)[gtid_80449] = sqrt_res_80681; + } + + error_0: + return; + #undef segmap_group_sizze_80677 +} +__kernel void mainDetailedzisegmap_81467(__global int *global_failure, + int64_t k2p2zq_70876, __global + unsigned char *mem_121938) +{ + #define segmap_group_sizze_81492 (mainDetailedzisegmap_group_sizze_81470) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127194; + int32_t local_tid_127195; + int64_t group_sizze_127198; + int32_t wave_sizze_127197; + int32_t group_tid_127196; + + global_tid_127194 = get_global_id(0); + local_tid_127195 = get_local_id(0); + group_sizze_127198 = get_local_size(0); + wave_sizze_127197 = LOCKSTEP_WIDTH; + group_tid_127196 = get_group_id(0); + + int32_t phys_tid_81467; + + phys_tid_81467 = global_tid_127194; + + int64_t gtid_81465; + + gtid_81465 = squot64(sext_i32_i64(group_tid_127196) * + segmap_group_sizze_81492 + + sext_i32_i64(local_tid_127195), k2p2zq_70876); + + int64_t gtid_81466; + + gtid_81466 = sext_i32_i64(group_tid_127196) * segmap_group_sizze_81492 + + sext_i32_i64(local_tid_127195) - + squot64(sext_i32_i64(group_tid_127196) * segmap_group_sizze_81492 + + sext_i32_i64(local_tid_127195), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_81465, k2p2zq_70876) && slt64(gtid_81466, k2p2zq_70876)) { + bool cond_81497 = gtid_81466 == gtid_81465; + double defunc_0_f_res_81498; + + if (cond_81497) { + defunc_0_f_res_81498 = 1.0; + } else { + defunc_0_f_res_81498 = 0.0; + } + ((__global double *) mem_121938)[gtid_81465 * k2p2zq_70876 + + gtid_81466] = defunc_0_f_res_81498; + } + + error_0: + return; + #undef segmap_group_sizze_81492 +} +__kernel void mainDetailedzisegmap_81606(__global int *global_failure, + int failure_is_an_option, 
__global + int64_t *global_failure_args, + int64_t m_70861, int64_t n_70864, + int64_t k2p2zq_70876, int64_t m_70948, + unsigned char y_70952, + int64_t defunc_2_reduce_res_70985, + double tol_71054, int64_t k_71067, + int64_t r_71551, int64_t rp1_71562, + unsigned char ok_or_empty_71573, + int64_t min_res_71574, + int64_t num_groups_82142, + int64_t binop_x_120251, + int64_t num_threads_125708, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, + __global unsigned char *mem_121938, + __global unsigned char *mem_121941, + __global + unsigned char *mem_param_121972, + __global unsigned char *mem_122011, + __global unsigned char *mem_122014, + __global unsigned char *mem_122017, + __global unsigned char *mem_122021, + __global unsigned char *mem_122025, + __global unsigned char *mem_122028, + __global unsigned char *mem_122042, + __global unsigned char *mem_122045, + __global unsigned char *mem_122047, + __global unsigned char *mem_122382, + __global unsigned char *mem_122423, + __global unsigned char *mem_122435, + __global unsigned char *mem_122464, + __global unsigned char *mem_122537, + __global unsigned char *mem_122552, + __global unsigned char *mem_122564, + __global unsigned char *mem_122575, + __global unsigned char *mem_122595, + __global unsigned char *mem_122598, + __global unsigned char *mem_122650, + __global unsigned char *mem_122654, + __global unsigned char *mem_122657, + __global unsigned char *mem_122659, + __global unsigned char *mem_122661, + __global unsigned char *mem_125248, + __global unsigned char *mem_125250, + __global unsigned char *mem_125258, + __global unsigned char *mem_125455, + __global unsigned char *mem_125463, + __global unsigned char *mem_125465, + __global unsigned char *mem_125505, + __global + unsigned char *double_buffer_mem_125569, + __global + unsigned char *double_buffer_mem_125570, + __global + unsigned char *double_buffer_mem_125571, + __global + unsigned char 
*double_buffer_mem_125582) +{ + #define segmap_group_sizze_82141 (mainDetailedzisegmap_group_sizze_81608) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127234; + int32_t local_tid_127235; + int64_t group_sizze_127238; + int32_t wave_sizze_127237; + int32_t group_tid_127236; + + global_tid_127234 = get_global_id(0); + local_tid_127235 = get_local_id(0); + group_sizze_127238 = get_local_size(0); + wave_sizze_127237 = LOCKSTEP_WIDTH; + group_tid_127236 = get_group_id(0); + + int32_t phys_tid_81606; + + phys_tid_81606 = global_tid_127234; + + int32_t phys_group_id_127239; + + phys_group_id_127239 = get_group_id(0); + for (int32_t i_127240 = 0; i_127240 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_82141)) - + phys_group_id_127239, sext_i64_i32(num_groups_82142)); + i_127240++) { + int32_t virt_group_id_127241 = phys_group_id_127239 + i_127240 * + sext_i64_i32(num_groups_82142); + int64_t gtid_81605 = sext_i32_i64(virt_group_id_127241) * + segmap_group_sizze_82141 + sext_i32_i64(local_tid_127235); + + if (slt64(gtid_81605, m_70861)) { + int64_t x_82152 = ((__global + int64_t *) mem_param_121972)[gtid_81605]; + double defunc_0_f_res_82156; + double redout_119743 = 0.0; + + for (int64_t i_119745 = 0; i_119745 < k2p2zq_70876; i_119745++) { + double x_82162 = ((__global double *) mem_120246)[i_119745 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_81605 * + defunc_2_reduce_res_70985 + + r_71551]; + double defunc_0_f_res_82163; + double redout_119747 = 0.0; + + for (int64_t i_119748 = 0; i_119748 < k2p2zq_70876; + i_119748++) { + double x_82167 = ((__global double *) mem_120246)[i_119748 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_81605 * + defunc_2_reduce_res_70985 + + r_71551]; + double 
x_82168 = ((__global double *) mem_122011)[i_119745 * + (m_70861 * + k2p2zq_70876) + + i_119748 * + m_70861 + + gtid_81605]; + double defunc_1_f_res_82169 = x_82167 * x_82168; + double defunc_1_op_res_82166 = defunc_1_f_res_82169 + + redout_119747; + double redout_tmp_127244 = defunc_1_op_res_82166; + + redout_119747 = redout_tmp_127244; + } + defunc_0_f_res_82163 = redout_119747; + + double defunc_1_f_res_82170 = x_82162 * defunc_0_f_res_82163; + double defunc_1_op_res_82160 = defunc_1_f_res_82170 + + redout_119743; + + ((__global double *) mem_122028)[phys_tid_81606 + i_119745 * + num_threads_125708] = + defunc_0_f_res_82163; + + double redout_tmp_127242 = defunc_1_op_res_82160; + + redout_119743 = redout_tmp_127242; + } + defunc_0_f_res_82156 = redout_119743; + + double fr_82171 = 1.0 + defunc_0_f_res_82156; + double x_82172 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_81605 * + n_70864 + + r_71551]; + double defunc_0_f_res_82173; + double redout_119749 = 0.0; + + for (int64_t i_119750 = 0; i_119750 < k2p2zq_70876; i_119750++) { + double x_82177 = ((__global double *) mem_120246)[i_119750 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_81605 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_82178 = ((__global double *) mem_122014)[i_119750 * + m_70861 + + gtid_81605]; + double defunc_1_f_res_82179 = x_82177 * x_82178; + double defunc_1_op_res_82176 = defunc_1_f_res_82179 + + redout_119749; + double redout_tmp_127245 = defunc_1_op_res_82176; + + redout_119749 = redout_tmp_127245; + } + defunc_0_f_res_82173 = redout_119749; + + double resid_82180 = x_82172 - defunc_0_f_res_82173; + double sqrt_res_82181; + + sqrt_res_82181 = futrts_sqrt64(fr_82171); + + double recresid_r_82182 = resid_82180 / sqrt_res_82181; + + for (int64_t i_127246 = 0; i_127246 < k2p2zq_70876; i_127246++) { + ((__global double *) mem_122042)[phys_tid_81606 + i_127246 * + num_threads_125708] = 0.0; + } + for (int64_t i_127247 = 0; i_127247 < (int64_t) 2; i_127247++) { + 
for (int64_t i_127248 = 0; i_127248 < k2p2zq_70876; + i_127248++) { + ((__global double *) mem_122045)[phys_tid_81606 + + (i_127247 * + (num_threads_125708 * + k2p2zq_70876) + + i_127248 * + num_threads_125708)] = + 0.0; + } + } + for (int64_t i_127249 = 0; i_127249 < k2p2zq_70876; i_127249++) { + int64_t x_127250 = (int64_t) 0 + i_127249 * (int64_t) 1; + + ((__global int64_t *) mem_122047)[phys_tid_81606 + i_127249 * + num_threads_125708] = + x_127250; + } + for (int64_t j_82188 = 0; j_82188 < k2p2zq_70876; j_82188++) { + bool index_certs_82191; + + if (!ok_or_empty_71573) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 297) == -1) { + global_failure_args[0] = j_82188; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_82193; + double redout_119751 = 0.0; + + for (int64_t i_119752 = 0; i_119752 < rp1_71562; i_119752++) { + double x_82197 = ((__global double *) mem_122025)[i_119752 * + (k2p2zq_70876 * + m_70861) + + gtid_81605 * + k2p2zq_70876 + + j_82188]; + double defunc_1_f_res_82198 = x_82197 * x_82197; + double defunc_1_op_res_82196 = defunc_1_f_res_82198 + + redout_119751; + double redout_tmp_127253 = defunc_1_op_res_82196; + + redout_119751 = redout_tmp_127253; + } + defunc_2_reduce_res_82193 = redout_119751; + + double sqrt_res_82199; + + sqrt_res_82199 = futrts_sqrt64(defunc_2_reduce_res_82193); + ((__global double *) mem_122042)[phys_tid_81606 + j_82188 * + num_threads_125708] = + sqrt_res_82199; + ((__global double *) mem_122045)[phys_tid_81606 + j_82188 * + num_threads_125708] = + sqrt_res_82199; + + bool zeze_res_82202 = sqrt_res_82199 == 0.0; + double lw_val_82203; + + if (zeze_res_82202) { + lw_val_82203 = 1.0; + } else { + lw_val_82203 = sqrt_res_82199; + } + ((__global double *) mem_122045)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + j_82188 * + num_threads_125708)] = + 
lw_val_82203; + } + for (int64_t i_127254 = 0; i_127254 < k2p2zq_70876; i_127254++) { + for (int64_t i_127255 = 0; i_127255 < rp1_71562; i_127255++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_127254 * + (num_threads_125708 * + rp1_71562) + + i_127255 * + num_threads_125708)] = + ((__global double *) mem_122021)[gtid_81605 + + (i_127254 * (m_70861 * + rp1_71562) + + i_127255 * m_70861)]; + } + } + for (int64_t i_127256 = 0; i_127256 < k2p2zq_70876; i_127256++) { + ((__global double *) double_buffer_mem_125570)[phys_tid_81606 + + i_127256 * + num_threads_125708] = + ((__global double *) mem_122042)[phys_tid_81606 + i_127256 * + num_threads_125708]; + } + for (int64_t i_127257 = 0; i_127257 < (int64_t) 2; i_127257++) { + for (int64_t i_127258 = 0; i_127258 < k2p2zq_70876; + i_127258++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (i_127257 * + (num_threads_125708 * + k2p2zq_70876) + + i_127258 * + num_threads_125708)] = + ((__global double *) mem_122045)[phys_tid_81606 + + (i_127257 * + (num_threads_125708 * + k2p2zq_70876) + + i_127258 * + num_threads_125708)]; + } + } + + int64_t dqrdc2_res_82209; + int64_t k_82215 = k_71067; + + for (int64_t l_82210 = 0; l_82210 < min_res_71574; l_82210++) { + int64_t x_82216 = add64((int64_t) 1, l_82210); + bool cond_82217 = slt64(x_82216, k_82215); + bool loop_cond_82218; + + if (cond_82217) { + bool y_82219 = slt64(l_82210, k2p2zq_70876); + bool index_certs_82220; + + if (!y_82219) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 298) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_82210; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_82221 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + l_82210 * + num_threads_125708)]; + double zt_res_82222 = 1.0e-7 * 
zt_arg_82221; + bool index_certs_82223; + + if (!y_82219) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 299) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_82224 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + l_82210 * + num_threads_125708]; + bool zl_res_82225 = zl_arg_82224 < zt_res_82222; + + loop_cond_82218 = zl_res_82225; + } else { + loop_cond_82218 = 0; + } + + bool y_82226 = slt64(l_82210, k2p2zq_70876); + int64_t upper_bound_82227 = sub64(k2p2zq_70876, x_82216); + bool loop_not_taken_82228 = !loop_cond_82218; + bool protect_assert_disj_82229 = y_82226 || + loop_not_taken_82228; + bool index_certs_82230; + + if (!protect_assert_disj_82229) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 300) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_82231; + + if (!protect_assert_disj_82229) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 301) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_82210; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_82232; + + if (!protect_assert_disj_82229) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 302) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_82210; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_82233 = y_70952 || + loop_not_taken_82228; + bool index_certs_82234; + + if (!protect_assert_disj_82233) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 303) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = k2p2zq_70876; 
+ ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_82235; + + if (!protect_assert_disj_82233) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 304) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_82236; + + if (!protect_assert_disj_82233) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 305) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_82237; + int64_t loopres_82242; + bool loop_while_82243; + int64_t k_82248; + + loop_while_82243 = loop_cond_82218; + k_82248 = k_82215; + while (loop_while_82243) { + for (int64_t i_82250 = 0; i_82250 < rp1_71562; i_82250++) { + bool index_certs_82252; + + if (!y_82226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 306) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = i_82250; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_82253 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + i_82250 * + num_threads_125708)]; + + for (int64_t j0_82255 = 0; j0_82255 < upper_bound_82227; + j0_82255++) { + int64_t j_82257 = add64(x_82216, j0_82255); + bool x_82258 = sle64((int64_t) 0, j_82257); + bool y_82259 = slt64(j_82257, k2p2zq_70876); + bool bounds_check_82260 = x_82258 && y_82259; + bool index_certs_82261; + + if (!bounds_check_82260) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 307) == + -1) { + global_failure_args[0] = j_82257; + global_failure_args[1] = i_82250; + global_failure_args[2] = 
k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_82262 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (j_82257 * + (num_threads_125708 * + rp1_71562) + + i_82250 * + num_threads_125708)]; + int64_t i_82263 = sub64(j_82257, (int64_t) 1); + bool x_82264 = sle64((int64_t) 0, i_82263); + bool y_82265 = slt64(i_82263, k2p2zq_70876); + bool bounds_check_82266 = x_82264 && y_82265; + bool index_certs_82267; + + if (!bounds_check_82266) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 308) == + -1) { + global_failure_args[0] = i_82263; + global_failure_args[1] = i_82250; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_82263 * + (num_threads_125708 * + rp1_71562) + + i_82250 * + num_threads_125708)] = + lw_val_82262; + } + + bool index_certs_82269; + + if (!y_70952) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 309) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = i_82250; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (m_70948 * + (num_threads_125708 * + rp1_71562) + + i_82250 * + num_threads_125708)] = + t_82253; + } + + int64_t i_82271 = ((__global + int64_t *) mem_122047)[phys_tid_81606 + + l_82210 * + num_threads_125708]; + double t_82272 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + l_82210 * + num_threads_125708]; + double tt_82273 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + l_82210 * + num_threads_125708]; + double ttt_82274 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + l_82210 * + 
num_threads_125708)]; + + for (int64_t j0_82278 = 0; j0_82278 < upper_bound_82227; + j0_82278++) { + int64_t j_82282 = add64(x_82216, j0_82278); + bool x_82283 = sle64((int64_t) 0, j_82282); + bool y_82284 = slt64(j_82282, k2p2zq_70876); + bool bounds_check_82285 = x_82283 && y_82284; + bool index_certs_82286; + + if (!bounds_check_82285) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 310) == -1) { + global_failure_args[0] = j_82282; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_82287 = ((__global + int64_t *) mem_122047)[phys_tid_81606 + + j_82282 * + num_threads_125708]; + int64_t i_82288 = sub64(j_82282, (int64_t) 1); + bool x_82289 = sle64((int64_t) 0, i_82288); + bool y_82290 = slt64(i_82288, k2p2zq_70876); + bool bounds_check_82291 = x_82289 && y_82290; + bool index_certs_82292; + + if (!bounds_check_82291) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 311) == -1) { + global_failure_args[0] = i_82288; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_122047)[phys_tid_81606 + + i_82288 * + num_threads_125708] = + lw_val_82287; + + double lw_val_82294 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + j_82282 * + num_threads_125708]; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + i_82288 * + num_threads_125708] = + lw_val_82294; + + bool index_certs_82296; + + if (!bounds_check_82285) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 312) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_82282; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_82297 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + j_82282 * + num_threads_125708]; + bool index_certs_82298; + + if (!bounds_check_82291) { + 
{ + if (atomic_cmpxchg_i32_global(global_failure, + -1, 313) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_82288; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + i_82288 * + num_threads_125708] = + lw_val_82297; + + bool index_certs_82300; + + if (!bounds_check_82285) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 314) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_82282; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_82301 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + j_82282 * + num_threads_125708)]; + bool index_certs_82302; + + if (!bounds_check_82291) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 315) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_82288; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + i_82288 * + num_threads_125708)] = + lw_val_82301; + } + ((__global int64_t *) mem_122047)[phys_tid_81606 + m_70948 * + num_threads_125708] = + i_82271; + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + m_70948 * + num_threads_125708] = + t_82272; + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + m_70948 * + num_threads_125708] = + tt_82273; + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + m_70948 * + num_threads_125708)] = + ttt_82274; + + int64_t k_82308 = sub64(k_82248, (int64_t) 1); + bool cond_82309 = slt64(x_82216, 
k_82308); + bool loop_cond_82310; + + if (cond_82309) { + bool index_certs_82311; + + if (!y_82226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 316) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_82210; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_82312 = ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (num_threads_125708 * + k2p2zq_70876 + + l_82210 * + num_threads_125708)]; + double zt_res_82313 = 1.0e-7 * zt_arg_82312; + bool index_certs_82314; + + if (!y_82226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 317) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_82315 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + l_82210 * + num_threads_125708]; + bool zl_res_82316 = zl_arg_82315 < zt_res_82313; + + loop_cond_82310 = zl_res_82316; + } else { + loop_cond_82310 = 0; + } + + bool loop_while_tmp_127264 = loop_cond_82310; + int64_t k_tmp_127269 = k_82308; + + loop_while_82243 = loop_while_tmp_127264; + k_82248 = k_tmp_127269; + } + loopres_82237 = loop_while_82243; + loopres_82242 = k_82248; + + bool cond_82317 = x_82216 == rp1_71562; + int64_t j_m_i_82318 = sub64(rp1_71562, l_82210); + bool empty_slice_82322 = j_m_i_82318 == (int64_t) 0; + int64_t m_82323 = sub64(j_m_i_82318, (int64_t) 1); + int64_t i_p_m_t_s_82324 = add64(l_82210, m_82323); + bool zzero_leq_i_p_m_t_s_82325 = sle64((int64_t) 0, + i_p_m_t_s_82324); + bool i_p_m_t_s_leq_w_82326 = slt64(i_p_m_t_s_82324, rp1_71562); + bool i_lte_j_82327 = sle64(l_82210, rp1_71562); + bool y_82328 = zzero_leq_i_p_m_t_s_82325 && + i_p_m_t_s_leq_w_82326; + bool y_82329 = i_lte_j_82327 && y_82328; + bool ok_or_empty_82330 = empty_slice_82322 || y_82329; + bool index_ok_82331 = y_82226 && ok_or_empty_82330; 
+ + if (cond_82317) { + for (int64_t i_127275 = 0; i_127275 < k2p2zq_70876; + i_127275++) { + ((__global double *) mem_125465)[phys_tid_81606 + + i_127275 * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + i_127275 * + num_threads_125708]; + } + for (int64_t i_127276 = 0; i_127276 < (int64_t) 2; + i_127276++) { + for (int64_t i_127277 = 0; i_127277 < k2p2zq_70876; + i_127277++) { + ((__global double *) mem_125463)[phys_tid_81606 + + (i_127276 * + (num_threads_125708 * + k2p2zq_70876) + + i_127277 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (i_127276 * + (num_threads_125708 * + k2p2zq_70876) + + i_127277 * + num_threads_125708)]; + } + } + for (int64_t i_127278 = 0; i_127278 < k2p2zq_70876; + i_127278++) { + for (int64_t i_127279 = 0; i_127279 < rp1_71562; + i_127279++) { + ((__global double *) mem_125505)[phys_tid_81606 + + (i_127278 * + (num_threads_125708 * + rp1_71562) + + i_127279 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_127278 * + (num_threads_125708 * + rp1_71562) + + i_127279 * + num_threads_125708)]; + } + } + } else { + bool index_certs_82332; + + if (!index_ok_82331) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 318) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = l_82210; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_82334; + double redout_119753 = 0.0; + + for (int64_t i_119754 = 0; i_119754 < j_m_i_82318; + i_119754++) { + int64_t slice_120014 = l_82210 + i_119754; + double x_82338 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + slice_120014 * + num_threads_125708)]; + double defunc_1_f_res_82339 = x_82338 * x_82338; + double defunc_1_op_res_82337 = defunc_1_f_res_82339 
+ + redout_119753; + double redout_tmp_127280 = defunc_1_op_res_82337; + + redout_119753 = redout_tmp_127280; + } + defunc_2_reduce_res_82334 = redout_119753; + + double sqrt_res_82340; + + sqrt_res_82340 = futrts_sqrt64(defunc_2_reduce_res_82334); + + bool zeze_res_82341 = sqrt_res_82340 == 0.0; + + if (zeze_res_82341) { + for (int64_t i_127281 = 0; i_127281 < k2p2zq_70876; + i_127281++) { + ((__global double *) mem_125250)[phys_tid_81606 + + i_127281 * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + i_127281 * + num_threads_125708]; + } + for (int64_t i_127282 = 0; i_127282 < (int64_t) 2; + i_127282++) { + for (int64_t i_127283 = 0; i_127283 < k2p2zq_70876; + i_127283++) { + ((__global + double *) mem_125248)[phys_tid_81606 + + (i_127282 * + (num_threads_125708 * + k2p2zq_70876) + + i_127283 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (i_127282 * + (num_threads_125708 * + k2p2zq_70876) + + i_127283 * + num_threads_125708)]; + } + } + for (int64_t i_127284 = 0; i_127284 < k2p2zq_70876; + i_127284++) { + for (int64_t i_127285 = 0; i_127285 < rp1_71562; + i_127285++) { + ((__global + double *) mem_125455)[phys_tid_81606 + + (i_127284 * + (num_threads_125708 * + rp1_71562) + + i_127285 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_127284 * + (num_threads_125708 * + rp1_71562) + + i_127285 * + num_threads_125708)]; + } + } + } else { + bool y_82345 = slt64(l_82210, rp1_71562); + bool index_ok_82346 = y_82226 && y_82345; + bool index_certs_82347; + + if (!index_ok_82346) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 319) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = l_82210; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_82348 = ((__global + double *) 
double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)]; + bool zeze_res_82349 = znze_arg_82348 == 0.0; + bool znze_res_82350 = !zeze_res_82349; + double nrmxl_82351; + + if (znze_res_82350) { + double abs_res_82352 = fabs(sqrt_res_82340); + double sgn_res_82353 = fsignum32(znze_arg_82348); + double zt_res_82354 = abs_res_82352 * sgn_res_82353; + + nrmxl_82351 = zt_res_82354; + } else { + nrmxl_82351 = sqrt_res_82340; + } + for (int64_t i0_82356 = 0; i0_82356 < j_m_i_82318; + i0_82356++) { + int64_t i_82358 = add64(l_82210, i0_82356); + bool x_82359 = sle64((int64_t) 0, i_82358); + bool y_82360 = slt64(i_82358, rp1_71562); + bool bounds_check_82361 = x_82359 && y_82360; + bool index_ok_82362 = y_82226 && bounds_check_82361; + bool index_certs_82363; + + if (!index_ok_82362) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 320) == + -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = i_82358; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_82364 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + i_82358 * + num_threads_125708)]; + double lw_val_82365 = x_82364 / nrmxl_82351; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + i_82358 * + num_threads_125708)] = + lw_val_82365; + } + + double zp_arg_82367 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)]; + double zp_res_82368 = 1.0 + zp_arg_82367; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)] = + zp_res_82368; + + bool bounds_invalid_upwards_82370 = 
slt64(k2p2zq_70876, + x_82216); + bool valid_82371 = !bounds_invalid_upwards_82370; + bool range_valid_c_82372; + + if (!valid_82371) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 321) == -1) { + global_failure_args[0] = x_82216; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_82373 = slt64((int64_t) 0, + upper_bound_82227); + bool loop_not_taken_82374 = !loop_nonempty_82373; + bool protect_assert_disj_82375 = index_ok_82346 || + loop_not_taken_82374; + bool index_certs_82376; + + if (!protect_assert_disj_82375) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 322) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = l_82210; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_82380 = 0; i_82380 < upper_bound_82227; + i_82380++) { + int64_t index_primexp_82384 = add64(x_82216, + i_82380); + bool x_82385 = sle64((int64_t) 0, + index_primexp_82384); + bool y_82386 = slt64(index_primexp_82384, + k2p2zq_70876); + bool bounds_check_82387 = x_82385 && y_82386; + double t_82388; + double t_82390 = 0.0; + + for (int64_t i0_82389 = 0; i0_82389 < j_m_i_82318; + i0_82389++) { + int64_t i_82391 = add64(l_82210, i0_82389); + bool x_82392 = sle64((int64_t) 0, i_82391); + bool y_82393 = slt64(i_82391, rp1_71562); + bool bounds_check_82394 = x_82392 && y_82393; + bool index_ok_82395 = y_82226 && + bounds_check_82394; + bool index_certs_82396; + + if (!index_ok_82395) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 323) == + -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = i_82391; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_82397 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 
* + rp1_71562) + + i_82391 * + num_threads_125708)]; + bool index_ok_82398 = bounds_check_82387 && + bounds_check_82394; + bool index_certs_82399; + + if (!index_ok_82398) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 324) == + -1) { + global_failure_args[0] = + index_primexp_82384; + global_failure_args[1] = i_82391; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_82400 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (index_primexp_82384 * + (num_threads_125708 * + rp1_71562) + + i_82391 * + num_threads_125708)]; + double y_82401 = x_82397 * y_82400; + double loopres_82402 = t_82390 - y_82401; + double t_tmp_127290 = loopres_82402; + + t_82390 = t_tmp_127290; + } + t_82388 = t_82390; + + double y_82403 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)]; + double t_82404 = t_82388 / y_82403; + + for (int64_t i0_82406 = 0; i0_82406 < j_m_i_82318; + i0_82406++) { + int64_t i_82408 = add64(l_82210, i0_82406); + bool x_82409 = sle64((int64_t) 0, i_82408); + bool y_82410 = slt64(i_82408, rp1_71562); + bool bounds_check_82411 = x_82409 && y_82410; + bool index_ok_82412 = bounds_check_82387 && + bounds_check_82411; + bool index_certs_82413; + + if (!index_ok_82412) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 325) == + -1) { + global_failure_args[0] = + index_primexp_82384; + global_failure_args[1] = i_82408; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_82414 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (index_primexp_82384 * + (num_threads_125708 * + rp1_71562) + + i_82408 * + num_threads_125708)]; + bool index_ok_82415 = y_82226 && + bounds_check_82411; + bool index_certs_82416; + + if 
(!index_ok_82415) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 326) == + -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = i_82408; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_82417 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + i_82408 * + num_threads_125708)]; + double y_82418 = t_82404 * y_82417; + double lw_val_82419 = x_82414 + y_82418; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (index_primexp_82384 * + (num_threads_125708 * + rp1_71562) + + i_82408 * + num_threads_125708)] = + lw_val_82419; + } + + bool index_certs_82421; + + if (!bounds_check_82387) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 327) == + -1) { + global_failure_args[0] = + index_primexp_82384; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_82422 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + index_primexp_82384 * + num_threads_125708]; + bool zeze_res_82423 = zeze_arg_82422 == 0.0; + + if (!zeze_res_82423) { + bool index_ok_82426 = y_82345 && + bounds_check_82387; + bool index_certs_82427; + + if (!index_ok_82426) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 328) == + -1) { + global_failure_args[0] = + index_primexp_82384; + global_failure_args[1] = l_82210; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_82428 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (index_primexp_82384 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)]; + double abs_res_82429 = fabs(abs_arg_82428); + double zs_res_82430 = abs_res_82429 / + zeze_arg_82422; + double ztzt_res_82431 = fpow64(zs_res_82430, 
+ 2.0); + double zm_res_82432 = 1.0 - ztzt_res_82431; + double max_res_82433 = fmax64(0.0, + zm_res_82432); + double abs_res_82434 = fabs(max_res_82433); + bool zgze_res_82435 = 1.0e-6 <= abs_res_82434; + int64_t j_m_i_82436 = sub64(rp1_71562, x_82216); + + if (zgze_res_82435) { + double sqrt_res_82439; + + sqrt_res_82439 = + futrts_sqrt64(max_res_82433); + + double zt_res_82440 = zeze_arg_82422 * + sqrt_res_82439; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + index_primexp_82384 * + num_threads_125708] = + zt_res_82440; + } else { + bool empty_slice_82442 = j_m_i_82436 == + (int64_t) 0; + int64_t m_82443 = sub64(j_m_i_82436, + (int64_t) 1); + int64_t i_p_m_t_s_82444 = add64(x_82216, + m_82443); + bool zzero_leq_i_p_m_t_s_82445 = + sle64((int64_t) 0, i_p_m_t_s_82444); + bool i_p_m_t_s_leq_w_82446 = + slt64(i_p_m_t_s_82444, rp1_71562); + bool zzero_lte_i_82447 = sle64((int64_t) 0, + x_82216); + bool i_lte_j_82448 = sle64(x_82216, + rp1_71562); + bool y_82449 = i_p_m_t_s_leq_w_82446 && + zzero_lte_i_82447; + bool y_82450 = zzero_leq_i_p_m_t_s_82445 && + y_82449; + bool y_82451 = i_lte_j_82448 && y_82450; + bool forwards_ok_82452 = + zzero_lte_i_82447 && y_82451; + bool ok_or_empty_82453 = + empty_slice_82442 || forwards_ok_82452; + bool index_ok_82454 = bounds_check_82387 && + ok_or_empty_82453; + bool index_certs_82455; + + if (!index_ok_82454) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 329) == + -1) { + global_failure_args[0] = + index_primexp_82384; + global_failure_args[1] = + x_82216; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_82457; + double redout_119755 = 0.0; + + for (int64_t i_119756 = 0; i_119756 < + j_m_i_82436; i_119756++) { + int64_t slice_120015 = x_82216 + + i_119756; + double x_82461 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (index_primexp_82384 * + 
(num_threads_125708 * + rp1_71562) + + slice_120015 * + num_threads_125708)]; + double defunc_1_f_res_82462 = x_82461 * + x_82461; + double defunc_1_op_res_82460 = + defunc_1_f_res_82462 + + redout_119755; + double redout_tmp_127292 = + defunc_1_op_res_82460; + + redout_119755 = redout_tmp_127292; + } + defunc_2_reduce_res_82457 = redout_119755; + + double sqrt_res_82463; + + sqrt_res_82463 = + futrts_sqrt64(defunc_2_reduce_res_82457); + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + index_primexp_82384 * + num_threads_125708] = + sqrt_res_82463; + + bool index_certs_82465; + + if (!bounds_check_82387) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 330) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_82384; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127293 = 0; i_127293 < + (int64_t) 1; i_127293++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (index_primexp_82384 + + i_127293) * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + num_threads_125708 * + index_primexp_82384 + + i_127293 * + num_threads_125708]; + } + } + } + } + + bool index_certs_82468; + + if (!y_82226) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 331) == -1) { + global_failure_args[0] = l_82210; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127294 = 0; i_127294 < (int64_t) 1; + i_127294++) { + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + (l_82210 + + i_127294) * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + l_82210 * + (num_threads_125708 * + rp1_71562) + + num_threads_125708 * + l_82210 + + i_127294 * + num_threads_125708]; + } + + double zt_res_82471 = -1.0 * nrmxl_82351; + + 
((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (l_82210 * + (num_threads_125708 * + rp1_71562) + + l_82210 * + num_threads_125708)] = + zt_res_82471; + for (int64_t i_127295 = 0; i_127295 < k2p2zq_70876; + i_127295++) { + ((__global double *) mem_125250)[phys_tid_81606 + + i_127295 * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + i_127295 * + num_threads_125708]; + } + for (int64_t i_127296 = 0; i_127296 < (int64_t) 2; + i_127296++) { + for (int64_t i_127297 = 0; i_127297 < k2p2zq_70876; + i_127297++) { + ((__global + double *) mem_125248)[phys_tid_81606 + + (i_127296 * + (num_threads_125708 * + k2p2zq_70876) + + i_127297 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (i_127296 * + (num_threads_125708 * + k2p2zq_70876) + + i_127297 * + num_threads_125708)]; + } + } + for (int64_t i_127298 = 0; i_127298 < k2p2zq_70876; + i_127298++) { + for (int64_t i_127299 = 0; i_127299 < rp1_71562; + i_127299++) { + ((__global + double *) mem_125455)[phys_tid_81606 + + (i_127298 * + (num_threads_125708 * + rp1_71562) + + i_127299 * + num_threads_125708)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_127298 * + (num_threads_125708 * + rp1_71562) + + i_127299 * + num_threads_125708)]; + } + } + } + for (int64_t i_127300 = 0; i_127300 < k2p2zq_70876; + i_127300++) { + ((__global double *) mem_125465)[phys_tid_81606 + + i_127300 * + num_threads_125708] = + ((__global double *) mem_125250)[phys_tid_81606 + + i_127300 * + num_threads_125708]; + } + for (int64_t i_127301 = 0; i_127301 < (int64_t) 2; + i_127301++) { + for (int64_t i_127302 = 0; i_127302 < k2p2zq_70876; + i_127302++) { + ((__global double *) mem_125463)[phys_tid_81606 + + (i_127301 * + (num_threads_125708 * + k2p2zq_70876) + + i_127302 * + num_threads_125708)] = + ((__global + double *) mem_125248)[phys_tid_81606 + + (i_127301 * + (num_threads_125708 * + k2p2zq_70876) + + 
i_127302 * + num_threads_125708)]; + } + } + for (int64_t i_127303 = 0; i_127303 < k2p2zq_70876; + i_127303++) { + for (int64_t i_127304 = 0; i_127304 < rp1_71562; + i_127304++) { + ((__global double *) mem_125505)[phys_tid_81606 + + (i_127303 * + (num_threads_125708 * + rp1_71562) + + i_127304 * + num_threads_125708)] = + ((__global + double *) mem_125455)[phys_tid_81606 + + (i_127303 * + (num_threads_125708 * + rp1_71562) + + i_127304 * + num_threads_125708)]; + } + } + } + for (int64_t i_127305 = 0; i_127305 < k2p2zq_70876; + i_127305++) { + for (int64_t i_127306 = 0; i_127306 < rp1_71562; + i_127306++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_127305 * + (num_threads_125708 * + rp1_71562) + + i_127306 * + num_threads_125708)] = + ((__global double *) mem_125505)[phys_tid_81606 + + (i_127305 * + (num_threads_125708 * + rp1_71562) + + i_127306 * + num_threads_125708)]; + } + } + for (int64_t i_127307 = 0; i_127307 < k2p2zq_70876; + i_127307++) { + ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + i_127307 * + num_threads_125708] = + ((__global double *) mem_125465)[phys_tid_81606 + + i_127307 * + num_threads_125708]; + } + for (int64_t i_127308 = 0; i_127308 < (int64_t) 2; i_127308++) { + for (int64_t i_127309 = 0; i_127309 < k2p2zq_70876; + i_127309++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_81606 + + (i_127308 * + (num_threads_125708 * + k2p2zq_70876) + + i_127309 * + num_threads_125708)] = + ((__global double *) mem_125463)[phys_tid_81606 + + (i_127308 * + (num_threads_125708 * + k2p2zq_70876) + + i_127309 * + num_threads_125708)]; + } + } + + int64_t k_tmp_127263 = loopres_82242; + + k_82215 = k_tmp_127263; + } + dqrdc2_res_82209 = k_82215; + + int64_t min_arg_82473 = sub64(dqrdc2_res_82209, (int64_t) 1); + int64_t min_res_82474 = smin64(rp1_71562, min_arg_82473); + + for (int64_t i_119759 = 0; i_119759 < k2p2zq_70876; i_119759++) { + int64_t x_82479 = add64((int64_t) 1, i_119759); + 
bool cond_f_res_82480 = slt64(min_res_82474, x_82479); + + for (int64_t i_119763 = 0; i_119763 < k2p2zq_70876; + i_119763++) { + int64_t x_82484 = add64((int64_t) 1, i_119763); + bool cond_82485 = slt64(min_res_82474, x_82484); + bool x_82486 = !cond_82485; + bool y_82487 = cond_f_res_82480 && x_82486; + bool cond_82488 = cond_82485 || y_82487; + double defunc_1_f_res_82489; + + if (cond_82488) { + defunc_1_f_res_82489 = NAN; + } else { + double x_82483 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (i_119759 * + (num_threads_125708 * + rp1_71562) + + i_119763 * + num_threads_125708)]; + + defunc_1_f_res_82489 = x_82483; + } + ((__global double *) mem_122382)[phys_tid_81606 + + (i_119759 * + (num_threads_125708 * + k2p2zq_70876) + + i_119763 * + num_threads_125708)] = + defunc_1_f_res_82489; + } + } + for (int64_t i_127312 = 0; i_127312 < k2p2zq_70876; i_127312++) { + ((__global double *) mem_122435)[phys_tid_81606 + i_127312 * + num_threads_125708] = 0.0; + } + for (int64_t i_119767 = 0; i_119767 < k2p2zq_70876; i_119767++) { + for (int64_t i_127314 = 0; i_127314 < k2p2zq_70876; + i_127314++) { + ((__global double *) mem_122423)[phys_tid_81606 + + (i_119767 * + (num_threads_125708 * + k2p2zq_70876) + + i_127314 * + num_threads_125708)] = + ((__global double *) mem_122435)[phys_tid_81606 + + i_127314 * + num_threads_125708]; + } + for (int64_t i_82495 = 0; i_82495 < k2p2zq_70876; i_82495++) { + int64_t x_82497 = sub64(k2p2zq_70876, i_82495); + int64_t i_82498 = sub64(x_82497, (int64_t) 1); + bool x_82499 = sle64((int64_t) 0, i_82498); + bool y_82500 = slt64(i_82498, k2p2zq_70876); + bool bounds_check_82501 = x_82499 && y_82500; + int64_t j_m_i_82502 = sub64(k2p2zq_70876, x_82497); + bool empty_slice_82503 = j_m_i_82502 == (int64_t) 0; + int64_t m_82504 = sub64(j_m_i_82502, (int64_t) 1); + int64_t i_p_m_t_s_82505 = add64(x_82497, m_82504); + bool zzero_leq_i_p_m_t_s_82506 = sle64((int64_t) 0, + i_p_m_t_s_82505); + bool i_p_m_t_s_leq_w_82507 = 
slt64(i_p_m_t_s_82505, + k2p2zq_70876); + bool zzero_lte_i_82508 = sle64((int64_t) 0, x_82497); + bool i_lte_j_82509 = sle64(x_82497, k2p2zq_70876); + bool y_82510 = i_p_m_t_s_leq_w_82507 && zzero_lte_i_82508; + bool y_82511 = zzero_leq_i_p_m_t_s_82506 && y_82510; + bool y_82512 = i_lte_j_82509 && y_82511; + bool forwards_ok_82513 = zzero_lte_i_82508 && y_82512; + bool ok_or_empty_82514 = empty_slice_82503 || + forwards_ok_82513; + bool index_ok_82515 = bounds_check_82501 && + ok_or_empty_82514; + bool index_certs_82516; + + if (!index_ok_82515) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 332) == -1) { + global_failure_args[0] = i_82498; + global_failure_args[1] = x_82497; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + global_failure_args[4] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_82517; + + if (!ok_or_empty_82514) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 333) == -1) { + global_failure_args[0] = x_82497; + global_failure_args[1] = k2p2zq_70876; + global_failure_args[2] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_82520; + double redout_119769 = 0.0; + + for (int64_t i_119770 = 0; i_119770 < j_m_i_82502; + i_119770++) { + int64_t slice_120021 = x_82497 + i_119770; + double x_82525 = ((__global + double *) mem_122382)[phys_tid_81606 + + (slice_120021 * + (num_threads_125708 * + k2p2zq_70876) + + i_82498 * + num_threads_125708)]; + bool isnan_res_82526; + + isnan_res_82526 = futrts_isnan64(x_82525); + + double defunc_1_f_res_82527; + + if (isnan_res_82526) { + defunc_1_f_res_82527 = 0.0; + } else { + double x_82524 = ((__global + double *) mem_122423)[phys_tid_81606 + + (i_119767 * + (num_threads_125708 * + k2p2zq_70876) + + slice_120021 * + num_threads_125708)]; + double defunc_1_f_res_f_res_82528 = x_82524 * + x_82525; + + defunc_1_f_res_82527 = defunc_1_f_res_f_res_82528; + } + + 
double defunc_1_op_res_82523 = defunc_1_f_res_82527 + + redout_119769; + double redout_tmp_127316 = defunc_1_op_res_82523; + + redout_119769 = redout_tmp_127316; + } + defunc_2_reduce_res_82520 = redout_119769; + + bool index_ok_82529 = bounds_check_82501 && + bounds_check_82501; + bool index_certs_82530; + + if (!index_ok_82529) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 334) == -1) { + global_failure_args[0] = i_82498; + global_failure_args[1] = i_82498; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zs_arg_82531 = ((__global + double *) mem_122382)[phys_tid_81606 + + (i_82498 * + (num_threads_125708 * + k2p2zq_70876) + + i_82498 * + num_threads_125708)]; + bool index_certs_82532; + + if (!bounds_check_82501) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 335) == -1) { + global_failure_args[0] = i_82498; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zm_arg_82533 = ((__global + double *) mem_121938)[i_119767 * + k2p2zq_70876 + + i_82498]; + double zm_res_82534 = zm_arg_82533 - + defunc_2_reduce_res_82520; + double zs_res_82535 = zm_res_82534 / zs_arg_82531; + + ((__global double *) mem_122423)[phys_tid_81606 + + (i_119767 * + (num_threads_125708 * + k2p2zq_70876) + i_82498 * + num_threads_125708)] = + zs_res_82535; + } + } + for (int64_t i_119773 = 0; i_119773 < k2p2zq_70876; i_119773++) { + for (int64_t i_119777 = 0; i_119777 < k2p2zq_70876; + i_119777++) { + double defunc_2_reduce_res_82542; + double redout_119779 = 0.0; + + for (int64_t i_119780 = 0; i_119780 < k2p2zq_70876; + i_119780++) { + double x_82546 = ((__global + double *) mem_122423)[phys_tid_81606 + + (i_119780 * + (num_threads_125708 * + k2p2zq_70876) + + i_119773 * + num_threads_125708)]; + double x_82547 = ((__global + double *) mem_122423)[phys_tid_81606 + + (i_119780 * + (num_threads_125708 * + 
k2p2zq_70876) + + i_119777 * + num_threads_125708)]; + double defunc_1_f_res_82548 = x_82546 * x_82547; + double defunc_1_op_res_82545 = defunc_1_f_res_82548 + + redout_119779; + double redout_tmp_127319 = defunc_1_op_res_82545; + + redout_119779 = redout_tmp_127319; + } + defunc_2_reduce_res_82542 = redout_119779; + ((__global double *) mem_122464)[phys_tid_81606 + + (i_119773 * + (num_threads_125708 * + k2p2zq_70876) + + i_119777 * + num_threads_125708)] = + defunc_2_reduce_res_82542; + } + } + + int64_t min_res_82549 = smin64(r_71551, min_res_82474); + + for (int64_t i_127320 = 0; i_127320 < rp1_71562; i_127320++) { + ((__global double *) double_buffer_mem_125582)[phys_tid_81606 + + i_127320 * + num_threads_125708] = + ((__global double *) mem_122017)[gtid_81605 + i_127320 * + m_70861]; + } + for (int64_t j_82551 = 0; j_82551 < min_res_82549; j_82551++) { + bool y_82553 = slt64(j_82551, k2p2zq_70876); + bool index_certs_82554; + + if (!y_82553) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 336) == -1) { + global_failure_args[0] = j_82551; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_82555 = ((__global + double *) double_buffer_mem_125570)[phys_tid_81606 + + j_82551 * + num_threads_125708]; + bool zeze_res_82556 = zeze_arg_82555 == 0.0; + + if (zeze_res_82556) { + for (int64_t i_127322 = 0; i_127322 < rp1_71562; + i_127322++) { + ((__global double *) mem_125258)[phys_tid_81606 + + i_127322 * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_127322 * + num_threads_125708]; + } + } else { + bool y_82558 = slt64(j_82551, rp1_71562); + bool index_certs_82559; + + if (!y_82558) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 337) == -1) { + global_failure_args[0] = j_82551; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_82560 = ((__global + double *) 
double_buffer_mem_125582)[phys_tid_81606 + + j_82551 * + num_threads_125708]; + double negate_arg_82561 = zeze_arg_82555 * y_82560; + double t_82562 = 0.0 - negate_arg_82561; + int64_t x_82563 = sub64(rp1_71562, j_82551); + int64_t upper_bound_82564 = sub64(x_82563, (int64_t) 1); + double t_82565; + double t_82567 = t_82562; + + for (int64_t i0_82566 = 0; i0_82566 < upper_bound_82564; + i0_82566++) { + int64_t x_82568 = add64(j_82551, i0_82566); + int64_t i_82569 = add64((int64_t) 1, x_82568); + bool x_82570 = sle64((int64_t) 0, i_82569); + bool y_82571 = slt64(i_82569, rp1_71562); + bool bounds_check_82572 = x_82570 && y_82571; + bool index_ok_82573 = y_82553 && bounds_check_82572; + bool index_certs_82574; + + if (!index_ok_82573) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 338) == -1) { + global_failure_args[0] = j_82551; + global_failure_args[1] = i_82569; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_82575 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (j_82551 * + (num_threads_125708 * + rp1_71562) + + i_82569 * + num_threads_125708)]; + bool index_certs_82576; + + if (!bounds_check_82572) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 339) == -1) { + global_failure_args[0] = i_82569; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_82577 = ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_82569 * + num_threads_125708]; + double y_82578 = x_82575 * y_82577; + double loopres_82579 = t_82567 - y_82578; + double t_tmp_127323 = loopres_82579; + + t_82567 = t_tmp_127323; + } + t_82565 = t_82567; + + double t_82580 = t_82565 / zeze_arg_82555; + double y_82581 = zeze_arg_82555 * t_82580; + double lw_val_82582 = y_82560 + y_82581; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + j_82551 * + num_threads_125708] = + 
lw_val_82582; + for (int64_t i0_82585 = 0; i0_82585 < upper_bound_82564; + i0_82585++) { + int64_t x_82587 = add64(j_82551, i0_82585); + int64_t i_82588 = add64((int64_t) 1, x_82587); + bool x_82589 = sle64((int64_t) 0, i_82588); + bool y_82590 = slt64(i_82588, rp1_71562); + bool bounds_check_82591 = x_82589 && y_82590; + bool index_certs_82592; + + if (!bounds_check_82591) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 340) == -1) { + global_failure_args[0] = i_82588; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_82593 = ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_82588 * + num_threads_125708]; + bool index_ok_82594 = y_82553 && bounds_check_82591; + bool index_certs_82595; + + if (!index_ok_82594) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 341) == -1) { + global_failure_args[0] = j_82551; + global_failure_args[1] = i_82588; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_82596 = ((__global + double *) double_buffer_mem_125569)[phys_tid_81606 + + (j_82551 * + (num_threads_125708 * + rp1_71562) + + i_82588 * + num_threads_125708)]; + double y_82597 = t_82580 * y_82596; + double lw_val_82598 = x_82593 + y_82597; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_82588 * + num_threads_125708] = + lw_val_82598; + } + for (int64_t i_127325 = 0; i_127325 < rp1_71562; + i_127325++) { + ((__global double *) mem_125258)[phys_tid_81606 + + i_127325 * + num_threads_125708] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_127325 * + num_threads_125708]; + } + } + for (int64_t i_127326 = 0; i_127326 < rp1_71562; i_127326++) { + ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_127326 * + num_threads_125708] = + ((__global double *) mem_125258)[phys_tid_81606 + + i_127326 * + num_threads_125708]; + 
} + } + for (int64_t i_127327 = 0; i_127327 < k2p2zq_70876; i_127327++) { + ((__global double *) mem_122537)[phys_tid_81606 + i_127327 * + num_threads_125708] = 0.0; + } + for (int64_t write_iter_119781 = 0; write_iter_119781 < + k2p2zq_70876; write_iter_119781++) { + int64_t write_iv_119784 = ((__global + int64_t *) mem_122047)[phys_tid_81606 + + write_iter_119781 * + num_threads_125708]; + double defunc_2_reduce_res_82605; + double redout_119791 = 0.0; + + for (int64_t i_119792 = 0; i_119792 < k2p2zq_70876; + i_119792++) { + double x_82609 = ((__global + double *) double_buffer_mem_125582)[phys_tid_81606 + + i_119792 * + num_threads_125708]; + double x_82610 = ((__global + double *) mem_122423)[phys_tid_81606 + + (i_119792 * + (num_threads_125708 * + k2p2zq_70876) + + write_iter_119781 * + num_threads_125708)]; + double defunc_1_f_res_82611 = x_82609 * x_82610; + double defunc_1_op_res_82608 = defunc_1_f_res_82611 + + redout_119791; + double redout_tmp_127329 = defunc_1_op_res_82608; + + redout_119791 = redout_tmp_127329; + } + defunc_2_reduce_res_82605 = redout_119791; + + bool less_than_zzero_119785 = slt64(write_iv_119784, + (int64_t) 0); + bool greater_than_sizze_119786 = sle64(k2p2zq_70876, + write_iv_119784); + bool outside_bounds_dim_119787 = less_than_zzero_119785 || + greater_than_sizze_119786; + + if (!outside_bounds_dim_119787) { + ((__global double *) mem_122537)[phys_tid_81606 + + write_iv_119784 * + num_threads_125708] = + defunc_2_reduce_res_82605; + } + } + for (int64_t i_119795 = 0; i_119795 < k2p2zq_70876; i_119795++) { + int64_t x_82614 = ((__global + int64_t *) mem_122047)[phys_tid_81606 + + i_119795 * + num_threads_125708]; + + for (int64_t i_127331 = 0; i_127331 < k2p2zq_70876; + i_127331++) { + ((__global int64_t *) mem_122564)[phys_tid_81606 + + i_127331 * + num_threads_125708] = + x_82614; + } + for (int64_t i_127332 = 0; i_127332 < k2p2zq_70876; + i_127332++) { + ((__global int64_t *) mem_122552)[phys_tid_81606 + + (i_119795 * + 
(num_threads_125708 * + k2p2zq_70876) + + i_127332 * + num_threads_125708)] = + ((__global int64_t *) mem_122564)[phys_tid_81606 + + i_127332 * + num_threads_125708]; + } + } + for (int64_t i_127333 = 0; i_127333 < k2p2zq_70876; i_127333++) { + for (int64_t i_127334 = 0; i_127334 < k2p2zq_70876; + i_127334++) { + ((__global double *) mem_122575)[phys_tid_81606 + + (i_127333 * + (num_threads_125708 * + k2p2zq_70876) + + i_127334 * + num_threads_125708)] = + 0.0; + } + } + for (int64_t write_iter_119797 = 0; write_iter_119797 < + binop_x_120251; write_iter_119797++) { + int64_t new_index_120022 = squot64(write_iter_119797, + k2p2zq_70876); + int64_t binop_y_120024 = k2p2zq_70876 * new_index_120022; + int64_t new_index_120025 = write_iter_119797 - binop_y_120024; + int64_t write_iv_119799 = ((__global + int64_t *) mem_122552)[phys_tid_81606 + + (new_index_120022 * + (num_threads_125708 * + k2p2zq_70876) + + new_index_120025 * + num_threads_125708)]; + int64_t write_iv_119800 = ((__global + int64_t *) mem_122047)[phys_tid_81606 + + new_index_120025 * + num_threads_125708]; + bool less_than_zzero_119802 = slt64(write_iv_119799, + (int64_t) 0); + bool greater_than_sizze_119803 = sle64(k2p2zq_70876, + write_iv_119799); + bool outside_bounds_dim_119804 = less_than_zzero_119802 || + greater_than_sizze_119803; + bool less_than_zzero_119805 = slt64(write_iv_119800, + (int64_t) 0); + bool greater_than_sizze_119806 = sle64(k2p2zq_70876, + write_iv_119800); + bool outside_bounds_dim_119807 = less_than_zzero_119805 || + greater_than_sizze_119806; + bool outside_bounds_119809 = outside_bounds_dim_119804 || + outside_bounds_dim_119807; + + if (!outside_bounds_119809) { + for (int64_t i_127336 = 0; i_127336 < (int64_t) 1; + i_127336++) { + ((__global double *) mem_122575)[phys_tid_81606 + + (write_iv_119799 * + (num_threads_125708 * + k2p2zq_70876) + + (write_iv_119800 + + i_127336) * + num_threads_125708)] = + ((__global double *) mem_122464)[phys_tid_81606 + + new_index_120022 * + 
(num_threads_125708 * + k2p2zq_70876) + + num_threads_125708 * + new_index_120025 + + i_127336 * + num_threads_125708]; + } + } + } + for (int64_t i_119816 = 0; i_119816 < k2p2zq_70876; i_119816++) { + double x_82627 = ((__global + double *) mem_122537)[phys_tid_81606 + + i_119816 * + num_threads_125708]; + + for (int64_t i_119821 = 0; i_119821 < k2p2zq_70876; + i_119821++) { + double x_82629 = ((__global + double *) mem_122575)[phys_tid_81606 + + (i_119816 * + (num_threads_125708 * + k2p2zq_70876) + + i_119821 * + num_threads_125708)]; + bool isnan_res_82630; + + isnan_res_82630 = futrts_isnan64(x_82629); + + double defunc_0_f_res_82631; + + if (isnan_res_82630) { + defunc_0_f_res_82631 = 0.0; + } else { + defunc_0_f_res_82631 = x_82629; + } + ((__global double *) mem_122598)[phys_tid_81606 + + (i_119816 * + (num_threads_125708 * + k2p2zq_70876) + + i_119821 * + num_threads_125708)] = + defunc_0_f_res_82631; + } + + bool isnan_res_82632; + + isnan_res_82632 = futrts_isnan64(x_82627); + + double defunc_0_f_res_82633; + + if (isnan_res_82632) { + defunc_0_f_res_82633 = 0.0; + } else { + defunc_0_f_res_82633 = x_82627; + } + ((__global double *) mem_122595)[phys_tid_81606 + i_119816 * + num_threads_125708] = + defunc_0_f_res_82633; + } + + bool isnan_res_82634; + + isnan_res_82634 = futrts_isnan64(recresid_r_82182); + + bool cond_82635 = !isnan_res_82634; + bool cond_t_res_82636 = x_82152 == k2p2zq_70876; + bool x_82637 = cond_82635 && cond_t_res_82636; + bool nona_t_res_82638 = min_res_82474 == k2p2zq_70876; + bool x_82639 = x_82637 && nona_t_res_82638; + bool complement_arg_82640; + + if (x_82639) { + double defunc_2_reduce_res_82641; + double redout_119823 = 0.0; + + for (int64_t i_119824 = 0; i_119824 < k2p2zq_70876; + i_119824++) { + double x_82645 = ((__global + double *) mem_122028)[phys_tid_81606 + + i_119824 * + num_threads_125708]; + double x_82647 = ((__global double *) mem_122014)[i_119824 * + m_70861 + + gtid_81605]; + double x_82648 = ((__global + 
double *) mem_122595)[phys_tid_81606 + + i_119824 * + num_threads_125708]; + double defunc_0_f_res_82649; + double redout_119825 = 0.0; + + for (int64_t i_119826 = 0; i_119826 < k2p2zq_70876; + i_119826++) { + double x_82653 = ((__global + double *) mem_122028)[phys_tid_81606 + + i_119826 * + num_threads_125708]; + double x_82654 = ((__global + double *) mem_122011)[i_119824 * + (m_70861 * + k2p2zq_70876) + + i_119826 * + m_70861 + + gtid_81605]; + double x_82655 = ((__global + double *) mem_120246)[i_119826 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_81605 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_82656 = x_82645 * x_82653; + double y_82657 = x_82656 / fr_82171; + double defunc_1_f_res_82658 = x_82654 - y_82657; + double defunc_1_f_res_82659 = x_82655 * + defunc_1_f_res_82658; + double defunc_1_op_res_82652 = defunc_1_f_res_82659 + + redout_119825; + double redout_tmp_127341 = defunc_1_op_res_82652; + + redout_119825 = redout_tmp_127341; + } + defunc_0_f_res_82649 = redout_119825; + + double defunc_0_g_res_82660 = resid_82180 * + defunc_0_f_res_82649; + double defunc_1_f_res_82661 = x_82647 + + defunc_0_g_res_82660; + double defunc_1_f_res_82662 = x_82648 - + defunc_1_f_res_82661; + double defunc_0_f_res_82663 = fabs(defunc_1_f_res_82662); + double defunc_1_op_res_82644 = defunc_0_f_res_82663 + + redout_119823; + double redout_tmp_127340 = defunc_1_op_res_82644; + + redout_119823 = redout_tmp_127340; + } + defunc_2_reduce_res_82641 = redout_119823; + + double i64_res_82664 = sitofp_i64_f64(k2p2zq_70876); + double mean_abs_res_82665 = defunc_2_reduce_res_82641 / + i64_res_82664; + bool approx_equal_res_82666 = mean_abs_res_82665 <= tol_71054; + + complement_arg_82640 = approx_equal_res_82666; + } else { + complement_arg_82640 = 0; + } + + bool check_82667 = !complement_arg_82640; + bool check_82668; + + if (check_82667) { + bool defunc_2_reduce_res_82669; + bool redout_119827 = 1; + + for (int64_t i_119828 = 0; i_119828 < 
defunc_2_reduce_res_70985; + i_119828++) { + double x_82673 = ((__global double *) mem_121941)[i_119828 * + m_70861 + + gtid_81605]; + bool defunc_0_f_res_82674; + + defunc_0_f_res_82674 = futrts_isnan64(x_82673); + + bool x_82672 = defunc_0_f_res_82674 && redout_119827; + bool redout_tmp_127342 = x_82672; + + redout_119827 = redout_tmp_127342; + } + defunc_2_reduce_res_82669 = redout_119827; + + bool check_t_res_82675 = !defunc_2_reduce_res_82669; + + check_82668 = check_t_res_82675; + } else { + check_82668 = 0; + } + ((__global bool *) mem_122650)[gtid_81605] = check_82668; + for (int64_t i_127343 = 0; i_127343 < k2p2zq_70876; i_127343++) { + for (int64_t i_127344 = 0; i_127344 < k2p2zq_70876; + i_127344++) { + ((__global double *) mem_122654)[i_127343 * (m_70861 * + k2p2zq_70876) + + i_127344 * m_70861 + + gtid_81605] = ((__global + double *) mem_122598)[phys_tid_81606 + + (i_127343 * + (num_threads_125708 * + k2p2zq_70876) + + i_127344 * + num_threads_125708)]; + } + } + for (int64_t i_127345 = 0; i_127345 < k2p2zq_70876; i_127345++) { + ((__global double *) mem_122657)[i_127345 * m_70861 + + gtid_81605] = ((__global + double *) mem_122595)[phys_tid_81606 + + i_127345 * + num_threads_125708]; + } + ((__global int64_t *) mem_122659)[gtid_81605] = min_res_82474; + ((__global double *) mem_122661)[gtid_81605] = recresid_r_82182; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_82141 +} +__kernel void mainDetailedzisegmap_82703(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + double tol_71054, int64_t r_71551, + __global unsigned char *mem_120246, + __global unsigned char *mem_121941, + __global + unsigned char *mem_param_121972, + __global unsigned char *mem_122674, + __global unsigned char *mem_122680, + __global unsigned char *mem_122682, + __global unsigned char *mem_123135, + __global unsigned char *mem_123699, + __global unsigned char 
*mem_123702, + __global unsigned char *mem_123705, + __global unsigned char *mem_123708, + __global unsigned char *mem_123711) +{ + #define segmap_group_sizze_85364 (mainDetailedzisegmap_group_sizze_82705) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127977; + int32_t local_tid_127978; + int64_t group_sizze_127981; + int32_t wave_sizze_127980; + int32_t group_tid_127979; + + global_tid_127977 = get_global_id(0); + local_tid_127978 = get_local_id(0); + group_sizze_127981 = get_local_size(0); + wave_sizze_127980 = LOCKSTEP_WIDTH; + group_tid_127979 = get_group_id(0); + + int32_t phys_tid_82703; + + phys_tid_82703 = global_tid_127977; + + int64_t gtid_82702; + + gtid_82702 = sext_i32_i64(group_tid_127979) * segmap_group_sizze_85364 + + sext_i32_i64(local_tid_127978); + if (slt64(gtid_82702, m_70861)) { + int64_t x_85370 = ((__global int64_t *) mem_param_121972)[gtid_82702]; + double recresid_r_85375 = ((__global double *) mem_122682)[gtid_82702]; + int64_t min_res_85376 = ((__global int64_t *) mem_123135)[gtid_82702]; + bool isnan_res_85378; + + isnan_res_85378 = futrts_isnan64(recresid_r_85375); + + bool cond_85379 = !isnan_res_85378; + bool cond_t_res_85380 = x_85370 == k2p2zq_70876; + bool x_85381 = cond_85379 && cond_t_res_85380; + bool nona_t_res_85382 = min_res_85376 == k2p2zq_70876; + bool x_85383 = x_85381 && nona_t_res_85382; + bool complement_arg_85384; + + if (x_85383) { + double fr_85373 = ((__global double *) mem_122674)[gtid_82702]; + double resid_85374 = ((__global double *) mem_122680)[gtid_82702]; + double defunc_2_reduce_res_85385; + double redout_119860 = 0.0; + + for (int64_t i_119861 = 0; i_119861 < k2p2zq_70876; i_119861++) { + double x_85389 = ((__global double *) mem_123705)[i_119861 * + m_70861 + + gtid_82702]; + double x_85391 = ((__global double *) mem_123702)[i_119861 * + m_70861 + + gtid_82702]; + double x_85392 = ((__global double *) 
mem_123708)[i_119861 * + m_70861 + + gtid_82702]; + double defunc_0_f_res_85393; + double redout_119862 = 0.0; + + for (int64_t i_119863 = 0; i_119863 < k2p2zq_70876; + i_119863++) { + double x_85397 = ((__global double *) mem_123705)[i_119863 * + m_70861 + + gtid_82702]; + double x_85398 = ((__global double *) mem_123699)[i_119861 * + (m_70861 * + k2p2zq_70876) + + i_119863 * + m_70861 + + gtid_82702]; + double x_85399 = ((__global double *) mem_120246)[i_119863 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_82702 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_85400 = x_85389 * x_85397; + double y_85401 = x_85400 / fr_85373; + double defunc_1_f_res_85402 = x_85398 - y_85401; + double defunc_1_f_res_85403 = x_85399 * + defunc_1_f_res_85402; + double defunc_1_op_res_85396 = defunc_1_f_res_85403 + + redout_119862; + double redout_tmp_127983 = defunc_1_op_res_85396; + + redout_119862 = redout_tmp_127983; + } + defunc_0_f_res_85393 = redout_119862; + + double defunc_0_g_res_85404 = resid_85374 * + defunc_0_f_res_85393; + double defunc_1_f_res_85405 = x_85391 + defunc_0_g_res_85404; + double defunc_1_f_res_85406 = x_85392 - defunc_1_f_res_85405; + double defunc_0_f_res_85407 = fabs(defunc_1_f_res_85406); + double defunc_1_op_res_85388 = defunc_0_f_res_85407 + + redout_119860; + double redout_tmp_127982 = defunc_1_op_res_85388; + + redout_119860 = redout_tmp_127982; + } + defunc_2_reduce_res_85385 = redout_119860; + + double i64_res_85408 = sitofp_i64_f64(k2p2zq_70876); + double mean_abs_res_85409 = defunc_2_reduce_res_85385 / + i64_res_85408; + bool approx_equal_res_85410 = mean_abs_res_85409 <= tol_71054; + + complement_arg_85384 = approx_equal_res_85410; + } else { + complement_arg_85384 = 0; + } + + bool check_85411 = !complement_arg_85384; + bool check_85412; + + if (check_85411) { + bool defunc_2_reduce_res_85413; + bool redout_119864 = 1; + + for (int64_t i_119865 = 0; i_119865 < defunc_2_reduce_res_70985; + i_119865++) { + double x_85417 = 
((__global double *) mem_121941)[i_119865 * + m_70861 + + gtid_82702]; + bool defunc_0_f_res_85418; + + defunc_0_f_res_85418 = futrts_isnan64(x_85417); + + bool x_85416 = defunc_0_f_res_85418 && redout_119864; + bool redout_tmp_127984 = x_85416; + + redout_119864 = redout_tmp_127984; + } + defunc_2_reduce_res_85413 = redout_119864; + + bool check_t_res_85419 = !defunc_2_reduce_res_85413; + + check_85412 = check_t_res_85419; + } else { + check_85412 = 0; + } + ((__global bool *) mem_123711)[gtid_82702] = check_85412; + } + + error_0: + return; + #undef segmap_group_sizze_85364 +} +__kernel void mainDetailedzisegmap_82775(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_122003, + __global unsigned char *mem_123695) +{ + #define segmap_group_sizze_85355 (mainDetailedzisegmap_group_sizze_82778) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127972; + int32_t local_tid_127973; + int64_t group_sizze_127976; + int32_t wave_sizze_127975; + int32_t group_tid_127974; + + global_tid_127972 = get_global_id(0); + local_tid_127973 = get_local_id(0); + group_sizze_127976 = get_local_size(0); + wave_sizze_127975 = LOCKSTEP_WIDTH; + group_tid_127974 = get_group_id(0); + + int32_t phys_tid_82775; + + phys_tid_82775 = global_tid_127972; + + int64_t gtid_82773; + + gtid_82773 = squot64(sext_i32_i64(group_tid_127974) * + segmap_group_sizze_85355 + + sext_i32_i64(local_tid_127973), k2p2zq_70876); + + int64_t gtid_82774; + + gtid_82774 = sext_i32_i64(group_tid_127974) * segmap_group_sizze_85355 + + sext_i32_i64(local_tid_127973) - + squot64(sext_i32_i64(group_tid_127974) * segmap_group_sizze_85355 + + sext_i32_i64(local_tid_127973), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_82773, m_70861) && slt64(gtid_82774, k2p2zq_70876)) { + double x_85358 = ((__global double *) mem_122003)[gtid_82773 * + k2p2zq_70876 + + gtid_82774]; + bool 
isnan_res_85359; + + isnan_res_85359 = futrts_isnan64(x_85358); + + double defunc_0_f_res_85360; + + if (isnan_res_85359) { + defunc_0_f_res_85360 = 0.0; + } else { + defunc_0_f_res_85360 = x_85358; + } + ((__global double *) mem_123695)[gtid_82773 * k2p2zq_70876 + + gtid_82774] = defunc_0_f_res_85360; + } + + error_0: + return; + #undef segmap_group_sizze_85355 +} +__kernel void mainDetailedzisegmap_82797(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_122007, + __global unsigned char *mem_123691) +{ + #define segmap_group_sizze_85346 (mainDetailedzisegmap_group_sizze_82801) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127967; + int32_t local_tid_127968; + int64_t group_sizze_127971; + int32_t wave_sizze_127970; + int32_t group_tid_127969; + + global_tid_127967 = get_global_id(0); + local_tid_127968 = get_local_id(0); + group_sizze_127971 = get_local_size(0); + wave_sizze_127970 = LOCKSTEP_WIDTH; + group_tid_127969 = get_group_id(0); + + int32_t phys_tid_82797; + + phys_tid_82797 = global_tid_127967; + + int64_t gtid_82794; + + gtid_82794 = squot64(sext_i32_i64(group_tid_127969) * + segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968), k2p2zq_70876 * + k2p2zq_70876); + + int64_t gtid_82795; + + gtid_82795 = squot64(sext_i32_i64(group_tid_127969) * + segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968) - + squot64(sext_i32_i64(group_tid_127969) * + segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + + int64_t gtid_82796; + + gtid_82796 = sext_i32_i64(group_tid_127969) * segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968) - + squot64(sext_i32_i64(group_tid_127969) * segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968), k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - 
squot64(sext_i32_i64(group_tid_127969) * + segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968) - + squot64(sext_i32_i64(group_tid_127969) * + segmap_group_sizze_85346 + + sext_i32_i64(local_tid_127968), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + if ((slt64(gtid_82794, m_70861) && slt64(gtid_82795, k2p2zq_70876)) && + slt64(gtid_82796, k2p2zq_70876)) { + double x_85349 = ((__global double *) mem_122007)[gtid_82794 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_82795 * + k2p2zq_70876 + + gtid_82796]; + bool isnan_res_85350; + + isnan_res_85350 = futrts_isnan64(x_85349); + + double defunc_0_f_res_85351; + + if (isnan_res_85350) { + defunc_0_f_res_85351 = 0.0; + } else { + defunc_0_f_res_85351 = x_85349; + } + ((__global double *) mem_123691)[gtid_82794 * (k2p2zq_70876 * + k2p2zq_70876) + + gtid_82795 * k2p2zq_70876 + + gtid_82796] = defunc_0_f_res_85351; + } + + error_0: + return; + #undef segmap_group_sizze_85346 +} +__kernel void mainDetailedzisegmap_82845(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t binop_x_120251, __global + unsigned char *mem_122007, __global + unsigned char *defunc_3_map_res_r_mem_123630, + __global unsigned char *mem_123685) +{ + #define segmap_group_sizze_85327 (mainDetailedzisegmap_group_sizze_82848) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127962; + int32_t local_tid_127963; + int64_t group_sizze_127966; + int32_t wave_sizze_127965; + int32_t group_tid_127964; + + global_tid_127962 = get_global_id(0); + local_tid_127963 = get_local_id(0); + group_sizze_127966 = get_local_size(0); + wave_sizze_127965 = LOCKSTEP_WIDTH; + group_tid_127964 = get_group_id(0); + + int32_t phys_tid_82845; + + phys_tid_82845 = global_tid_127962; + + int64_t gtid_82843; + + gtid_82843 = squot64(sext_i32_i64(group_tid_127964) * + segmap_group_sizze_85327 + + 
sext_i32_i64(local_tid_127963), binop_x_120251); + + int64_t gtid_82844; + + gtid_82844 = sext_i32_i64(group_tid_127964) * segmap_group_sizze_85327 + + sext_i32_i64(local_tid_127963) - + squot64(sext_i32_i64(group_tid_127964) * segmap_group_sizze_85327 + + sext_i32_i64(local_tid_127963), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_82843, m_70861) && slt64(gtid_82844, binop_x_120251)) { + int64_t binop_x_115166 = gtid_82843 * binop_x_120251; + int64_t binop_x_115167 = gtid_82844 + binop_x_115166; + int64_t new_index_115169 = squot64(binop_x_115167, binop_x_120251); + int64_t binop_y_115177 = new_index_115169 * binop_x_120251; + int64_t binop_x_115178 = binop_x_115167 - binop_y_115177; + int64_t new_index_115179 = squot64(binop_x_115178, k2p2zq_70876); + int64_t write_index_85330 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_70876 + + new_index_115179]; + int64_t binop_y_115234 = k2p2zq_70876 * new_index_115179; + int64_t new_index_115235 = binop_x_115178 - binop_y_115234; + int64_t write_index_85331 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_70876 + + new_index_115235]; + double write_value_85332 = ((__global + double *) defunc_3_map_res_r_mem_123630)[new_index_115169 * + (k2p2zq_70876 * + k2p2zq_70876) + + new_index_115179 * + k2p2zq_70876 + + new_index_115235]; + + if (((sle64((int64_t) 0, gtid_82843) && slt64(gtid_82843, m_70861)) && + (sle64((int64_t) 0, write_index_85330) && slt64(write_index_85330, + k2p2zq_70876))) && + (sle64((int64_t) 0, write_index_85331) && slt64(write_index_85331, + k2p2zq_70876))) { + ((__global double *) mem_122007)[gtid_82843 * (k2p2zq_70876 * + k2p2zq_70876) + + write_index_85330 * k2p2zq_70876 + + write_index_85331] = + write_value_85332; + } + } + + error_0: + return; + #undef segmap_group_sizze_85327 +} +__kernel void mainDetailedzisegmap_83002(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t rp1_71562, + int64_t binop_x_120251, __global + unsigned 
char *mem_122003, __global + unsigned char *mem_123133, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123681) +{ + #define segmap_group_sizze_85256 (mainDetailedzisegmap_group_sizze_83005) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127956; + int32_t local_tid_127957; + int64_t group_sizze_127960; + int32_t wave_sizze_127959; + int32_t group_tid_127958; + + global_tid_127956 = get_global_id(0); + local_tid_127957 = get_local_id(0); + group_sizze_127960 = get_local_size(0); + wave_sizze_127959 = LOCKSTEP_WIDTH; + group_tid_127958 = get_group_id(0); + + int32_t phys_tid_83002; + + phys_tid_83002 = global_tid_127956; + + int64_t gtid_83000; + + gtid_83000 = squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_85256 + + sext_i32_i64(local_tid_127957), k2p2zq_70876); + + int64_t gtid_83001; + + gtid_83001 = sext_i32_i64(group_tid_127958) * segmap_group_sizze_85256 + + sext_i32_i64(local_tid_127957) - + squot64(sext_i32_i64(group_tid_127958) * segmap_group_sizze_85256 + + sext_i32_i64(local_tid_127957), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_83000, m_70861) && slt64(gtid_83001, k2p2zq_70876)) { + int64_t write_index_85261 = ((__global + int64_t *) mem_123133)[gtid_83001 * + m_70861 + + gtid_83000]; + double defunc_2_reduce_res_85262; + double redout_119858 = 0.0; + + for (int64_t i_119859 = 0; i_119859 < k2p2zq_70876; i_119859++) { + double x_85266 = ((__global double *) mem_123681)[gtid_83000 * + rp1_71562 + + i_119859]; + double x_85267 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_83000 * + binop_x_120251 + + i_119859 * + k2p2zq_70876 + + gtid_83001]; + double defunc_1_f_res_85268 = x_85266 * x_85267; + double defunc_1_op_res_85265 = defunc_1_f_res_85268 + redout_119858; + double redout_tmp_127961 = defunc_1_op_res_85265; + + redout_119858 = redout_tmp_127961; + } + defunc_2_reduce_res_85262 = 
redout_119858; + if ((sle64((int64_t) 0, gtid_83000) && slt64(gtid_83000, m_70861)) && + (sle64((int64_t) 0, write_index_85261) && slt64(write_index_85261, + k2p2zq_70876))) { + ((__global double *) mem_122003)[gtid_83000 * k2p2zq_70876 + + write_index_85261] = + defunc_2_reduce_res_85262; + } + } + + error_0: + return; + #undef segmap_group_sizze_85256 +} +__kernel void mainDetailedzisegmap_83057(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t r_71551, int64_t rp1_71562, + int64_t num_groups_85178, + int64_t num_threads_125770, __global + unsigned char *mem_123130, __global + unsigned char *mem_123135, __global + unsigned char *mem_123633, __global + unsigned char *mem_123641, __global + unsigned char *mem_123678, __global + unsigned char *mem_125341, __global + unsigned char *double_buffer_mem_125599) +{ + #define segmap_group_sizze_85177 (mainDetailedzisegmap_group_sizze_83059) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127940; + int32_t local_tid_127941; + int64_t group_sizze_127944; + int32_t wave_sizze_127943; + int32_t group_tid_127942; + + global_tid_127940 = get_global_id(0); + local_tid_127941 = get_local_id(0); + group_sizze_127944 = get_local_size(0); + wave_sizze_127943 = LOCKSTEP_WIDTH; + group_tid_127942 = get_group_id(0); + + int32_t phys_tid_83057; + + phys_tid_83057 = global_tid_127940; + + int32_t phys_group_id_127945; + + phys_group_id_127945 = get_group_id(0); + for (int32_t i_127946 = 0; i_127946 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_85177)) - + phys_group_id_127945, sext_i64_i32(num_groups_85178)); + i_127946++) { + int32_t virt_group_id_127947 = 
phys_group_id_127945 + i_127946 * + sext_i64_i32(num_groups_85178); + int64_t gtid_83056 = sext_i32_i64(virt_group_id_127947) * + segmap_group_sizze_85177 + sext_i32_i64(local_tid_127941); + + if (slt64(gtid_83056, m_70861)) { + int64_t min_res_85184 = ((__global + int64_t *) mem_123135)[gtid_83056]; + int64_t min_res_85185 = smin64(r_71551, min_res_85184); + + for (int64_t i_127948 = 0; i_127948 < rp1_71562; i_127948++) { + ((__global double *) double_buffer_mem_125599)[phys_tid_83057 + + i_127948 * + num_threads_125770] = + ((__global double *) mem_123633)[gtid_83056 + i_127948 * + m_70861]; + } + for (int64_t j_85187 = 0; j_85187 < min_res_85185; j_85187++) { + bool y_85189 = slt64(j_85187, k2p2zq_70876); + bool index_certs_85190; + + if (!y_85189) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 380) == -1) { + global_failure_args[0] = j_85187; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_85191 = ((__global + double *) mem_123130)[j_85187 * + m_70861 + + gtid_83056]; + bool zeze_res_85192 = zeze_arg_85191 == 0.0; + + if (zeze_res_85192) { + for (int64_t i_127950 = 0; i_127950 < rp1_71562; + i_127950++) { + ((__global double *) mem_125341)[phys_tid_83057 + + i_127950 * + num_threads_125770] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_127950 * + num_threads_125770]; + } + } else { + bool y_85194 = slt64(j_85187, rp1_71562); + bool index_certs_85195; + + if (!y_85194) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 381) == -1) { + global_failure_args[0] = j_85187; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_85196 = ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + j_85187 * + num_threads_125770]; + double negate_arg_85197 = zeze_arg_85191 * y_85196; + double t_85198 = 0.0 - negate_arg_85197; + int64_t x_85199 = sub64(rp1_71562, j_85187); + int64_t 
upper_bound_85200 = sub64(x_85199, (int64_t) 1); + double t_85201; + double t_85203 = t_85198; + + for (int64_t i0_85202 = 0; i0_85202 < upper_bound_85200; + i0_85202++) { + int64_t x_85204 = add64(j_85187, i0_85202); + int64_t i_85205 = add64((int64_t) 1, x_85204); + bool x_85206 = sle64((int64_t) 0, i_85205); + bool y_85207 = slt64(i_85205, rp1_71562); + bool bounds_check_85208 = x_85206 && y_85207; + bool index_ok_85209 = y_85189 && bounds_check_85208; + bool index_certs_85210; + + if (!index_ok_85209) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 382) == -1) { + global_failure_args[0] = j_85187; + global_failure_args[1] = i_85205; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_85211 = ((__global + double *) mem_123641)[i_85205 * + (m_70861 * + k2p2zq_70876) + + j_85187 * + m_70861 + + gtid_83056]; + bool index_certs_85212; + + if (!bounds_check_85208) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 383) == -1) { + global_failure_args[0] = i_85205; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_85213 = ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_85205 * + num_threads_125770]; + double y_85214 = x_85211 * y_85213; + double loopres_85215 = t_85203 - y_85214; + double t_tmp_127951 = loopres_85215; + + t_85203 = t_tmp_127951; + } + t_85201 = t_85203; + + double t_85216 = t_85201 / zeze_arg_85191; + double y_85217 = zeze_arg_85191 * t_85216; + double lw_val_85218 = y_85196 + y_85217; + + ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + j_85187 * + num_threads_125770] = + lw_val_85218; + for (int64_t i0_85221 = 0; i0_85221 < upper_bound_85200; + i0_85221++) { + int64_t x_85223 = add64(j_85187, i0_85221); + int64_t i_85224 = add64((int64_t) 1, x_85223); + bool x_85225 = sle64((int64_t) 0, i_85224); + bool y_85226 = slt64(i_85224, rp1_71562); 
+ bool bounds_check_85227 = x_85225 && y_85226; + bool index_certs_85228; + + if (!bounds_check_85227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 384) == -1) { + global_failure_args[0] = i_85224; + global_failure_args[1] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_85229 = ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_85224 * + num_threads_125770]; + bool index_ok_85230 = y_85189 && bounds_check_85227; + bool index_certs_85231; + + if (!index_ok_85230) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 385) == -1) { + global_failure_args[0] = j_85187; + global_failure_args[1] = i_85224; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_85232 = ((__global + double *) mem_123641)[i_85224 * + (m_70861 * + k2p2zq_70876) + + j_85187 * + m_70861 + + gtid_83056]; + double y_85233 = t_85216 * y_85232; + double lw_val_85234 = x_85229 + y_85233; + + ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_85224 * + num_threads_125770] = + lw_val_85234; + } + for (int64_t i_127953 = 0; i_127953 < rp1_71562; + i_127953++) { + ((__global double *) mem_125341)[phys_tid_83057 + + i_127953 * + num_threads_125770] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_127953 * + num_threads_125770]; + } + } + for (int64_t i_127954 = 0; i_127954 < rp1_71562; i_127954++) { + ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_127954 * + num_threads_125770] = + ((__global double *) mem_125341)[phys_tid_83057 + + i_127954 * + num_threads_125770]; + } + } + for (int64_t i_127955 = 0; i_127955 < rp1_71562; i_127955++) { + ((__global double *) mem_123678)[i_127955 * m_70861 + + gtid_83056] = ((__global + double *) double_buffer_mem_125599)[phys_tid_83057 + + i_127955 * + num_threads_125770]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } 
+ + error_0: + return; + #undef segmap_group_sizze_85177 +} +__kernel void mainDetailedzisegmap_83121(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t num_groups_85119, + int64_t binop_x_120251, + int64_t num_threads_125766, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123396, + __global unsigned char *mem_123399, + __global unsigned char *mem_123415) +{ + #define segmap_group_sizze_85118 (mainDetailedzisegmap_group_sizze_83124) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127797; + int32_t local_tid_127798; + int64_t group_sizze_127801; + int32_t wave_sizze_127800; + int32_t group_tid_127799; + + global_tid_127797 = get_global_id(0); + local_tid_127798 = get_local_id(0); + group_sizze_127801 = get_local_size(0); + wave_sizze_127800 = LOCKSTEP_WIDTH; + group_tid_127799 = get_group_id(0); + + int32_t phys_tid_83121; + + phys_tid_83121 = global_tid_127797; + + int32_t phys_group_id_127802; + + phys_group_id_127802 = get_group_id(0); + for (int32_t i_127803 = 0; i_127803 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + segmap_group_sizze_85118)) - + phys_group_id_127802, sext_i64_i32(num_groups_85119)); + i_127803++) { + int32_t virt_group_id_127804 = phys_group_id_127802 + i_127803 * + sext_i64_i32(num_groups_85119); + int64_t gtid_83119 = squot64(sext_i32_i64(virt_group_id_127804) * + segmap_group_sizze_85118 + + sext_i32_i64(local_tid_127798), + k2p2zq_70876); + int64_t gtid_83120 = sext_i32_i64(virt_group_id_127804) * + segmap_group_sizze_85118 + sext_i32_i64(local_tid_127798) - + squot64(sext_i32_i64(virt_group_id_127804) * + segmap_group_sizze_85118 + + sext_i32_i64(local_tid_127798), k2p2zq_70876) * + k2p2zq_70876; + + if (slt64(gtid_83119, m_70861) && slt64(gtid_83120, k2p2zq_70876)) { + for (int64_t i_119854 = 0; i_119854 < k2p2zq_70876; i_119854++) { + double 
defunc_2_reduce_res_85130; + double redout_119856 = 0.0; + + for (int64_t i_119857 = 0; i_119857 < k2p2zq_70876; + i_119857++) { + double x_85134 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_83119 * + binop_x_120251 + + i_119857 * + k2p2zq_70876 + + gtid_83120]; + double x_85135 = ((__global + double *) mem_123396)[gtid_83119 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_119854 * + k2p2zq_70876 + + i_119857]; + double defunc_1_f_res_85136 = x_85134 * x_85135; + double defunc_1_op_res_85133 = defunc_1_f_res_85136 + + redout_119856; + double redout_tmp_127806 = defunc_1_op_res_85133; + + redout_119856 = redout_tmp_127806; + } + defunc_2_reduce_res_85130 = redout_119856; + ((__global double *) mem_123399)[phys_tid_83121 + i_119854 * + num_threads_125766] = + defunc_2_reduce_res_85130; + } + for (int64_t i_127807 = 0; i_127807 < k2p2zq_70876; i_127807++) { + ((__global double *) mem_123415)[i_127807 * (k2p2zq_70876 * + m_70861) + + gtid_83119 * k2p2zq_70876 + + gtid_83120] = ((__global + double *) mem_123399)[phys_tid_83121 + + i_127807 * + num_threads_125766]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_85118 +} +__kernel void mainDetailedzisegmap_83337(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t x_84993, int64_t i_84994, + int64_t j_m_i_84998, + int64_t num_groups_85026, + int64_t num_threads_125758, __global + unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123241, __global + unsigned char *mem_123259, __global + unsigned char *mem_123263, __global + unsigned char *mem_123275, __global + unsigned char *mem_123287) +{ + #define segmap_group_sizze_85025 (mainDetailedzisegmap_group_sizze_83339) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127694; + int32_t local_tid_127695; + int64_t 
group_sizze_127698; + int32_t wave_sizze_127697; + int32_t group_tid_127696; + + global_tid_127694 = get_global_id(0); + local_tid_127695 = get_local_id(0); + group_sizze_127698 = get_local_size(0); + wave_sizze_127697 = LOCKSTEP_WIDTH; + group_tid_127696 = get_group_id(0); + + int32_t phys_tid_83337; + + phys_tid_83337 = global_tid_127694; + + int32_t phys_group_id_127699; + + phys_group_id_127699 = get_group_id(0); + for (int32_t i_127700 = 0; i_127700 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_85025)) - + phys_group_id_127699, sext_i64_i32(num_groups_85026)); + i_127700++) { + int32_t virt_group_id_127701 = phys_group_id_127699 + i_127700 * + sext_i64_i32(num_groups_85026); + int64_t gtid_83336 = sext_i32_i64(virt_group_id_127701) * + segmap_group_sizze_85025 + sext_i32_i64(local_tid_127695); + + if (slt64(gtid_83336, m_70861)) { + double defunc_3_map_res_r_transformed_row_85031 = ((__global + double *) mem_123143)[gtid_83336 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_84994 * + k2p2zq_70876 + + i_84994]; + + for (int64_t i_119843 = 0; i_119843 < k2p2zq_70876; i_119843++) { + for (int64_t i_127703 = 0; i_127703 < k2p2zq_70876; + i_127703++) { + ((__global double *) mem_123275)[phys_tid_83337 + i_127703 * + num_threads_125758] = + ((__global double *) mem_123259)[i_119843 * (m_70861 * + k2p2zq_70876) + + gtid_83336 + i_127703 * + m_70861]; + } + + double defunc_2_map_res_transformed_row_85036 = ((__global + double *) mem_121938)[i_119843 * + k2p2zq_70876 + + i_84994]; + double defunc_2_reduce_res_85037; + double redout_119846 = 0.0; + + for (int64_t i_119847 = 0; i_119847 < j_m_i_84998; i_119847++) { + int64_t slice_120040 = x_84993 + i_119847; + double x_85042 = ((__global + double *) mem_123241)[slice_120040 * + (k2p2zq_70876 * + m_70861) + + gtid_83336 * + k2p2zq_70876 + + i_84994]; + bool isnan_res_85043; + + isnan_res_85043 = futrts_isnan64(x_85042); + + double defunc_1_f_res_85044; + + if (isnan_res_85043) { + defunc_1_f_res_85044 = 0.0; 
+ } else { + double x_85041 = ((__global + double *) mem_123259)[i_119843 * + (m_70861 * + k2p2zq_70876) + + slice_120040 * + m_70861 + + gtid_83336]; + double defunc_1_f_res_f_res_85045 = x_85041 * x_85042; + + defunc_1_f_res_85044 = defunc_1_f_res_f_res_85045; + } + + double defunc_1_op_res_85040 = defunc_1_f_res_85044 + + redout_119846; + double redout_tmp_127704 = defunc_1_op_res_85040; + + redout_119846 = redout_tmp_127704; + } + defunc_2_reduce_res_85037 = redout_119846; + + double zm_res_85046 = defunc_2_map_res_transformed_row_85036 - + defunc_2_reduce_res_85037; + double zs_res_85047 = zm_res_85046 / + defunc_3_map_res_r_transformed_row_85031; + + ((__global double *) mem_123275)[phys_tid_83337 + i_84994 * + num_threads_125758] = + zs_res_85047; + for (int64_t i_127705 = 0; i_127705 < k2p2zq_70876; + i_127705++) { + ((__global double *) mem_123263)[phys_tid_83337 + + (i_119843 * + (num_threads_125758 * + k2p2zq_70876) + + i_127705 * + num_threads_125758)] = + ((__global double *) mem_123275)[phys_tid_83337 + + i_127705 * + num_threads_125758]; + } + } + for (int64_t i_127706 = 0; i_127706 < k2p2zq_70876; i_127706++) { + for (int64_t i_127707 = 0; i_127707 < k2p2zq_70876; + i_127707++) { + ((__global double *) mem_123287)[i_127706 * (m_70861 * + k2p2zq_70876) + + i_127707 * m_70861 + + gtid_83336] = ((__global + double *) mem_123263)[phys_tid_83337 + + (i_127706 * + (num_threads_125758 * + k2p2zq_70876) + + i_127707 * + num_threads_125758)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_85025 +} +__kernel void mainDetailedzisegmap_83425(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t i_84994, + int64_t binop_x_120251, __global + unsigned char *mem_param_123252, + __global unsigned char *mem_123342) +{ + #define segmap_group_sizze_85107 (mainDetailedzisegmap_group_sizze_83429) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int 
block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127789; + int32_t local_tid_127790; + int64_t group_sizze_127793; + int32_t wave_sizze_127792; + int32_t group_tid_127791; + + global_tid_127789 = get_global_id(0); + local_tid_127790 = get_local_id(0); + group_sizze_127793 = get_local_size(0); + wave_sizze_127792 = LOCKSTEP_WIDTH; + group_tid_127791 = get_group_id(0); + + int32_t phys_tid_83425; + + phys_tid_83425 = global_tid_127789; + + int64_t gtid_83422; + + gtid_83422 = squot64(sext_i32_i64(group_tid_127791) * + segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790), k2p2zq_70876); + + int64_t gtid_83423; + + gtid_83423 = sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790) - + squot64(sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790), k2p2zq_70876) * k2p2zq_70876; + + int64_t gtid_83424; + + gtid_83424 = sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790) - + squot64(sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790), k2p2zq_70876) * k2p2zq_70876 - + (sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790) - + squot64(sext_i32_i64(group_tid_127791) * segmap_group_sizze_85107 + + sext_i32_i64(local_tid_127790), k2p2zq_70876) * k2p2zq_70876); + if ((slt64(gtid_83422, m_70861) && slt64(gtid_83423, k2p2zq_70876)) && + slt64(gtid_83424, (int64_t) 1)) { + double zs_res_85110 = ((__global double *) mem_123342)[gtid_83422 * + k2p2zq_70876 + + gtid_83423]; + + if (((sle64((int64_t) 0, gtid_83422) && slt64(gtid_83422, m_70861)) && + (sle64((int64_t) 0, gtid_83423) && slt64(gtid_83423, + k2p2zq_70876))) && + (sle64((int64_t) 0, i_84994) && slt64(i_84994, k2p2zq_70876))) { + ((__global double *) mem_param_123252)[gtid_83422 * binop_x_120251 + + gtid_83423 * k2p2zq_70876 + + i_84994] = zs_res_85110; + } + } + + error_0: + return; + #undef 
segmap_group_sizze_85107 +} +__kernel void mainDetailedzisegmap_83437(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t i_84994, __global + unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123338, __global + unsigned char *mem_123342) +{ + #define segmap_group_sizze_85096 (mainDetailedzisegmap_group_sizze_83440) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127784; + int32_t local_tid_127785; + int64_t group_sizze_127788; + int32_t wave_sizze_127787; + int32_t group_tid_127786; + + global_tid_127784 = get_global_id(0); + local_tid_127785 = get_local_id(0); + group_sizze_127788 = get_local_size(0); + wave_sizze_127787 = LOCKSTEP_WIDTH; + group_tid_127786 = get_group_id(0); + + int32_t phys_tid_83437; + + phys_tid_83437 = global_tid_127784; + + int64_t gtid_83435; + + gtid_83435 = squot64(sext_i32_i64(group_tid_127786) * + segmap_group_sizze_85096 + + sext_i32_i64(local_tid_127785), k2p2zq_70876); + + int64_t gtid_83436; + + gtid_83436 = sext_i32_i64(group_tid_127786) * segmap_group_sizze_85096 + + sext_i32_i64(local_tid_127785) - + squot64(sext_i32_i64(group_tid_127786) * segmap_group_sizze_85096 + + sext_i32_i64(local_tid_127785), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_83435, m_70861) && slt64(gtid_83436, k2p2zq_70876)) { + double defunc_3_map_res_r_transformed_row_85099 = ((__global + double *) mem_123143)[gtid_83435 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_84994 * + k2p2zq_70876 + + i_84994]; + double defunc_2_map_res_transformed_row_85100 = ((__global + double *) mem_121938)[gtid_83436 * + k2p2zq_70876 + + i_84994]; + double defunc_2_reduce_res_85101 = ((__global + double *) mem_123338)[gtid_83435 * + k2p2zq_70876 + + gtid_83436]; + double zm_res_85102 = defunc_2_map_res_transformed_row_85100 - + defunc_2_reduce_res_85101; + double zs_res_85103 = zm_res_85102 / + 
defunc_3_map_res_r_transformed_row_85099; + + ((__global double *) mem_123342)[gtid_83435 * k2p2zq_70876 + + gtid_83436] = zs_res_85103; + } + + error_0: + return; + #undef segmap_group_sizze_85096 +} +__kernel void mainDetailedzisegmap_83764(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t rp1_71562, __global + unsigned char *mem_123127, __global + unsigned char *mem_123135, __global + unsigned char *mem_123138, __global + unsigned char *mem_123143) +{ + #define segmap_group_sizze_84903 (mainDetailedzisegmap_group_sizze_83768) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127661; + int32_t local_tid_127662; + int64_t group_sizze_127665; + int32_t wave_sizze_127664; + int32_t group_tid_127663; + + global_tid_127661 = get_global_id(0); + local_tid_127662 = get_local_id(0); + group_sizze_127665 = get_local_size(0); + wave_sizze_127664 = LOCKSTEP_WIDTH; + group_tid_127663 = get_group_id(0); + + int32_t phys_tid_83764; + + phys_tid_83764 = global_tid_127661; + + int64_t gtid_83761; + + gtid_83761 = squot64(sext_i32_i64(group_tid_127663) * + segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662), k2p2zq_70876 * + k2p2zq_70876); + + int64_t gtid_83762; + + gtid_83762 = squot64(sext_i32_i64(group_tid_127663) * + segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662) - + squot64(sext_i32_i64(group_tid_127663) * + segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + + int64_t gtid_83763; + + gtid_83763 = sext_i32_i64(group_tid_127663) * segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662) - + squot64(sext_i32_i64(group_tid_127663) * segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662), k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - squot64(sext_i32_i64(group_tid_127663) * + segmap_group_sizze_84903 + + 
sext_i32_i64(local_tid_127662) - + squot64(sext_i32_i64(group_tid_127663) * + segmap_group_sizze_84903 + + sext_i32_i64(local_tid_127662), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + if ((slt64(gtid_83761, m_70861) && slt64(gtid_83762, k2p2zq_70876)) && + slt64(gtid_83763, k2p2zq_70876)) { + int64_t min_res_84906 = ((__global int64_t *) mem_123135)[gtid_83761]; + bool cond_f_res_84907 = ((__global bool *) mem_123138)[gtid_83761 * + k2p2zq_70876 + + gtid_83762]; + int64_t x_84910 = add64((int64_t) 1, gtid_83763); + bool cond_84911 = slt64(min_res_84906, x_84910); + bool x_84912 = !cond_84911; + bool y_84913 = cond_f_res_84907 && x_84912; + bool cond_84914 = cond_84911 || y_84913; + double defunc_1_f_res_84915; + + if (cond_84914) { + defunc_1_f_res_84915 = NAN; + } else { + double x_84909 = ((__global double *) mem_123127)[gtid_83762 * + (m_70861 * + rp1_71562) + + gtid_83763 * + m_70861 + + gtid_83761]; + + defunc_1_f_res_84915 = x_84909; + } + ((__global double *) mem_123143)[gtid_83761 * (k2p2zq_70876 * + k2p2zq_70876) + + gtid_83762 * k2p2zq_70876 + + gtid_83763] = defunc_1_f_res_84915; + } + + error_0: + return; + #undef segmap_group_sizze_84903 +} +__kernel void mainDetailedzisegmap_83799(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global unsigned char *mem_123135, + __global unsigned char *mem_123138) +{ + #define segmap_group_sizze_84888 (mainDetailedzisegmap_group_sizze_83802) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127656; + int32_t local_tid_127657; + int64_t group_sizze_127660; + int32_t wave_sizze_127659; + int32_t group_tid_127658; + + global_tid_127656 = get_global_id(0); + local_tid_127657 = get_local_id(0); + group_sizze_127660 = get_local_size(0); + wave_sizze_127659 = LOCKSTEP_WIDTH; + group_tid_127658 = get_group_id(0); + + int32_t phys_tid_83799; + + 
phys_tid_83799 = global_tid_127656; + + int64_t gtid_83797; + + gtid_83797 = squot64(sext_i32_i64(group_tid_127658) * + segmap_group_sizze_84888 + + sext_i32_i64(local_tid_127657), k2p2zq_70876); + + int64_t gtid_83798; + + gtid_83798 = sext_i32_i64(group_tid_127658) * segmap_group_sizze_84888 + + sext_i32_i64(local_tid_127657) - + squot64(sext_i32_i64(group_tid_127658) * segmap_group_sizze_84888 + + sext_i32_i64(local_tid_127657), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_83797, m_70861) && slt64(gtid_83798, k2p2zq_70876)) { + int64_t min_res_84891 = ((__global int64_t *) mem_123135)[gtid_83797]; + int64_t x_84893 = add64((int64_t) 1, gtid_83798); + bool cond_f_res_84894 = slt64(min_res_84891, x_84893); + + ((__global bool *) mem_123138)[gtid_83797 * k2p2zq_70876 + gtid_83798] = + cond_f_res_84894; + } + + error_0: + return; + #undef segmap_group_sizze_84888 +} +__kernel void mainDetailedzisegmap_83857(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_70948, unsigned char y_70952, + int64_t k_71067, int64_t rp1_71562, + int64_t min_res_71574, + int64_t num_groups_84593, + int64_t num_threads_125743, __global + unsigned char *mem_120248, __global + unsigned char *mem_122793, __global + unsigned char *mem_122796, __global + unsigned char *mem_122800, __global + unsigned char *mem_122803, __global + unsigned char *mem_123127, __global + unsigned char *mem_123130, __global + unsigned char *mem_123133, __global + unsigned char *mem_123135, __global + unsigned char *mem_125265, __global + unsigned char *mem_125267, __global + unsigned char *mem_125472, __global + unsigned char *mem_125480, __global + unsigned char *mem_125482, __global + unsigned char *mem_125512, __global + unsigned char *double_buffer_mem_125586, + __global + unsigned char *double_buffer_mem_125587, + __global + unsigned char *double_buffer_mem_125588) +{ + #define segmap_group_sizze_84592 
(mainDetailedzisegmap_group_sizze_83859) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127587; + int32_t local_tid_127588; + int64_t group_sizze_127591; + int32_t wave_sizze_127590; + int32_t group_tid_127589; + + global_tid_127587 = get_global_id(0); + local_tid_127588 = get_local_id(0); + group_sizze_127591 = get_local_size(0); + wave_sizze_127590 = LOCKSTEP_WIDTH; + group_tid_127589 = get_group_id(0); + + int32_t phys_tid_83857; + + phys_tid_83857 = global_tid_127587; + + int32_t phys_group_id_127592; + + phys_group_id_127592 = get_group_id(0); + for (int32_t i_127593 = 0; i_127593 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_84592)) - + phys_group_id_127592, sext_i64_i32(num_groups_84593)); + i_127593++) { + int32_t virt_group_id_127594 = phys_group_id_127592 + i_127593 * + sext_i64_i32(num_groups_84593); + int64_t gtid_83856 = sext_i32_i64(virt_group_id_127594) * + segmap_group_sizze_84592 + sext_i32_i64(local_tid_127588); + + if (slt64(gtid_83856, m_70861)) { + for (int64_t i_127595 = 0; i_127595 < k2p2zq_70876; i_127595++) { + ((__global int64_t *) mem_122803)[phys_tid_83857 + i_127595 * + num_threads_125743] = + ((__global int64_t *) mem_120248)[i_127595]; + } + for (int64_t i_127596 = 0; i_127596 < k2p2zq_70876; i_127596++) { + for (int64_t i_127597 = 0; i_127597 < rp1_71562; i_127597++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127596 * + (num_threads_125743 * + rp1_71562) + + i_127597 * + num_threads_125743)] = + ((__global double *) mem_122793)[gtid_83856 + + (i_127596 * (m_70861 * + rp1_71562) + + i_127597 * m_70861)]; + } + } + for (int64_t i_127598 = 0; i_127598 < k2p2zq_70876; i_127598++) { + ((__global double *) 
double_buffer_mem_125587)[phys_tid_83857 + + i_127598 * + num_threads_125743] = + ((__global double *) mem_122796)[gtid_83856 + i_127598 * + m_70861]; + } + for (int64_t i_127599 = 0; i_127599 < (int64_t) 2; i_127599++) { + for (int64_t i_127600 = 0; i_127600 < k2p2zq_70876; + i_127600++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (i_127599 * + (num_threads_125743 * + k2p2zq_70876) + + i_127600 * + num_threads_125743)] = + ((__global double *) mem_122800)[gtid_83856 + + (i_127599 * (m_70861 * + k2p2zq_70876) + + i_127600 * m_70861)]; + } + } + + int64_t dqrdc2_res_84607; + int64_t k_84613 = k_71067; + + for (int64_t l_84608 = 0; l_84608 < min_res_71574; l_84608++) { + int64_t x_84614 = add64((int64_t) 1, l_84608); + bool cond_84615 = slt64(x_84614, k_84613); + bool loop_cond_84616; + + if (cond_84615) { + bool y_84617 = slt64(l_84608, k2p2zq_70876); + bool index_certs_84618; + + if (!y_84617) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 342) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_84608; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_84619 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + l_84608 * + num_threads_125743)]; + double zt_res_84620 = 1.0e-7 * zt_arg_84619; + bool index_certs_84621; + + if (!y_84617) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 343) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_84622 = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + l_84608 * + num_threads_125743]; + bool zl_res_84623 = zl_arg_84622 < zt_res_84620; + + loop_cond_84616 = zl_res_84623; + } else { + loop_cond_84616 = 0; + } + + bool y_84624 = slt64(l_84608, k2p2zq_70876); + int64_t 
upper_bound_84625 = sub64(k2p2zq_70876, x_84614); + bool loop_not_taken_84626 = !loop_cond_84616; + bool protect_assert_disj_84627 = y_84624 || + loop_not_taken_84626; + bool index_certs_84628; + + if (!protect_assert_disj_84627) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 344) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_84629; + + if (!protect_assert_disj_84627) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 345) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_84608; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_84630; + + if (!protect_assert_disj_84627) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 346) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_84608; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_84631 = y_70952 || + loop_not_taken_84626; + bool index_certs_84632; + + if (!protect_assert_disj_84631) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 347) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_84633; + + if (!protect_assert_disj_84631) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 348) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_84634; + + if (!protect_assert_disj_84631) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 349) == -1) { + global_failure_args[0] = 
(int64_t) 1; + global_failure_args[1] = m_70948; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_84635; + int64_t loopres_84640; + bool loop_while_84641; + int64_t k_84646; + + loop_while_84641 = loop_cond_84616; + k_84646 = k_84613; + while (loop_while_84641) { + for (int64_t i_84648 = 0; i_84648 < rp1_71562; i_84648++) { + bool index_certs_84650; + + if (!y_84624) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 350) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = i_84648; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_84651 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + i_84648 * + num_threads_125743)]; + + for (int64_t j0_84653 = 0; j0_84653 < upper_bound_84625; + j0_84653++) { + int64_t j_84655 = add64(x_84614, j0_84653); + bool x_84656 = sle64((int64_t) 0, j_84655); + bool y_84657 = slt64(j_84655, k2p2zq_70876); + bool bounds_check_84658 = x_84656 && y_84657; + bool index_certs_84659; + + if (!bounds_check_84658) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 351) == + -1) { + global_failure_args[0] = j_84655; + global_failure_args[1] = i_84648; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_84660 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (j_84655 * + (num_threads_125743 * + rp1_71562) + + i_84648 * + num_threads_125743)]; + int64_t i_84661 = sub64(j_84655, (int64_t) 1); + bool x_84662 = sle64((int64_t) 0, i_84661); + bool y_84663 = slt64(i_84661, k2p2zq_70876); + bool bounds_check_84664 = x_84662 && y_84663; + bool index_certs_84665; + + if (!bounds_check_84664) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 352) == + -1) { + global_failure_args[0] = i_84661; + global_failure_args[1] = i_84648; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_84661 * + (num_threads_125743 * + rp1_71562) + + i_84648 * + num_threads_125743)] = + lw_val_84660; + } + + bool index_certs_84667; + + if (!y_70952) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 353) == -1) { + global_failure_args[0] = m_70948; + global_failure_args[1] = i_84648; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (m_70948 * + (num_threads_125743 * + rp1_71562) + + i_84648 * + num_threads_125743)] = + t_84651; + } + + int64_t i_84669 = ((__global + int64_t *) mem_122803)[phys_tid_83857 + + l_84608 * + num_threads_125743]; + double t_84670 = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + l_84608 * + num_threads_125743]; + double tt_84671 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + l_84608 * + num_threads_125743]; + double ttt_84672 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + l_84608 * + num_threads_125743)]; + + for (int64_t j0_84676 = 0; j0_84676 < upper_bound_84625; + j0_84676++) { + int64_t j_84680 = add64(x_84614, j0_84676); + bool x_84681 = sle64((int64_t) 0, j_84680); + bool y_84682 = slt64(j_84680, k2p2zq_70876); + bool bounds_check_84683 = x_84681 && y_84682; + bool index_certs_84684; + + if (!bounds_check_84683) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 354) == -1) { + global_failure_args[0] = j_84680; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + 
int64_t lw_val_84685 = ((__global + int64_t *) mem_122803)[phys_tid_83857 + + j_84680 * + num_threads_125743]; + int64_t i_84686 = sub64(j_84680, (int64_t) 1); + bool x_84687 = sle64((int64_t) 0, i_84686); + bool y_84688 = slt64(i_84686, k2p2zq_70876); + bool bounds_check_84689 = x_84687 && y_84688; + bool index_certs_84690; + + if (!bounds_check_84689) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 355) == -1) { + global_failure_args[0] = i_84686; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_122803)[phys_tid_83857 + + i_84686 * + num_threads_125743] = + lw_val_84685; + + double lw_val_84692 = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + j_84680 * + num_threads_125743]; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + i_84686 * + num_threads_125743] = + lw_val_84692; + + bool index_certs_84694; + + if (!bounds_check_84683) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 356) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_84680; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_84695 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + j_84680 * + num_threads_125743]; + bool index_certs_84696; + + if (!bounds_check_84689) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 357) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_84686; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + i_84686 * + num_threads_125743] = + lw_val_84695; + + bool index_certs_84698; + + if (!bounds_check_84683) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 358) == -1) { + 
global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_84680; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_84699 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + j_84680 * + num_threads_125743)]; + bool index_certs_84700; + + if (!bounds_check_84689) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 359) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_84686; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + i_84686 * + num_threads_125743)] = + lw_val_84699; + } + ((__global int64_t *) mem_122803)[phys_tid_83857 + m_70948 * + num_threads_125743] = + i_84669; + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + m_70948 * + num_threads_125743] = + t_84670; + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + m_70948 * + num_threads_125743] = + tt_84671; + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + m_70948 * + num_threads_125743)] = + ttt_84672; + + int64_t k_84706 = sub64(k_84646, (int64_t) 1); + bool cond_84707 = slt64(x_84614, k_84706); + bool loop_cond_84708; + + if (cond_84707) { + bool index_certs_84709; + + if (!y_84624) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 360) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_84608; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_84710 = ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (num_threads_125743 * + k2p2zq_70876 + + l_84608 * + 
num_threads_125743)]; + double zt_res_84711 = 1.0e-7 * zt_arg_84710; + bool index_certs_84712; + + if (!y_84624) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 361) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_84713 = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + l_84608 * + num_threads_125743]; + bool zl_res_84714 = zl_arg_84713 < zt_res_84711; + + loop_cond_84708 = zl_res_84714; + } else { + loop_cond_84708 = 0; + } + + bool loop_while_tmp_127606 = loop_cond_84708; + int64_t k_tmp_127611 = k_84706; + + loop_while_84641 = loop_while_tmp_127606; + k_84646 = k_tmp_127611; + } + loopres_84635 = loop_while_84641; + loopres_84640 = k_84646; + + bool cond_84715 = x_84614 == rp1_71562; + int64_t j_m_i_84716 = sub64(rp1_71562, l_84608); + bool empty_slice_84720 = j_m_i_84716 == (int64_t) 0; + int64_t m_84721 = sub64(j_m_i_84716, (int64_t) 1); + int64_t i_p_m_t_s_84722 = add64(l_84608, m_84721); + bool zzero_leq_i_p_m_t_s_84723 = sle64((int64_t) 0, + i_p_m_t_s_84722); + bool i_p_m_t_s_leq_w_84724 = slt64(i_p_m_t_s_84722, rp1_71562); + bool i_lte_j_84725 = sle64(l_84608, rp1_71562); + bool y_84726 = zzero_leq_i_p_m_t_s_84723 && + i_p_m_t_s_leq_w_84724; + bool y_84727 = i_lte_j_84725 && y_84726; + bool ok_or_empty_84728 = empty_slice_84720 || y_84727; + bool index_ok_84729 = y_84624 && ok_or_empty_84728; + + if (cond_84715) { + for (int64_t i_127617 = 0; i_127617 < k2p2zq_70876; + i_127617++) { + ((__global double *) mem_125482)[phys_tid_83857 + + i_127617 * + num_threads_125743] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + i_127617 * + num_threads_125743]; + } + for (int64_t i_127618 = 0; i_127618 < (int64_t) 2; + i_127618++) { + for (int64_t i_127619 = 0; i_127619 < k2p2zq_70876; + i_127619++) { + ((__global double *) mem_125480)[phys_tid_83857 + + (i_127618 * + (num_threads_125743 * + k2p2zq_70876) 
+ + i_127619 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (i_127618 * + (num_threads_125743 * + k2p2zq_70876) + + i_127619 * + num_threads_125743)]; + } + } + for (int64_t i_127620 = 0; i_127620 < k2p2zq_70876; + i_127620++) { + for (int64_t i_127621 = 0; i_127621 < rp1_71562; + i_127621++) { + ((__global double *) mem_125512)[phys_tid_83857 + + (i_127620 * + (num_threads_125743 * + rp1_71562) + + i_127621 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127620 * + (num_threads_125743 * + rp1_71562) + + i_127621 * + num_threads_125743)]; + } + } + } else { + bool index_certs_84730; + + if (!index_ok_84729) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 362) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = l_84608; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_84732; + double redout_119833 = 0.0; + + for (int64_t i_119834 = 0; i_119834 < j_m_i_84716; + i_119834++) { + int64_t slice_120035 = l_84608 + i_119834; + double x_84736 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + slice_120035 * + num_threads_125743)]; + double defunc_1_f_res_84737 = x_84736 * x_84736; + double defunc_1_op_res_84735 = defunc_1_f_res_84737 + + redout_119833; + double redout_tmp_127622 = defunc_1_op_res_84735; + + redout_119833 = redout_tmp_127622; + } + defunc_2_reduce_res_84732 = redout_119833; + + double sqrt_res_84738; + + sqrt_res_84738 = futrts_sqrt64(defunc_2_reduce_res_84732); + + bool zeze_res_84739 = sqrt_res_84738 == 0.0; + + if (zeze_res_84739) { + for (int64_t i_127623 = 0; i_127623 < k2p2zq_70876; + i_127623++) { + ((__global double *) mem_125267)[phys_tid_83857 + + i_127623 * + num_threads_125743] = + ((__global + double *) 
double_buffer_mem_125587)[phys_tid_83857 + + i_127623 * + num_threads_125743]; + } + for (int64_t i_127624 = 0; i_127624 < (int64_t) 2; + i_127624++) { + for (int64_t i_127625 = 0; i_127625 < k2p2zq_70876; + i_127625++) { + ((__global + double *) mem_125265)[phys_tid_83857 + + (i_127624 * + (num_threads_125743 * + k2p2zq_70876) + + i_127625 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (i_127624 * + (num_threads_125743 * + k2p2zq_70876) + + i_127625 * + num_threads_125743)]; + } + } + for (int64_t i_127626 = 0; i_127626 < k2p2zq_70876; + i_127626++) { + for (int64_t i_127627 = 0; i_127627 < rp1_71562; + i_127627++) { + ((__global + double *) mem_125472)[phys_tid_83857 + + (i_127626 * + (num_threads_125743 * + rp1_71562) + + i_127627 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127626 * + (num_threads_125743 * + rp1_71562) + + i_127627 * + num_threads_125743)]; + } + } + } else { + bool y_84743 = slt64(l_84608, rp1_71562); + bool index_ok_84744 = y_84624 && y_84743; + bool index_certs_84745; + + if (!index_ok_84744) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 363) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = l_84608; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_84746 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)]; + bool zeze_res_84747 = znze_arg_84746 == 0.0; + bool znze_res_84748 = !zeze_res_84747; + double nrmxl_84749; + + if (znze_res_84748) { + double abs_res_84750 = fabs(sqrt_res_84738); + double sgn_res_84751 = fsignum32(znze_arg_84746); + double zt_res_84752 = abs_res_84750 * sgn_res_84751; + + nrmxl_84749 = zt_res_84752; + } else { + nrmxl_84749 = sqrt_res_84738; + } + for (int64_t i0_84754 = 0; 
i0_84754 < j_m_i_84716; + i0_84754++) { + int64_t i_84756 = add64(l_84608, i0_84754); + bool x_84757 = sle64((int64_t) 0, i_84756); + bool y_84758 = slt64(i_84756, rp1_71562); + bool bounds_check_84759 = x_84757 && y_84758; + bool index_ok_84760 = y_84624 && bounds_check_84759; + bool index_certs_84761; + + if (!index_ok_84760) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 364) == + -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = i_84756; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_84762 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + i_84756 * + num_threads_125743)]; + double lw_val_84763 = x_84762 / nrmxl_84749; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + i_84756 * + num_threads_125743)] = + lw_val_84763; + } + + double zp_arg_84765 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)]; + double zp_res_84766 = 1.0 + zp_arg_84765; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)] = + zp_res_84766; + + bool bounds_invalid_upwards_84768 = slt64(k2p2zq_70876, + x_84614); + bool valid_84769 = !bounds_invalid_upwards_84768; + bool range_valid_c_84770; + + if (!valid_84769) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 365) == -1) { + global_failure_args[0] = x_84614; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_84771 = slt64((int64_t) 0, + upper_bound_84625); + bool loop_not_taken_84772 = !loop_nonempty_84771; + bool protect_assert_disj_84773 = index_ok_84744 || + loop_not_taken_84772; + 
bool index_certs_84774; + + if (!protect_assert_disj_84773) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 366) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = l_84608; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_84778 = 0; i_84778 < upper_bound_84625; + i_84778++) { + int64_t index_primexp_84782 = add64(x_84614, + i_84778); + bool x_84783 = sle64((int64_t) 0, + index_primexp_84782); + bool y_84784 = slt64(index_primexp_84782, + k2p2zq_70876); + bool bounds_check_84785 = x_84783 && y_84784; + double t_84786; + double t_84788 = 0.0; + + for (int64_t i0_84787 = 0; i0_84787 < j_m_i_84716; + i0_84787++) { + int64_t i_84789 = add64(l_84608, i0_84787); + bool x_84790 = sle64((int64_t) 0, i_84789); + bool y_84791 = slt64(i_84789, rp1_71562); + bool bounds_check_84792 = x_84790 && y_84791; + bool index_ok_84793 = y_84624 && + bounds_check_84792; + bool index_certs_84794; + + if (!index_ok_84793) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 367) == + -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = i_84789; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_84795 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + i_84789 * + num_threads_125743)]; + bool index_ok_84796 = bounds_check_84785 && + bounds_check_84792; + bool index_certs_84797; + + if (!index_ok_84796) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 368) == + -1) { + global_failure_args[0] = + index_primexp_84782; + global_failure_args[1] = i_84789; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_84798 = ((__global + double *) 
double_buffer_mem_125586)[phys_tid_83857 + + (index_primexp_84782 * + (num_threads_125743 * + rp1_71562) + + i_84789 * + num_threads_125743)]; + double y_84799 = x_84795 * y_84798; + double loopres_84800 = t_84788 - y_84799; + double t_tmp_127632 = loopres_84800; + + t_84788 = t_tmp_127632; + } + t_84786 = t_84788; + + double y_84801 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)]; + double t_84802 = t_84786 / y_84801; + + for (int64_t i0_84804 = 0; i0_84804 < j_m_i_84716; + i0_84804++) { + int64_t i_84806 = add64(l_84608, i0_84804); + bool x_84807 = sle64((int64_t) 0, i_84806); + bool y_84808 = slt64(i_84806, rp1_71562); + bool bounds_check_84809 = x_84807 && y_84808; + bool index_ok_84810 = bounds_check_84785 && + bounds_check_84809; + bool index_certs_84811; + + if (!index_ok_84810) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 369) == + -1) { + global_failure_args[0] = + index_primexp_84782; + global_failure_args[1] = i_84806; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_84812 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (index_primexp_84782 * + (num_threads_125743 * + rp1_71562) + + i_84806 * + num_threads_125743)]; + bool index_ok_84813 = y_84624 && + bounds_check_84809; + bool index_certs_84814; + + if (!index_ok_84813) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 370) == + -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = i_84806; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_84815 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + i_84806 * + num_threads_125743)]; + double y_84816 = t_84802 * y_84815; 
+ double lw_val_84817 = x_84812 + y_84816; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (index_primexp_84782 * + (num_threads_125743 * + rp1_71562) + + i_84806 * + num_threads_125743)] = + lw_val_84817; + } + + bool index_certs_84819; + + if (!bounds_check_84785) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 371) == + -1) { + global_failure_args[0] = + index_primexp_84782; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_84820 = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + index_primexp_84782 * + num_threads_125743]; + bool zeze_res_84821 = zeze_arg_84820 == 0.0; + + if (!zeze_res_84821) { + bool index_ok_84824 = y_84743 && + bounds_check_84785; + bool index_certs_84825; + + if (!index_ok_84824) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 372) == + -1) { + global_failure_args[0] = + index_primexp_84782; + global_failure_args[1] = l_84608; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_84826 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (index_primexp_84782 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)]; + double abs_res_84827 = fabs(abs_arg_84826); + double zs_res_84828 = abs_res_84827 / + zeze_arg_84820; + double ztzt_res_84829 = fpow64(zs_res_84828, + 2.0); + double zm_res_84830 = 1.0 - ztzt_res_84829; + double max_res_84831 = fmax64(0.0, + zm_res_84830); + double abs_res_84832 = fabs(max_res_84831); + bool zgze_res_84833 = 1.0e-6 <= abs_res_84832; + int64_t j_m_i_84834 = sub64(rp1_71562, x_84614); + + if (zgze_res_84833) { + double sqrt_res_84837; + + sqrt_res_84837 = + futrts_sqrt64(max_res_84831); + + double zt_res_84838 = zeze_arg_84820 * + sqrt_res_84837; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + index_primexp_84782 * + 
num_threads_125743] = + zt_res_84838; + } else { + bool empty_slice_84840 = j_m_i_84834 == + (int64_t) 0; + int64_t m_84841 = sub64(j_m_i_84834, + (int64_t) 1); + int64_t i_p_m_t_s_84842 = add64(x_84614, + m_84841); + bool zzero_leq_i_p_m_t_s_84843 = + sle64((int64_t) 0, i_p_m_t_s_84842); + bool i_p_m_t_s_leq_w_84844 = + slt64(i_p_m_t_s_84842, rp1_71562); + bool zzero_lte_i_84845 = sle64((int64_t) 0, + x_84614); + bool i_lte_j_84846 = sle64(x_84614, + rp1_71562); + bool y_84847 = i_p_m_t_s_leq_w_84844 && + zzero_lte_i_84845; + bool y_84848 = zzero_leq_i_p_m_t_s_84843 && + y_84847; + bool y_84849 = i_lte_j_84846 && y_84848; + bool forwards_ok_84850 = + zzero_lte_i_84845 && y_84849; + bool ok_or_empty_84851 = + empty_slice_84840 || forwards_ok_84850; + bool index_ok_84852 = bounds_check_84785 && + ok_or_empty_84851; + bool index_certs_84853; + + if (!index_ok_84852) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 373) == + -1) { + global_failure_args[0] = + index_primexp_84782; + global_failure_args[1] = + x_84614; + global_failure_args[2] = + k2p2zq_70876; + global_failure_args[3] = + rp1_71562; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_84855; + double redout_119835 = 0.0; + + for (int64_t i_119836 = 0; i_119836 < + j_m_i_84834; i_119836++) { + int64_t slice_120036 = x_84614 + + i_119836; + double x_84859 = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (index_primexp_84782 * + (num_threads_125743 * + rp1_71562) + + slice_120036 * + num_threads_125743)]; + double defunc_1_f_res_84860 = x_84859 * + x_84859; + double defunc_1_op_res_84858 = + defunc_1_f_res_84860 + + redout_119835; + double redout_tmp_127634 = + defunc_1_op_res_84858; + + redout_119835 = redout_tmp_127634; + } + defunc_2_reduce_res_84855 = redout_119835; + + double sqrt_res_84861; + + sqrt_res_84861 = + futrts_sqrt64(defunc_2_reduce_res_84855); + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + 
index_primexp_84782 * + num_threads_125743] = + sqrt_res_84861; + + bool index_certs_84863; + + if (!bounds_check_84785) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 374) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_84782; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127635 = 0; i_127635 < + (int64_t) 1; i_127635++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (index_primexp_84782 + + i_127635) * + num_threads_125743] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + num_threads_125743 * + index_primexp_84782 + + i_127635 * + num_threads_125743]; + } + } + } + } + + bool index_certs_84866; + + if (!y_84624) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 375) == -1) { + global_failure_args[0] = l_84608; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127636 = 0; i_127636 < (int64_t) 1; + i_127636++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + (l_84608 + + i_127636) * + num_threads_125743] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + l_84608 * + (num_threads_125743 * + rp1_71562) + + num_threads_125743 * + l_84608 + + i_127636 * + num_threads_125743]; + } + + double zt_res_84869 = -1.0 * nrmxl_84749; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (l_84608 * + (num_threads_125743 * + rp1_71562) + + l_84608 * + num_threads_125743)] = + zt_res_84869; + for (int64_t i_127637 = 0; i_127637 < k2p2zq_70876; + i_127637++) { + ((__global double *) mem_125267)[phys_tid_83857 + + i_127637 * + num_threads_125743] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + i_127637 * + num_threads_125743]; + } + for (int64_t i_127638 = 0; i_127638 < (int64_t) 2; + i_127638++) { + for 
(int64_t i_127639 = 0; i_127639 < k2p2zq_70876; + i_127639++) { + ((__global + double *) mem_125265)[phys_tid_83857 + + (i_127638 * + (num_threads_125743 * + k2p2zq_70876) + + i_127639 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (i_127638 * + (num_threads_125743 * + k2p2zq_70876) + + i_127639 * + num_threads_125743)]; + } + } + for (int64_t i_127640 = 0; i_127640 < k2p2zq_70876; + i_127640++) { + for (int64_t i_127641 = 0; i_127641 < rp1_71562; + i_127641++) { + ((__global + double *) mem_125472)[phys_tid_83857 + + (i_127640 * + (num_threads_125743 * + rp1_71562) + + i_127641 * + num_threads_125743)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127640 * + (num_threads_125743 * + rp1_71562) + + i_127641 * + num_threads_125743)]; + } + } + } + for (int64_t i_127642 = 0; i_127642 < k2p2zq_70876; + i_127642++) { + ((__global double *) mem_125482)[phys_tid_83857 + + i_127642 * + num_threads_125743] = + ((__global double *) mem_125267)[phys_tid_83857 + + i_127642 * + num_threads_125743]; + } + for (int64_t i_127643 = 0; i_127643 < (int64_t) 2; + i_127643++) { + for (int64_t i_127644 = 0; i_127644 < k2p2zq_70876; + i_127644++) { + ((__global double *) mem_125480)[phys_tid_83857 + + (i_127643 * + (num_threads_125743 * + k2p2zq_70876) + + i_127644 * + num_threads_125743)] = + ((__global + double *) mem_125265)[phys_tid_83857 + + (i_127643 * + (num_threads_125743 * + k2p2zq_70876) + + i_127644 * + num_threads_125743)]; + } + } + for (int64_t i_127645 = 0; i_127645 < k2p2zq_70876; + i_127645++) { + for (int64_t i_127646 = 0; i_127646 < rp1_71562; + i_127646++) { + ((__global double *) mem_125512)[phys_tid_83857 + + (i_127645 * + (num_threads_125743 * + rp1_71562) + + i_127646 * + num_threads_125743)] = + ((__global + double *) mem_125472)[phys_tid_83857 + + (i_127645 * + (num_threads_125743 * + rp1_71562) + + i_127646 * + num_threads_125743)]; + } + } + } + for (int64_t i_127647 = 0; i_127647 < 
k2p2zq_70876; + i_127647++) { + for (int64_t i_127648 = 0; i_127648 < rp1_71562; + i_127648++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127647 * + (num_threads_125743 * + rp1_71562) + + i_127648 * + num_threads_125743)] = + ((__global double *) mem_125512)[phys_tid_83857 + + (i_127647 * + (num_threads_125743 * + rp1_71562) + + i_127648 * + num_threads_125743)]; + } + } + for (int64_t i_127649 = 0; i_127649 < k2p2zq_70876; + i_127649++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + i_127649 * + num_threads_125743] = + ((__global double *) mem_125482)[phys_tid_83857 + + i_127649 * + num_threads_125743]; + } + for (int64_t i_127650 = 0; i_127650 < (int64_t) 2; i_127650++) { + for (int64_t i_127651 = 0; i_127651 < k2p2zq_70876; + i_127651++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_83857 + + (i_127650 * + (num_threads_125743 * + k2p2zq_70876) + + i_127651 * + num_threads_125743)] = + ((__global double *) mem_125480)[phys_tid_83857 + + (i_127650 * + (num_threads_125743 * + k2p2zq_70876) + + i_127651 * + num_threads_125743)]; + } + } + + int64_t k_tmp_127605 = loopres_84640; + + k_84613 = k_tmp_127605; + } + dqrdc2_res_84607 = k_84613; + + int64_t min_arg_84871 = sub64(dqrdc2_res_84607, (int64_t) 1); + int64_t min_res_84872 = smin64(rp1_71562, min_arg_84871); + + for (int64_t i_127652 = 0; i_127652 < k2p2zq_70876; i_127652++) { + for (int64_t i_127653 = 0; i_127653 < rp1_71562; i_127653++) { + ((__global double *) mem_123127)[i_127652 * (m_70861 * + rp1_71562) + + i_127653 * m_70861 + + gtid_83856] = ((__global + double *) double_buffer_mem_125586)[phys_tid_83857 + + (i_127652 * + (num_threads_125743 * + rp1_71562) + + i_127653 * + num_threads_125743)]; + } + } + for (int64_t i_127654 = 0; i_127654 < k2p2zq_70876; i_127654++) { + ((__global double *) mem_123130)[i_127654 * m_70861 + + gtid_83856] = ((__global + double *) double_buffer_mem_125587)[phys_tid_83857 + + i_127654 * + 
num_threads_125743]; + } + for (int64_t i_127655 = 0; i_127655 < k2p2zq_70876; i_127655++) { + ((__global int64_t *) mem_123133)[i_127655 * m_70861 + + gtid_83856] = ((__global + int64_t *) mem_122803)[phys_tid_83857 + + i_127655 * + num_threads_125743]; + } + ((__global int64_t *) mem_123135)[gtid_83856] = min_res_84872; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_84592 +} +__kernel void mainDetailedzisegmap_84147(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t rp1_71562, int64_t j_84524, + int64_t num_groups_84535, __global + unsigned char *mem_122686, __global + unsigned char *mem_122715, __global + unsigned char *mem_122719, __global + unsigned char *mem_122723, __global + unsigned char *mem_122727) +{ + /* Kernel mainDetailedzisegmap_84147 (generated code). For every gtid below m_70861 it sums the squares of rp1_71562 doubles read from mem_122686 at fixed index j_84524, takes the square root, stores the root into mem_122715 and mem_122719 at element gtid plus j_84524 times m_70861, stores the root (or 1.0 when the root equals 0.0) into mem_122719 a further m_70861 times k2p2zq_70876 elements on, and finally copies the gtid entries of mem_122715 into mem_122723 and of mem_122719 into mem_122727. All buffers are passed as byte pointers and cast to double pointers at each access. */ #define segmap_group_sizze_84534 (mainDetailedzisegmap_group_sizze_84149) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + /* Return immediately when the shared failure slot already holds a non-negative value. */ if (*global_failure >= 0) + return; + + int32_t global_tid_127487; + int32_t local_tid_127488; + int64_t group_sizze_127491; + int32_t wave_sizze_127490; + int32_t group_tid_127489; + + global_tid_127487 = get_global_id(0); + local_tid_127488 = get_local_id(0); + group_sizze_127491 = get_local_size(0); + wave_sizze_127490 = LOCKSTEP_WIDTH; + group_tid_127489 = get_group_id(0); + + int32_t phys_tid_84147; + + phys_tid_84147 = global_tid_127487; + + int32_t phys_group_id_127492; + + phys_group_id_127492 = get_group_id(0); + /* Each physical work-group handles the virtual group ids phys_group_id, phys_group_id plus num_groups_84535, and so on; the trip count comes from sdiv_up32 and sdiv_up64, which are defined elsewhere (presumably rounding-up division; confirm against their definitions). */ for (int32_t i_127493 = 0; i_127493 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_84534)) - + phys_group_id_127492, sext_i64_i32(num_groups_84535)); + i_127493++) { + int32_t virt_group_id_127494 = phys_group_id_127492 + i_127493 * + sext_i64_i32(num_groups_84535); + /* Element index owned by this work-item in the current virtual group. */ int64_t gtid_84146 = sext_i32_i64(virt_group_id_127494) * + segmap_group_sizze_84534 + sext_i32_i64(local_tid_127488); + + /* Only work-items whose gtid passes the slt64 check against m_70861 do any work (slt64 is defined elsewhere; presumably signed less-than). */ if (slt64(gtid_84146, m_70861)) { + double defunc_2_reduce_res_84542; + 
double redout_119831 = 0.0; + + /* Sequential reduction: accumulate x times x over the rp1_71562 values selected by gtid and j_84524. */ for (int64_t i_119832 = 0; i_119832 < rp1_71562; i_119832++) { + double x_84546 = ((__global double *) mem_122686)[i_119832 * + (k2p2zq_70876 * + m_70861) + + gtid_84146 * + k2p2zq_70876 + + j_84524]; + double defunc_1_f_res_84547 = x_84546 * x_84546; + double defunc_1_op_res_84545 = defunc_1_f_res_84547 + + redout_119831; + double redout_tmp_127495 = defunc_1_op_res_84545; + + redout_119831 = redout_tmp_127495; + } + defunc_2_reduce_res_84542 = redout_119831; + + double sqrt_res_84548; + + /* futrts_sqrt64 is defined elsewhere in the file; presumably a double-precision square root. The same result is written to mem_122715 and to mem_122719. */ sqrt_res_84548 = futrts_sqrt64(defunc_2_reduce_res_84542); + ((__global double *) mem_122715)[gtid_84146 + j_84524 * m_70861] = + sqrt_res_84548; + ((__global double *) mem_122719)[gtid_84146 + j_84524 * m_70861] = + sqrt_res_84548; + + /* A root that compares equal to 0.0 is replaced by 1.0 before the second store into mem_122719. */ bool zeze_res_84551 = sqrt_res_84548 == 0.0; + double lw_val_84552; + + if (zeze_res_84551) { + lw_val_84552 = 1.0; + } else { + lw_val_84552 = sqrt_res_84548; + } + ((__global double *) mem_122719)[gtid_84146 + (m_70861 * + k2p2zq_70876 + + j_84524 * m_70861)] = + lw_val_84552; + /* Copy this gtid's k2p2zq_70876 entries of mem_122715 into mem_122723. */ for (int64_t i_127496 = 0; i_127496 < k2p2zq_70876; i_127496++) { + ((__global double *) mem_122723)[i_127496 * m_70861 + + gtid_84146] = ((__global + double *) mem_122715)[gtid_84146 + + i_127496 * + m_70861]; + } + /* Copy this gtid's 2 by k2p2zq_70876 entries of mem_122719 into mem_122727. */ for (int64_t i_127497 = 0; i_127497 < (int64_t) 2; i_127497++) { + for (int64_t i_127498 = 0; i_127498 < k2p2zq_70876; + i_127498++) { + ((__global double *) mem_122727)[i_127497 * (m_70861 * + k2p2zq_70876) + + i_127498 * m_70861 + + gtid_84146] = ((__global + double *) mem_122719)[gtid_84146 + + (i_127497 * + (m_70861 * + k2p2zq_70876) + + i_127498 * + m_70861)]; + } + } + } + /* Executed once per virtual-group iteration, outside the gtid guard, so every work-item of the group reaches it. */ barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_84534 +} +__kernel void mainDetailedzisegmap_84192(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t j_84524, + int64_t num_groups_84580, + int64_t num_threads_115503, + int64_t per_chunk_115510, __global + 
unsigned char *mem_122733, __global + unsigned char *mem_122738, __global + unsigned char *mem_122743, __global + unsigned char *mem_122748) +{ + #define segmap_group_sizze_84579 (mainDetailedzisegmap_group_sizze_84194) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127570; + int32_t local_tid_127571; + int64_t group_sizze_127574; + int32_t wave_sizze_127573; + int32_t group_tid_127572; + + global_tid_127570 = get_global_id(0); + local_tid_127571 = get_local_id(0); + group_sizze_127574 = get_local_size(0); + wave_sizze_127573 = LOCKSTEP_WIDTH; + group_tid_127572 = get_group_id(0); + + int32_t phys_tid_84192; + + phys_tid_84192 = global_tid_127570; + + int32_t phys_group_id_127575; + + phys_group_id_127575 = get_group_id(0); + for (int32_t i_127576 = 0; i_127576 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_84579)) - + phys_group_id_127575, sext_i64_i32(num_groups_84580)); + i_127576++) { + int32_t virt_group_id_127577 = phys_group_id_127575 + i_127576 * + sext_i64_i32(num_groups_84580); + int64_t gtid_84191 = sext_i32_i64(virt_group_id_127577) * + segmap_group_sizze_84579 + sext_i32_i64(local_tid_127571); + + if (slt64(gtid_84191, m_70861)) { + double sqrt_res_84584 = ((__global + double *) mem_122733)[gtid_84191]; + + for (int64_t i_127578 = 0; i_127578 < (int64_t) 1; i_127578++) { + ((__global double *) mem_122738)[gtid_84191 + (j_84524 + + i_127578) * + m_70861] = ((__global + double *) mem_122743)[(gtid_84191 + + i_127578 - + squot64(gtid_84191 + + i_127578, + per_chunk_115510) * + per_chunk_115510) * + num_threads_115503 + + squot64(gtid_84191 + + i_127578, + per_chunk_115510)]; + } + + bool zeze_res_84586 = sqrt_res_84584 == 0.0; + double lw_val_84587; + + if (zeze_res_84586) { + lw_val_84587 = 1.0; + } else { + lw_val_84587 = sqrt_res_84584; + } + ((__global double *) mem_122738)[gtid_84191 + (m_70861 * + k2p2zq_70876 + + j_84524 * 
m_70861)] = + lw_val_84587; + for (int64_t i_127579 = 0; i_127579 < (int64_t) 2; i_127579++) { + for (int64_t i_127580 = 0; i_127580 < k2p2zq_70876; + i_127580++) { + ((__global double *) mem_122748)[i_127579 * (m_70861 * + k2p2zq_70876) + + i_127580 * m_70861 + + gtid_84191] = ((__global + double *) mem_122738)[gtid_84191 + + (i_127579 * + (m_70861 * + k2p2zq_70876) + + i_127580 * + m_70861)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_84579 +} +__kernel void mainDetailedzisegmap_84207(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t j_84524, __global + unsigned char *mem_param_122694, + __global unsigned char *mem_122733) +{ + #define segmap_group_sizze_84574 (mainDetailedzisegmap_group_sizze_84210) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127564; + int32_t local_tid_127565; + int64_t group_sizze_127568; + int32_t wave_sizze_127567; + int32_t group_tid_127566; + + global_tid_127564 = get_global_id(0); + local_tid_127565 = get_local_id(0); + group_sizze_127568 = get_local_size(0); + wave_sizze_127567 = LOCKSTEP_WIDTH; + group_tid_127566 = get_group_id(0); + + int32_t phys_tid_84207; + + phys_tid_84207 = global_tid_127564; + + int64_t gtid_84205; + + gtid_84205 = sext_i32_i64(group_tid_127566) * segmap_group_sizze_84574 + + sext_i32_i64(local_tid_127565); + + int64_t gtid_84206; + + gtid_84206 = sext_i32_i64(group_tid_127566) * segmap_group_sizze_84574 + + sext_i32_i64(local_tid_127565) - (sext_i32_i64(group_tid_127566) * + segmap_group_sizze_84574 + + sext_i32_i64(local_tid_127565)); + if (slt64(gtid_84205, m_70861) && slt64(gtid_84206, (int64_t) 1)) { + double sqrt_res_84577 = ((__global double *) mem_122733)[gtid_84205]; + + if ((sle64((int64_t) 0, gtid_84205) && slt64(gtid_84205, m_70861)) && + (sle64((int64_t) 0, j_84524) && slt64(j_84524, 
k2p2zq_70876))) { + ((__global double *) mem_param_122694)[gtid_84205 * k2p2zq_70876 + + j_84524] = sqrt_res_84577; + } + } + + error_0: + return; + #undef segmap_group_sizze_84574 +} +__kernel void mainDetailedzisegmap_84216(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_122730, __global + unsigned char *mem_122733) +{ + #define segmap_group_sizze_84567 (mainDetailedzisegmap_group_sizze_84218) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127559; + int32_t local_tid_127560; + int64_t group_sizze_127563; + int32_t wave_sizze_127562; + int32_t group_tid_127561; + + global_tid_127559 = get_global_id(0); + local_tid_127560 = get_local_id(0); + group_sizze_127563 = get_local_size(0); + wave_sizze_127562 = LOCKSTEP_WIDTH; + group_tid_127561 = get_group_id(0); + + int32_t phys_tid_84216; + + phys_tid_84216 = global_tid_127559; + + int64_t gtid_84215; + + gtid_84215 = sext_i32_i64(group_tid_127561) * segmap_group_sizze_84567 + + sext_i32_i64(local_tid_127560); + if (slt64(gtid_84215, m_70861)) { + double defunc_2_reduce_res_84570 = ((__global + double *) mem_122730)[gtid_84215]; + double sqrt_res_84571; + + sqrt_res_84571 = futrts_sqrt64(defunc_2_reduce_res_84570); + ((__global double *) mem_122733)[gtid_84215] = sqrt_res_84571; + } + + error_0: + return; + #undef segmap_group_sizze_84567 +} +__kernel void mainDetailedzisegmap_84365(__global int *global_failure, + int64_t m_70861, int64_t n_70864, + int64_t r_71551, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122674, + __global unsigned char *mem_122677, + __global unsigned char *mem_122680, + __global unsigned char *mem_122682) +{ + #define segmap_group_sizze_84476 (mainDetailedzisegmap_group_sizze_84367) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t 
global_tid_127473; + int32_t local_tid_127474; + int64_t group_sizze_127477; + int32_t wave_sizze_127476; + int32_t group_tid_127475; + + global_tid_127473 = get_global_id(0); + local_tid_127474 = get_local_id(0); + group_sizze_127477 = get_local_size(0); + wave_sizze_127476 = LOCKSTEP_WIDTH; + group_tid_127475 = get_group_id(0); + + int32_t phys_tid_84365; + + phys_tid_84365 = global_tid_127473; + + int64_t gtid_84364; + + gtid_84364 = sext_i32_i64(group_tid_127475) * segmap_group_sizze_84476 + + sext_i32_i64(local_tid_127474); + if (slt64(gtid_84364, m_70861)) { + double fr_84480 = ((__global double *) mem_122674)[gtid_84364]; + double x_84481 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_84364 * + n_70864 + + r_71551]; + double defunc_0_f_res_84482 = ((__global + double *) mem_122677)[gtid_84364]; + double resid_84483 = x_84481 - defunc_0_f_res_84482; + double sqrt_res_84484; + + sqrt_res_84484 = futrts_sqrt64(fr_84480); + + double recresid_r_84485 = resid_84483 / sqrt_res_84484; + + ((__global double *) mem_122680)[gtid_84364] = resid_84483; + ((__global double *) mem_122682)[gtid_84364] = recresid_r_84485; + } + + error_0: + return; + #undef segmap_group_sizze_84476 +} +__kernel void mainDetailedzisegmap_84396(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_122668, __global + unsigned char *mem_122674) +{ + #define segmap_group_sizze_84453 (mainDetailedzisegmap_group_sizze_84398) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127408; + int32_t local_tid_127409; + int64_t group_sizze_127412; + int32_t wave_sizze_127411; + int32_t group_tid_127410; + + global_tid_127408 = get_global_id(0); + local_tid_127409 = get_local_id(0); + group_sizze_127412 = get_local_size(0); + wave_sizze_127411 = LOCKSTEP_WIDTH; + group_tid_127410 = get_group_id(0); + + int32_t phys_tid_84396; + + phys_tid_84396 = global_tid_127408; + + 
int64_t gtid_84395; + + gtid_84395 = sext_i32_i64(group_tid_127410) * segmap_group_sizze_84453 + + sext_i32_i64(local_tid_127409); + if (slt64(gtid_84395, m_70861)) { + double defunc_0_f_res_84458 = ((__global + double *) mem_122668)[gtid_84395]; + double fr_84459 = 1.0 + defunc_0_f_res_84458; + + ((__global double *) mem_122674)[gtid_84395] = fr_84459; + } + + error_0: + return; + #undef segmap_group_sizze_84453 +} +__kernel void mainDetailedzisegmap_85474(__global int *global_failure, + int64_t m_70861, int64_t n_70864, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85532, + int64_t num_threads_125779, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, + __global unsigned char *mem_123798, + __global unsigned char *mem_123801, + __global unsigned char *mem_123804, + __global unsigned char *mem_123818, + __global unsigned char *mem_123821, + __global unsigned char *mem_123840, + __global unsigned char *mem_123869, + __global unsigned char *mem_123872, + __global unsigned char *mem_123874) +{ + #define segmap_group_sizze_85531 (mainDetailedzisegmap_group_sizze_85476) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128026; + int32_t local_tid_128027; + int64_t group_sizze_128030; + int32_t wave_sizze_128029; + int32_t group_tid_128028; + + global_tid_128026 = get_global_id(0); + local_tid_128027 = get_local_id(0); + group_sizze_128030 = get_local_size(0); + wave_sizze_128029 = LOCKSTEP_WIDTH; + group_tid_128028 = get_group_id(0); + + int32_t phys_tid_85474; + + phys_tid_85474 = global_tid_128026; + + int32_t phys_group_id_128031; + + phys_group_id_128031 = get_group_id(0); + for (int32_t i_128032 = 0; i_128032 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_85531)) - + phys_group_id_128031, sext_i64_i32(num_groups_85532)); + i_128032++) { + 
int32_t virt_group_id_128033 = phys_group_id_128031 + i_128032 * + sext_i64_i32(num_groups_85532); + int64_t gtid_85473 = sext_i32_i64(virt_group_id_128033) * + segmap_group_sizze_85531 + sext_i32_i64(local_tid_128027); + + if (slt64(gtid_85473, m_70861)) { + double defunc_11_internal_map_res_transformed_row_85540 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_85473 * + n_70864 + + index_primexp_72162]; + double defunc_0_f_res_85541; + double redout_119867 = 0.0; + + for (int64_t i_119869 = 0; i_119869 < k2p2zq_70876; i_119869++) { + double x_85547 = ((__global double *) mem_120246)[i_119869 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85473 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double defunc_0_f_res_85548; + double redout_119871 = 0.0; + + for (int64_t i_119872 = 0; i_119872 < k2p2zq_70876; + i_119872++) { + double x_85552 = ((__global double *) mem_120246)[i_119872 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85473 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85553 = ((__global double *) mem_123798)[i_119869 * + (m_70861 * + k2p2zq_70876) + + i_119872 * + m_70861 + + gtid_85473]; + double defunc_1_f_res_85554 = x_85552 * x_85553; + double defunc_1_op_res_85551 = defunc_1_f_res_85554 + + redout_119871; + double redout_tmp_128036 = defunc_1_op_res_85551; + + redout_119871 = redout_tmp_128036; + } + defunc_0_f_res_85548 = redout_119871; + + double defunc_1_f_res_85555 = x_85547 * defunc_0_f_res_85548; + double defunc_1_op_res_85545 = defunc_1_f_res_85555 + + redout_119867; + + ((__global double *) mem_123804)[phys_tid_85474 + i_119869 * + num_threads_125779] = + defunc_0_f_res_85548; + + double redout_tmp_128034 = defunc_1_op_res_85545; + + redout_119867 = redout_tmp_128034; + } + defunc_0_f_res_85541 = redout_119867; + + double fr_85556 = 1.0 + defunc_0_f_res_85541; + double defunc_0_f_res_85557; + double redout_119873 = 0.0; + + for (int64_t i_119874 = 0; i_119874 < k2p2zq_70876; i_119874++) { + 
double x_85561 = ((__global double *) mem_120246)[i_119874 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85473 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85562 = ((__global double *) mem_123801)[i_119874 * + m_70861 + + gtid_85473]; + double defunc_1_f_res_85563 = x_85561 * x_85562; + double defunc_1_op_res_85560 = defunc_1_f_res_85563 + + redout_119873; + double redout_tmp_128037 = defunc_1_op_res_85560; + + redout_119873 = redout_tmp_128037; + } + defunc_0_f_res_85557 = redout_119873; + + double resid_85564 = + defunc_11_internal_map_res_transformed_row_85540 - + defunc_0_f_res_85557; + double sqrt_res_85565; + + sqrt_res_85565 = futrts_sqrt64(fr_85556); + + double recresid_r_85566 = resid_85564 / sqrt_res_85565; + + for (int64_t i_119879 = 0; i_119879 < k2p2zq_70876; i_119879++) { + double x_85569 = ((__global + double *) mem_123804)[phys_tid_85474 + + i_119879 * + num_threads_125779]; + double x_85571 = ((__global double *) mem_123801)[i_119879 * + m_70861 + + gtid_85473]; + double defunc_0_f_res_85572; + double redout_119883 = 0.0; + + for (int64_t i_119885 = 0; i_119885 < k2p2zq_70876; + i_119885++) { + double x_85577 = ((__global + double *) mem_123804)[phys_tid_85474 + + i_119885 * + num_threads_125779]; + double x_85578 = ((__global double *) mem_123798)[i_119879 * + (m_70861 * + k2p2zq_70876) + + i_119885 * + m_70861 + + gtid_85473]; + double x_85579 = ((__global double *) mem_120246)[i_119885 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85473 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85580 = x_85569 * x_85577; + double y_85581 = x_85580 / fr_85556; + double defunc_1_f_res_85582 = x_85578 - y_85581; + double defunc_1_f_res_85583 = x_85579 * + defunc_1_f_res_85582; + double defunc_1_op_res_85576 = defunc_1_f_res_85583 + + redout_119883; + + ((__global double *) mem_123840)[phys_tid_85474 + i_119885 * + num_threads_125779] = + defunc_1_f_res_85582; + + double redout_tmp_128040 = 
defunc_1_op_res_85576; + + redout_119883 = redout_tmp_128040; + } + defunc_0_f_res_85572 = redout_119883; + + double defunc_0_g_res_85584 = resid_85564 * + defunc_0_f_res_85572; + double defunc_1_f_res_85585 = x_85571 + defunc_0_g_res_85584; + + ((__global double *) mem_123818)[phys_tid_85474 + i_119879 * + num_threads_125779] = + defunc_1_f_res_85585; + for (int64_t i_128042 = 0; i_128042 < k2p2zq_70876; + i_128042++) { + ((__global double *) mem_123821)[phys_tid_85474 + + (i_119879 * + (num_threads_125779 * + k2p2zq_70876) + + i_128042 * + num_threads_125779)] = + ((__global double *) mem_123840)[phys_tid_85474 + + i_128042 * + num_threads_125779]; + } + } + for (int64_t i_128043 = 0; i_128043 < k2p2zq_70876; i_128043++) { + for (int64_t i_128044 = 0; i_128044 < k2p2zq_70876; + i_128044++) { + ((__global double *) mem_123869)[i_128043 * (m_70861 * + k2p2zq_70876) + + i_128044 * m_70861 + + gtid_85473] = ((__global + double *) mem_123821)[phys_tid_85474 + + (i_128043 * + (num_threads_125779 * + k2p2zq_70876) + + i_128044 * + num_threads_125779)]; + } + } + for (int64_t i_128045 = 0; i_128045 < k2p2zq_70876; i_128045++) { + ((__global double *) mem_123872)[i_128045 * m_70861 + + gtid_85473] = ((__global + double *) mem_123818)[phys_tid_85474 + + i_128045 * + num_threads_125779]; + } + ((__global double *) mem_123874)[gtid_85473] = recresid_r_85566; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_85531 +} +__kernel void mainDetailedzisegmap_85651(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85885, + int64_t num_threads_125789, __global + unsigned char *mem_120246, __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123901, + __global unsigned char *mem_123907, + __global unsigned char *mem_123910, + __global unsigned char *mem_123916, + __global unsigned char 
*mem_123921, + __global unsigned char *mem_123937, + __global unsigned char *mem_123940) +{ + #define segmap_group_sizze_85884 (mainDetailedzisegmap_group_sizze_85654) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128211; + int32_t local_tid_128212; + int64_t group_sizze_128215; + int32_t wave_sizze_128214; + int32_t group_tid_128213; + + global_tid_128211 = get_global_id(0); + local_tid_128212 = get_local_id(0); + group_sizze_128215 = get_local_size(0); + wave_sizze_128214 = LOCKSTEP_WIDTH; + group_tid_128213 = get_group_id(0); + + int32_t phys_tid_85651; + + phys_tid_85651 = global_tid_128211; + + int32_t phys_group_id_128216; + + phys_group_id_128216 = get_group_id(0); + for (int32_t i_128217 = 0; i_128217 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + segmap_group_sizze_85884)) - + phys_group_id_128216, sext_i64_i32(num_groups_85885)); + i_128217++) { + int32_t virt_group_id_128218 = phys_group_id_128216 + i_128217 * + sext_i64_i32(num_groups_85885); + int64_t gtid_85649 = squot64(sext_i32_i64(virt_group_id_128218) * + segmap_group_sizze_85884 + + sext_i32_i64(local_tid_128212), + k2p2zq_70876); + int64_t gtid_85650 = sext_i32_i64(virt_group_id_128218) * + segmap_group_sizze_85884 + sext_i32_i64(local_tid_128212) - + squot64(sext_i32_i64(virt_group_id_128218) * + segmap_group_sizze_85884 + + sext_i32_i64(local_tid_128212), k2p2zq_70876) * + k2p2zq_70876; + + if (slt64(gtid_85649, m_70861) && slt64(gtid_85650, k2p2zq_70876)) { + double fr_85896 = ((__global double *) mem_123910)[gtid_85649]; + double resid_85897 = ((__global double *) mem_123916)[gtid_85649]; + double x_85898 = ((__global double *) mem_123907)[gtid_85649 * + k2p2zq_70876 + + gtid_85650]; + double x_85900 = ((__global double *) mem_param_123786)[gtid_85649 * + k2p2zq_70876 + + gtid_85650]; + double defunc_0_f_res_85901; + double redout_119892 = 0.0; + + for (int64_t i_119894 = 0; 
i_119894 < k2p2zq_70876; i_119894++) { + double x_85906 = ((__global double *) mem_123907)[gtid_85649 * + k2p2zq_70876 + + i_119894]; + double x_85907 = ((__global double *) mem_123901)[i_119894 * + (k2p2zq_70876 * + m_70861) + + gtid_85649 * + k2p2zq_70876 + + gtid_85650]; + double x_85908 = ((__global double *) mem_120246)[i_119894 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85649 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85909 = x_85898 * x_85906; + double y_85910 = x_85909 / fr_85896; + double defunc_1_f_res_85911 = x_85907 - y_85910; + double defunc_1_f_res_85912 = x_85908 * defunc_1_f_res_85911; + double defunc_1_op_res_85905 = defunc_1_f_res_85912 + + redout_119892; + + ((__global double *) mem_123921)[phys_tid_85651 + i_119894 * + num_threads_125789] = + defunc_1_f_res_85911; + + double redout_tmp_128219 = defunc_1_op_res_85905; + + redout_119892 = redout_tmp_128219; + } + defunc_0_f_res_85901 = redout_119892; + + double defunc_0_g_res_85913 = resid_85897 * defunc_0_f_res_85901; + double defunc_1_f_res_85914 = x_85900 + defunc_0_g_res_85913; + + for (int64_t i_128221 = 0; i_128221 < k2p2zq_70876; i_128221++) { + ((__global double *) mem_123937)[i_128221 * (k2p2zq_70876 * + m_70861) + + gtid_85649 * k2p2zq_70876 + + gtid_85650] = ((__global + double *) mem_123921)[phys_tid_85651 + + i_128221 * + num_threads_125789]; + } + ((__global double *) mem_123940)[gtid_85649 * k2p2zq_70876 + + gtid_85650] = defunc_1_f_res_85914; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_85884 +} +__kernel void mainDetailedzisegmap_85689(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123916, + __global unsigned char *mem_123944, + __global unsigned char *mem_123952) +{ + #define segmap_group_sizze_85941 (mainDetailedzisegmap_group_sizze_85692) + + const int block_dim0 = 0; + const 
int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128282; + int32_t local_tid_128283; + int64_t group_sizze_128286; + int32_t wave_sizze_128285; + int32_t group_tid_128284; + + global_tid_128282 = get_global_id(0); + local_tid_128283 = get_local_id(0); + group_sizze_128286 = get_local_size(0); + wave_sizze_128285 = LOCKSTEP_WIDTH; + group_tid_128284 = get_group_id(0); + + int32_t phys_tid_85689; + + phys_tid_85689 = global_tid_128282; + + int64_t gtid_85687; + + gtid_85687 = squot64(sext_i32_i64(group_tid_128284) * + segmap_group_sizze_85941 + + sext_i32_i64(local_tid_128283), k2p2zq_70876); + + int64_t gtid_85688; + + gtid_85688 = sext_i32_i64(group_tid_128284) * segmap_group_sizze_85941 + + sext_i32_i64(local_tid_128283) - + squot64(sext_i32_i64(group_tid_128284) * segmap_group_sizze_85941 + + sext_i32_i64(local_tid_128283), k2p2zq_70876) * k2p2zq_70876; + if (slt64(gtid_85687, m_70861) && slt64(gtid_85688, k2p2zq_70876)) { + double resid_85944 = ((__global double *) mem_123916)[gtid_85687]; + double x_85945 = ((__global double *) mem_param_123786)[gtid_85687 * + k2p2zq_70876 + + gtid_85688]; + double defunc_0_f_res_85946 = ((__global + double *) mem_123944)[gtid_85687 * + k2p2zq_70876 + + gtid_85688]; + double defunc_0_g_res_85947 = resid_85944 * defunc_0_f_res_85946; + double defunc_1_f_res_85948 = x_85945 + defunc_0_g_res_85947; + + ((__global double *) mem_123952)[gtid_85687 * k2p2zq_70876 + + gtid_85688] = defunc_1_f_res_85948; + } + + error_0: + return; + #undef segmap_group_sizze_85941 +} +__kernel void mainDetailedzisegmap_85766(__global int *global_failure, + int64_t m_70861, int64_t n_70864, + int64_t index_primexp_72162, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123910, + __global unsigned char *mem_123913, + __global unsigned char *mem_123916, + __global unsigned char *mem_123918) +{ + #define segmap_group_sizze_85872 
(mainDetailedzisegmap_group_sizze_85768) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128206; + int32_t local_tid_128207; + int64_t group_sizze_128210; + int32_t wave_sizze_128209; + int32_t group_tid_128208; + + global_tid_128206 = get_global_id(0); + local_tid_128207 = get_local_id(0); + group_sizze_128210 = get_local_size(0); + wave_sizze_128209 = LOCKSTEP_WIDTH; + group_tid_128208 = get_group_id(0); + + int32_t phys_tid_85766; + + phys_tid_85766 = global_tid_128206; + + int64_t gtid_85765; + + gtid_85765 = sext_i32_i64(group_tid_128208) * segmap_group_sizze_85872 + + sext_i32_i64(local_tid_128207); + if (slt64(gtid_85765, m_70861)) { + double defunc_11_internal_map_res_transformed_row_85876 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_85765 * + n_70864 + + index_primexp_72162]; + double fr_85877 = ((__global double *) mem_123910)[gtid_85765]; + double defunc_0_f_res_85878 = ((__global + double *) mem_123913)[gtid_85765]; + double resid_85879 = defunc_11_internal_map_res_transformed_row_85876 - + defunc_0_f_res_85878; + double sqrt_res_85880; + + sqrt_res_85880 = futrts_sqrt64(fr_85877); + + double recresid_r_85881 = resid_85879 / sqrt_res_85880; + + ((__global double *) mem_123916)[gtid_85765] = resid_85879; + ((__global double *) mem_123918)[gtid_85765] = recresid_r_85881; + } + + error_0: + return; + #undef segmap_group_sizze_85872 +} +__kernel void mainDetailedzisegmap_85797(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_123904, __global + unsigned char *mem_123910) +{ + #define segmap_group_sizze_85852 (mainDetailedzisegmap_group_sizze_85799) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128141; + int32_t local_tid_128142; + int64_t group_sizze_128145; + int32_t wave_sizze_128144; + int32_t group_tid_128143; 
+ + global_tid_128141 = get_global_id(0); + local_tid_128142 = get_local_id(0); + group_sizze_128145 = get_local_size(0); + wave_sizze_128144 = LOCKSTEP_WIDTH; + group_tid_128143 = get_group_id(0); + + int32_t phys_tid_85797; + + phys_tid_85797 = global_tid_128141; + + int64_t gtid_85796; + + gtid_85796 = sext_i32_i64(group_tid_128143) * segmap_group_sizze_85852 + + sext_i32_i64(local_tid_128142); + if (slt64(gtid_85796, m_70861)) { + double defunc_0_f_res_85855 = ((__global + double *) mem_123904)[gtid_85796]; + double fr_85856 = 1.0 + defunc_0_f_res_85855; + + ((__global double *) mem_123910)[gtid_85796] = fr_85856; + } + + error_0: + return; + #undef segmap_group_sizze_85852 +} +__kernel void mainDetailedzisegmap_85972(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t num_groups_86023, + int64_t num_threads_125793, __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_121934, + __global unsigned char *mem_124009, + __global unsigned char *mem_124024, + __global unsigned char *mem_124026) +{ + #define segmap_group_sizze_86022 (mainDetailedzisegmap_group_sizze_85974) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128294; + int32_t local_tid_128295; + int64_t group_sizze_128298; + int32_t wave_sizze_128297; + int32_t group_tid_128296; + + global_tid_128294 = get_global_id(0); + local_tid_128295 = get_local_id(0); + group_sizze_128298 = get_local_size(0); + wave_sizze_128297 = LOCKSTEP_WIDTH; + group_tid_128296 = get_group_id(0); + + int32_t phys_tid_85972; + + phys_tid_85972 = global_tid_128294; + + int32_t 
phys_group_id_128299; + + phys_group_id_128299 = get_group_id(0); + for (int32_t i_128300 = 0; i_128300 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_86022)) - + phys_group_id_128299, sext_i64_i32(num_groups_86023)); + i_128300++) { + int32_t virt_group_id_128301 = phys_group_id_128299 + i_128300 * + sext_i64_i32(num_groups_86023); + int64_t gtid_85971 = sext_i32_i64(virt_group_id_128301) * + segmap_group_sizze_86022 + sext_i32_i64(local_tid_128295); + + if (slt64(gtid_85971, m_70861)) { + int64_t x_86028 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_85971]; + int64_t n_86029 = sub64(x_86028, k2p2zq_70876); + double i64_res_86030 = sitofp_i64_f64(n_86029); + double defunc_2_reduce_res_86031; + double redout_119896 = 0.0; + + for (int64_t i_119897 = 0; i_119897 < num_recresids_padded_71534; + i_119897++) { + double x_86039 = ((__global double *) mem_121934)[i_119897 * + m_70861 + + gtid_85971]; + bool isnan_res_86034; + + isnan_res_86034 = futrts_isnan64(redout_119896); + + double defunc_1_op_res_86035; + + if (isnan_res_86034) { + defunc_1_op_res_86035 = x_86039; + } else { + bool isnan_res_86036; + + isnan_res_86036 = futrts_isnan64(x_86039); + + double defunc_1_op_res_f_res_86037; + + if (isnan_res_86036) { + defunc_1_op_res_f_res_86037 = redout_119896; + } else { + double defunc_1_op_res_f_res_f_res_86038 = x_86039 + + redout_119896; + + defunc_1_op_res_f_res_86037 = + defunc_1_op_res_f_res_f_res_86038; + } + defunc_1_op_res_86035 = defunc_1_op_res_f_res_86037; + } + + double redout_tmp_128302 = defunc_1_op_res_86035; + + redout_119896 = redout_tmp_128302; + } + defunc_2_reduce_res_86031 = redout_119896; + + double x_mean_86040 = defunc_2_reduce_res_86031 / i64_res_86030; + double defunc_2_reduce_res_86041; + double redout_119898 = 0.0; + + for (int64_t i_119899 = 0; i_119899 < num_recresids_padded_71534; + i_119899++) { + double x_86045 = ((__global double *) mem_121934)[i_119899 * + m_70861 + + gtid_85971]; + bool 
isnan_res_86046; + + isnan_res_86046 = futrts_isnan64(x_86045); + + double defunc_0_f_res_86047; + + if (isnan_res_86046) { + defunc_0_f_res_86047 = 0.0; + } else { + double x_86048 = x_86045 - x_mean_86040; + double defunc_0_f_res_f_res_86049 = fpow64(x_86048, 2.0); + + defunc_0_f_res_86047 = defunc_0_f_res_f_res_86049; + } + + double defunc_1_op_res_86044 = defunc_0_f_res_86047 + + redout_119898; + double redout_tmp_128303 = defunc_1_op_res_86044; + + redout_119898 = redout_tmp_128303; + } + defunc_2_reduce_res_86041 = redout_119898; + + double y_86050 = i64_res_86030 - 1.0; + double binop_p_86051 = defunc_2_reduce_res_86041 / y_86050; + double defunc_0_f_res_86052; + + defunc_0_f_res_86052 = futrts_sqrt64(binop_p_86051); + + double sqrt_res_86053; + + sqrt_res_86053 = futrts_sqrt64(i64_res_86030); + + double fr_86054 = defunc_0_f_res_86052 * sqrt_res_86053; + double discard_119905; + double scanacc_119901 = 0.0; + + for (int64_t i_119903 = 0; i_119903 < Nmk_72261; i_119903++) { + bool cond_86060 = i_119903 == (int64_t) 0; + double defunc_0_f_res_86061; + + if (cond_86060) { + defunc_0_f_res_86061 = 0.0; + } else { + int64_t i_86062 = sub64(i_119903, (int64_t) 1); + bool x_86063 = sle64((int64_t) 0, i_86062); + bool y_86064 = slt64(i_86062, num_recresids_padded_71534); + bool bounds_check_86065 = x_86063 && y_86064; + bool index_certs_86066; + + if (!bounds_check_86065) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 386) == -1) { + global_failure_args[0] = i_86062; + global_failure_args[1] = + num_recresids_padded_71534; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_86067 = ((__global double *) mem_121934)[i_86062 * + m_70861 + + gtid_85971]; + double defunc_0_f_res_f_res_86068 = x_86067 / fr_86054; + + defunc_0_f_res_86061 = defunc_0_f_res_f_res_86068; + } + + double defunc_1_op_res_86058 = defunc_0_f_res_86061 + + scanacc_119901; + + ((__global double *) mem_124009)[phys_tid_85972 + i_119903 * + num_threads_125793] = + 
defunc_1_op_res_86058; + + double scanacc_tmp_128304 = defunc_1_op_res_86058; + + scanacc_119901 = scanacc_tmp_128304; + } + discard_119905 = scanacc_119901; + for (int64_t i_128306 = 0; i_128306 < Nmk_72261; i_128306++) { + ((__global double *) mem_124024)[i_128306 * m_70861 + + gtid_85971] = ((__global + double *) mem_124009)[phys_tid_85972 + + i_128306 * + num_threads_125793]; + } + ((__global int64_t *) mem_124026)[gtid_85971] = n_86029; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_86022 +} +__kernel void mainDetailedzisegmap_86148(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_124040, __global + unsigned char *mem_124054, __global + unsigned char *mem_124057) +{ + #define segmap_group_sizze_86266 (mainDetailedzisegmap_group_sizze_86150) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128474; + int32_t local_tid_128475; + int64_t group_sizze_128478; + int32_t wave_sizze_128477; + int32_t group_tid_128476; + + global_tid_128474 = get_global_id(0); + local_tid_128475 = get_local_id(0); + group_sizze_128478 = get_local_size(0); + wave_sizze_128477 = LOCKSTEP_WIDTH; + group_tid_128476 = get_group_id(0); + + int32_t phys_tid_86148; + + phys_tid_86148 = global_tid_128474; + + int64_t gtid_86147; + + gtid_86147 = sext_i32_i64(group_tid_128476) * segmap_group_sizze_86266 + + sext_i32_i64(local_tid_128475); + if (slt64(gtid_86147, m_70861)) { + double i64_res_86269 = ((__global double *) mem_124040)[gtid_86147]; + double defunc_2_reduce_res_86270 = ((__global + double *) mem_124054)[gtid_86147]; + double y_86271 = i64_res_86269 - 1.0; + double binop_p_86272 = defunc_2_reduce_res_86270 / y_86271; + double defunc_0_f_res_86273; + + defunc_0_f_res_86273 = futrts_sqrt64(binop_p_86272); + + double sqrt_res_86274; + + sqrt_res_86274 = futrts_sqrt64(i64_res_86269); + + double 
fr_86275 = defunc_0_f_res_86273 * sqrt_res_86274; + + ((__global double *) mem_124057)[gtid_86147] = fr_86275; + } + + error_0: + return; + #undef segmap_group_sizze_86266 +} +__kernel void mainDetailedzisegmap_86183(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_124040, __global + unsigned char *mem_124048, __global + unsigned char *mem_124051) +{ + #define segmap_group_sizze_86243 (mainDetailedzisegmap_group_sizze_86185) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128409; + int32_t local_tid_128410; + int64_t group_sizze_128413; + int32_t wave_sizze_128412; + int32_t group_tid_128411; + + global_tid_128409 = get_global_id(0); + local_tid_128410 = get_local_id(0); + group_sizze_128413 = get_local_size(0); + wave_sizze_128412 = LOCKSTEP_WIDTH; + group_tid_128411 = get_group_id(0); + + int32_t phys_tid_86183; + + phys_tid_86183 = global_tid_128409; + + int64_t gtid_86182; + + gtid_86182 = sext_i32_i64(group_tid_128411) * segmap_group_sizze_86243 + + sext_i32_i64(local_tid_128410); + if (slt64(gtid_86182, m_70861)) { + double i64_res_86246 = ((__global double *) mem_124040)[gtid_86182]; + double defunc_2_reduce_res_86247 = ((__global + double *) mem_124048)[gtid_86182]; + double x_mean_86248 = defunc_2_reduce_res_86247 / i64_res_86246; + + ((__global double *) mem_124051)[gtid_86182] = x_mean_86248; + } + + error_0: + return; + #undef segmap_group_sizze_86243 +} +__kernel void mainDetailedzisegmap_86211(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_124040, + __global unsigned char *mem_124042) +{ + #define segmap_group_sizze_86220 (mainDetailedzisegmap_group_sizze_86213) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t 
global_tid_128334; + int32_t local_tid_128335; + int64_t group_sizze_128338; + int32_t wave_sizze_128337; + int32_t group_tid_128336; + + global_tid_128334 = get_global_id(0); + local_tid_128335 = get_local_id(0); + group_sizze_128338 = get_local_size(0); + wave_sizze_128337 = LOCKSTEP_WIDTH; + group_tid_128336 = get_group_id(0); + + int32_t phys_tid_86211; + + phys_tid_86211 = global_tid_128334; + + int64_t gtid_86210; + + gtid_86210 = sext_i32_i64(group_tid_128336) * segmap_group_sizze_86220 + + sext_i32_i64(local_tid_128335); + if (slt64(gtid_86210, m_70861)) { + int64_t x_86224 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_86210]; + int64_t n_86225 = sub64(x_86224, k2p2zq_70876); + double i64_res_86226 = sitofp_i64_f64(n_86225); + + ((__global double *) mem_124040)[gtid_86210] = i64_res_86226; + ((__global int64_t *) mem_124042)[gtid_86210] = n_86225; + } + + error_0: + return; + #undef segmap_group_sizze_86220 +} +__kernel void mainDetailedzisegmap_86313(__global int *global_failure, + int64_t m_70861, double conf_70870, + int64_t Nmk_72261, __global + unsigned char *mem_124072, __global + unsigned char *mem_124074, __global + unsigned char *mem_124078) +{ + #define segmap_group_sizze_86369 (mainDetailedzisegmap_group_sizze_86316) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128542; + int32_t local_tid_128543; + int64_t group_sizze_128546; + int32_t wave_sizze_128545; + int32_t group_tid_128544; + + global_tid_128542 = get_global_id(0); + local_tid_128543 = get_local_id(0); + group_sizze_128546 = get_local_size(0); + wave_sizze_128545 = LOCKSTEP_WIDTH; + group_tid_128544 = get_group_id(0); + + int32_t phys_tid_86313; + + phys_tid_86313 = global_tid_128542; + + int64_t gtid_86311; + + gtid_86311 = squot64(sext_i32_i64(group_tid_128544) * + segmap_group_sizze_86369 + + sext_i32_i64(local_tid_128543), Nmk_72261); + + int64_t gtid_86312; + + 
gtid_86312 = sext_i32_i64(group_tid_128544) * segmap_group_sizze_86369 + + sext_i32_i64(local_tid_128543) - + squot64(sext_i32_i64(group_tid_128544) * segmap_group_sizze_86369 + + sext_i32_i64(local_tid_128543), Nmk_72261) * Nmk_72261; + if (slt64(gtid_86311, m_70861) && slt64(gtid_86312, Nmk_72261)) { + int64_t n_86372 = ((__global int64_t *) mem_124072)[gtid_86311]; + bool cond_86375 = slt64(gtid_86312, n_86372); + double defunc_0_f_res_86376; + + if (cond_86375) { + double div_86373 = ((__global double *) mem_124074)[gtid_86311]; + double x_86377 = 2.0 * conf_70870; + double i64_res_86378 = sitofp_i64_f64(gtid_86312); + double x_86379 = x_86377 * i64_res_86378; + double y_86380 = x_86379 / div_86373; + double defunc_0_f_res_t_res_86381 = conf_70870 + y_86380; + + defunc_0_f_res_86376 = defunc_0_f_res_t_res_86381; + } else { + defunc_0_f_res_86376 = NAN; + } + ((__global double *) mem_124078)[gtid_86311 * Nmk_72261 + gtid_86312] = + defunc_0_f_res_86376; + } + + error_0: + return; + #undef segmap_group_sizze_86369 +} +__kernel void mainDetailedzisegmap_86345(__global int *global_failure, + int64_t m_70861, __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124072, + __global unsigned char *mem_124074) +{ + #define segmap_group_sizze_86355 (mainDetailedzisegmap_group_sizze_86347) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128537; + int32_t local_tid_128538; + int64_t group_sizze_128541; + int32_t wave_sizze_128540; + int32_t group_tid_128539; + + global_tid_128537 = get_global_id(0); + local_tid_128538 = get_local_id(0); + group_sizze_128541 = get_local_size(0); + wave_sizze_128540 = LOCKSTEP_WIDTH; + group_tid_128539 = get_group_id(0); + + int32_t phys_tid_86345; + + phys_tid_86345 = global_tid_128537; + + int64_t gtid_86344; + + gtid_86344 = sext_i32_i64(group_tid_128539) * segmap_group_sizze_86355 + + 
sext_i32_i64(local_tid_128538); + if (slt64(gtid_86344, m_70861)) { + int64_t x_86359 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_86344]; + int64_t n_86360 = add64((int64_t) 1, x_86359); + double i64_res_86361 = sitofp_i64_f64(n_86360); + double div_86362 = i64_res_86361 - 1.0; + + ((__global int64_t *) mem_124072)[gtid_86344] = n_86360; + ((__global double *) mem_124074)[gtid_86344] = div_86362; + } + + error_0: + return; + #undef segmap_group_sizze_86355 +} +__kernel void mainDetailedzisegmap_86809(__global int *global_failure, + int64_t m_70861, double level_70867, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124127, + __global unsigned char *mem_124130, + __global unsigned char *mem_124133) +{ + #define segmap_group_sizze_87127 (mainDetailedzisegmap_group_sizze_86811) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128704; + int32_t local_tid_128705; + int64_t group_sizze_128708; + int32_t wave_sizze_128707; + int32_t group_tid_128706; + + global_tid_128704 = get_global_id(0); + local_tid_128705 = get_local_id(0); + group_sizze_128708 = get_local_size(0); + wave_sizze_128707 = LOCKSTEP_WIDTH; + group_tid_128706 = get_group_id(0); + + int32_t phys_tid_86809; + + phys_tid_86809 = global_tid_128704; + + int64_t gtid_86808; + + gtid_86808 = sext_i32_i64(group_tid_128706) * segmap_group_sizze_87127 + + sext_i32_i64(local_tid_128705); + if (slt64(gtid_86808, m_70861)) { + double pval_brownian_motion_max_res_87131 = ((__global + double *) mem_124127)[gtid_86808]; + int64_t defunc_0_f_res_87132 = ((__global + int64_t *) mem_124130)[gtid_86808]; + bool isnan_res_87133; + + isnan_res_87133 = futrts_isnan64(pval_brownian_motion_max_res_87131); + + bool cond_87134 = !isnan_res_87133; + bool cond_t_res_87135 = pval_brownian_motion_max_res_87131 < + level_70867; + bool x_87136 = cond_87134 && cond_t_res_87135; + bool 
chk_t_res_87137 = defunc_0_f_res_87132 == + (int64_t) 9223372036854775807; + bool chk_t_res_87138 = !chk_t_res_87137; + bool x_87139 = x_87136 && chk_t_res_87138; + int64_t y_start_87140; + + if (x_87139) { + int64_t x_87130 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_86808]; + int64_t y_start_t_res_87141 = sub64(x_87130, defunc_0_f_res_87132); + + y_start_87140 = y_start_t_res_87141; + } else { + y_start_87140 = (int64_t) 0; + } + ((__global int64_t *) mem_124133)[gtid_86808] = y_start_87140; + } + + error_0: + return; + #undef segmap_group_sizze_87127 +} +__kernel void mainDetailedzisegmap_86849(__global int *global_failure, + int64_t m_70861, __global + unsigned char *mem_124124, __global + unsigned char *mem_124127) +{ + #define segmap_group_sizze_87010 (mainDetailedzisegmap_group_sizze_86851) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128639; + int32_t local_tid_128640; + int64_t group_sizze_128643; + int32_t wave_sizze_128642; + int32_t group_tid_128641; + + global_tid_128639 = get_global_id(0); + local_tid_128640 = get_local_id(0); + group_sizze_128643 = get_local_size(0); + wave_sizze_128642 = LOCKSTEP_WIDTH; + group_tid_128641 = get_group_id(0); + + int32_t phys_tid_86849; + + phys_tid_86849 = global_tid_128639; + + int64_t gtid_86848; + + gtid_86848 = sext_i32_i64(group_tid_128641) * segmap_group_sizze_87010 + + sext_i32_i64(local_tid_128640); + if (slt64(gtid_86848, m_70861)) { + double defunc_2_reduce_res_87013 = ((__global + double *) mem_124124)[gtid_86848]; + double defunc_0_Q_arg_87014 = 3.0 * defunc_2_reduce_res_87013; + double zs_res_87015 = defunc_0_Q_arg_87014 / 1.4142135623730951; + double abs_res_87016 = fabs(zs_res_87015); + double zs_res_87017 = abs_res_87016 / 2.0; + double zp_res_87018 = 1.0 + zs_res_87017; + double zs_res_87019 = 1.0 / zp_res_87018; + double zt_res_87020 = zs_res_87019 * zs_res_87019; + double 
zt_res_87021 = zs_res_87019 * zt_res_87020; + double zt_res_87022 = zt_res_87020 * zt_res_87020; + double zt_res_87023 = zt_res_87020 * zt_res_87021; + double zt_res_87024 = zt_res_87021 * zt_res_87021; + double zt_res_87025 = zt_res_87021 * zt_res_87022; + double zt_res_87026 = zt_res_87022 * zt_res_87022; + double zt_res_87027 = zt_res_87022 * zt_res_87023; + double zt_res_87028 = 0.17087277 * zt_res_87027; + double zt_res_87029 = 0.82215223 * zt_res_87026; + double zt_res_87030 = 1.48851587 * zt_res_87025; + double zt_res_87031 = 1.13520398 * zt_res_87024; + double zt_res_87032 = 0.27886807 * zt_res_87023; + double zt_res_87033 = 0.18628806 * zt_res_87022; + double zt_res_87034 = 9.678418e-2 * zt_res_87021; + double zt_res_87035 = 0.37409196 * zt_res_87020; + double zt_res_87036 = 1.00002368 * zs_res_87019; + double zt_res_87037 = zs_res_87015 * zs_res_87015; + double zm_res_87038 = 0.0 - zt_res_87037; + double zm_res_87039 = zm_res_87038 - 1.26551223; + double zp_res_87040 = zt_res_87036 + zm_res_87039; + double zp_res_87041 = zt_res_87035 + zp_res_87040; + double zp_res_87042 = zt_res_87034 + zp_res_87041; + double zm_res_87043 = zp_res_87042 - zt_res_87033; + double zp_res_87044 = zt_res_87032 + zm_res_87043; + double zm_res_87045 = zp_res_87044 - zt_res_87031; + double zp_res_87046 = zt_res_87030 + zm_res_87045; + double zm_res_87047 = zp_res_87046 - zt_res_87029; + double zp_res_87048 = zt_res_87028 + zm_res_87047; + double exp_res_87049; + + exp_res_87049 = futrts_exp64(zp_res_87048); + + double zt_res_87050 = zs_res_87019 * exp_res_87049; + bool zgze_res_87051 = 0.0 <= zs_res_87015; + double erf_res_87052; + + if (zgze_res_87051) { + double zm_res_87053 = 1.0 - zt_res_87050; + + erf_res_87052 = zm_res_87053; + } else { + double zm_res_87054 = zt_res_87050 - 1.0; + + erf_res_87052 = zm_res_87054; + } + + double zp_res_87055 = 1.0 + erf_res_87052; + double zs_res_87056 = zp_res_87055 / 2.0; + double defunc_0_Q_res_87057 = 1.0 - zs_res_87056; + double 
y_87058 = fpow64(defunc_2_reduce_res_87013, 2.0); + double negate_arg_87059 = 4.0 * y_87058; + double defunc_0_exp_arg_87060 = 0.0 - negate_arg_87059; + double defunc_0_exp_res_87061 = fpow64(2.718281828459045, + defunc_0_exp_arg_87060); + double x_87062 = defunc_0_Q_res_87057 + defunc_0_exp_res_87061; + double zs_res_87063 = defunc_2_reduce_res_87013 / 1.4142135623730951; + double abs_res_87064 = fabs(zs_res_87063); + double zs_res_87065 = abs_res_87064 / 2.0; + double zp_res_87066 = 1.0 + zs_res_87065; + double zs_res_87067 = 1.0 / zp_res_87066; + double zt_res_87068 = zs_res_87067 * zs_res_87067; + double zt_res_87069 = zs_res_87067 * zt_res_87068; + double zt_res_87070 = zt_res_87068 * zt_res_87068; + double zt_res_87071 = zt_res_87068 * zt_res_87069; + double zt_res_87072 = zt_res_87069 * zt_res_87069; + double zt_res_87073 = zt_res_87069 * zt_res_87070; + double zt_res_87074 = zt_res_87070 * zt_res_87070; + double zt_res_87075 = zt_res_87070 * zt_res_87071; + double zt_res_87076 = 0.17087277 * zt_res_87075; + double zt_res_87077 = 0.82215223 * zt_res_87074; + double zt_res_87078 = 1.48851587 * zt_res_87073; + double zt_res_87079 = 1.13520398 * zt_res_87072; + double zt_res_87080 = 0.27886807 * zt_res_87071; + double zt_res_87081 = 0.18628806 * zt_res_87070; + double zt_res_87082 = 9.678418e-2 * zt_res_87069; + double zt_res_87083 = 0.37409196 * zt_res_87068; + double zt_res_87084 = 1.00002368 * zs_res_87067; + double zt_res_87085 = zs_res_87063 * zs_res_87063; + double zm_res_87086 = 0.0 - zt_res_87085; + double zm_res_87087 = zm_res_87086 - 1.26551223; + double zp_res_87088 = zt_res_87084 + zm_res_87087; + double zp_res_87089 = zt_res_87083 + zp_res_87088; + double zp_res_87090 = zt_res_87082 + zp_res_87089; + double zm_res_87091 = zp_res_87090 - zt_res_87081; + double zp_res_87092 = zt_res_87080 + zm_res_87091; + double zm_res_87093 = zp_res_87092 - zt_res_87079; + double zp_res_87094 = zt_res_87078 + zm_res_87093; + double zm_res_87095 = zp_res_87094 - 
zt_res_87077; + double zp_res_87096 = zt_res_87076 + zm_res_87095; + double exp_res_87097; + + exp_res_87097 = futrts_exp64(zp_res_87096); + + double zt_res_87098 = zs_res_87067 * exp_res_87097; + bool zgze_res_87099 = 0.0 <= zs_res_87063; + double erf_res_87100; + + if (zgze_res_87099) { + double zm_res_87101 = 1.0 - zt_res_87098; + + erf_res_87100 = zm_res_87101; + } else { + double zm_res_87102 = zt_res_87098 - 1.0; + + erf_res_87100 = zm_res_87102; + } + + double zp_res_87103 = 1.0 + erf_res_87100; + double zs_res_87104 = zp_res_87103 / 2.0; + double defunc_0_Q_res_87105 = 1.0 - zs_res_87104; + double y_87106 = defunc_0_exp_res_87061 * defunc_0_Q_res_87105; + double y_87107 = x_87062 - y_87106; + double pval_brownian_motion_max_res_87108 = 2.0 * y_87107; + + ((__global double *) mem_124127)[gtid_86848] = + pval_brownian_motion_max_res_87108; + } + + error_0: + return; + #undef segmap_group_sizze_87010 +} +__kernel void mainDetailedzisegmap_86976(__global int *global_failure, + int64_t m_70861, __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124121) +{ + #define segmap_group_sizze_86984 (mainDetailedzisegmap_group_sizze_86978) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128574; + int32_t local_tid_128575; + int64_t group_sizze_128578; + int32_t wave_sizze_128577; + int32_t group_tid_128576; + + global_tid_128574 = get_global_id(0); + local_tid_128575 = get_local_id(0); + group_sizze_128578 = get_local_size(0); + wave_sizze_128577 = LOCKSTEP_WIDTH; + group_tid_128576 = get_group_id(0); + + int32_t phys_tid_86976; + + phys_tid_86976 = global_tid_128574; + + int64_t gtid_86975; + + gtid_86975 = sext_i32_i64(group_tid_128576) * segmap_group_sizze_86984 + + sext_i32_i64(local_tid_128575); + if (slt64(gtid_86975, m_70861)) { + int64_t x_86987 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_86975]; + double 
i64_res_86988 = sitofp_i64_f64(x_86987); + + ((__global double *) mem_124121)[gtid_86975] = i64_res_86988; + } + + error_0: + return; + #undef segmap_group_sizze_86984 +} +__kernel void mainDetailedzisegmap_87154(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + __global + unsigned char *images_mem_120108, + __global + unsigned char *hist_inds_mem_124138, + __global unsigned char *mem_124142) +{ + #define segmap_group_sizze_87181 (mainDetailedzisegmap_group_sizze_87157) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128711; + int32_t local_tid_128712; + int64_t group_sizze_128715; + int32_t wave_sizze_128714; + int32_t group_tid_128713; + + global_tid_128711 = get_global_id(0); + local_tid_128712 = get_local_id(0); + group_sizze_128715 = get_local_size(0); + wave_sizze_128714 = LOCKSTEP_WIDTH; + group_tid_128713 = get_group_id(0); + + int32_t phys_tid_87154; + + phys_tid_87154 = global_tid_128711; + + int64_t gtid_87152; + + gtid_87152 = squot64(sext_i32_i64(group_tid_128713) * + segmap_group_sizze_87181 + + sext_i32_i64(local_tid_128712), N_70860); + + int64_t gtid_87153; + + gtid_87153 = sext_i32_i64(group_tid_128713) * segmap_group_sizze_87181 + + sext_i32_i64(local_tid_128712) - + squot64(sext_i32_i64(group_tid_128713) * segmap_group_sizze_87181 + + sext_i32_i64(local_tid_128712), N_70860) * N_70860; + if (slt64(gtid_87152, m_70861) && slt64(gtid_87153, N_70860)) { + int64_t x_87184 = ((__global + int64_t *) hist_inds_mem_124138)[gtid_87152]; + bool cond_87187 = slt64(gtid_87153, x_87184); + double defunc_1_f_res_87188; + + if (cond_87187) { + defunc_1_f_res_87188 = NAN; + } else { + double x_87186 = ((__global + double *) images_mem_120108)[gtid_87152 * + N_70860 + + gtid_87153]; + + defunc_1_f_res_87188 = x_87186; + } + ((__global double *) mem_124142)[gtid_87152 * N_70860 + gtid_87153] = + defunc_1_f_res_87188; + } + + error_0: + return; + 
#undef segmap_group_sizze_87181 +} +__kernel void mainDetailedzisegmap_87193(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t k2p2zq_70876, + int64_t num_groups_87218, + int64_t num_threads_125812, __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_120124, + __global unsigned char *mem_124145, + __global unsigned char *mem_124149, + __global unsigned char *mem_124191) +{ + #define segmap_group_sizze_87217 (mainDetailedzisegmap_group_sizze_87195) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128719; + int32_t local_tid_128720; + int64_t group_sizze_128723; + int32_t wave_sizze_128722; + int32_t group_tid_128721; + + global_tid_128719 = get_global_id(0); + local_tid_128720 = get_local_id(0); + group_sizze_128723 = get_local_size(0); + wave_sizze_128722 = LOCKSTEP_WIDTH; + group_tid_128721 = get_group_id(0); + + int32_t phys_tid_87193; + + phys_tid_87193 = global_tid_128719; + + int32_t phys_group_id_128724; + + phys_group_id_128724 = get_group_id(0); + for (int32_t i_128725 = 0; i_128725 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_87217)) - + phys_group_id_128724, sext_i64_i32(num_groups_87218)); + i_128725++) { + int32_t virt_group_id_128726 = phys_group_id_128724 + i_128725 * + sext_i64_i32(num_groups_87218); + int64_t gtid_87192 = sext_i32_i64(virt_group_id_128726) * + segmap_group_sizze_87217 + sext_i32_i64(local_tid_128720); + + if (slt64(gtid_87192, m_70861)) { + for (int64_t i_119914 = 0; i_119914 < k2p2zq_70876; i_119914++) { + for (int64_t i_119918 = 0; i_119918 < k2p2zq_70876; + i_119918++) { + double defunc_2_reduce_res_87226; + double redout_119920 = 0.0; + + for (int64_t i_119921 = 0; i_119921 < n_70864; i_119921++) { + double x_87230 = ((__global + double *) mem_124145)[i_119921 * + m_70861 + + gtid_87192]; + double x_87231 = ((__global + double *) 
binop_p_mem_120117)[i_119914 * + N_70860 + + i_119921]; + double x_87232 = ((__global + double *) mem_120124)[i_119921 * + k2p2zq_70876 + + i_119918]; + double x_87233 = x_87231 * x_87232; + bool isnan_res_87234; + + isnan_res_87234 = futrts_isnan64(x_87230); + + double y_87235; + + if (isnan_res_87234) { + y_87235 = 0.0; + } else { + y_87235 = 1.0; + } + + double defunc_2_f_res_87236 = x_87233 * y_87235; + double defunc_1_op_res_87229 = defunc_2_f_res_87236 + + redout_119920; + double redout_tmp_128729 = defunc_1_op_res_87229; + + redout_119920 = redout_tmp_128729; + } + defunc_2_reduce_res_87226 = redout_119920; + ((__global double *) mem_124149)[phys_tid_87193 + + (i_119914 * + (num_threads_125812 * + k2p2zq_70876) + + i_119918 * + num_threads_125812)] = + defunc_2_reduce_res_87226; + } + } + for (int64_t i_128730 = 0; i_128730 < k2p2zq_70876; i_128730++) { + for (int64_t i_128731 = 0; i_128731 < k2p2zq_70876; + i_128731++) { + ((__global double *) mem_124191)[i_128730 * (m_70861 * + k2p2zq_70876) + + i_128731 * m_70861 + + gtid_87192] = ((__global + double *) mem_124149)[phys_tid_87193 + + (i_128730 * + (num_threads_125812 * + k2p2zq_70876) + + i_128731 * + num_threads_125812)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_87217 +} +__kernel void mainDetailedzisegmap_87239(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t k2p2zq_70876, + int64_t num_groups_87395, + int64_t num_threads_125814, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124, __global + unsigned char *mem_124142, __global + unsigned char *mem_124194, __global + unsigned char *mem_124210) +{ + #define segmap_group_sizze_87394 (mainDetailedzisegmap_group_sizze_87242) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128732; + int32_t local_tid_128733; + 
int64_t group_sizze_128736; + int32_t wave_sizze_128735; + int32_t group_tid_128734; + + global_tid_128732 = get_global_id(0); + local_tid_128733 = get_local_id(0); + group_sizze_128736 = get_local_size(0); + wave_sizze_128735 = LOCKSTEP_WIDTH; + group_tid_128734 = get_group_id(0); + + int32_t phys_tid_87239; + + phys_tid_87239 = global_tid_128732; + + int32_t phys_group_id_128737; + + phys_group_id_128737 = get_group_id(0); + for (int32_t i_128738 = 0; i_128738 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + segmap_group_sizze_87394)) - + phys_group_id_128737, sext_i64_i32(num_groups_87395)); + i_128738++) { + int32_t virt_group_id_128739 = phys_group_id_128737 + i_128738 * + sext_i64_i32(num_groups_87395); + int64_t gtid_87237 = squot64(sext_i32_i64(virt_group_id_128739) * + segmap_group_sizze_87394 + + sext_i32_i64(local_tid_128733), + k2p2zq_70876); + int64_t gtid_87238 = sext_i32_i64(virt_group_id_128739) * + segmap_group_sizze_87394 + sext_i32_i64(local_tid_128733) - + squot64(sext_i32_i64(virt_group_id_128739) * + segmap_group_sizze_87394 + + sext_i32_i64(local_tid_128733), k2p2zq_70876) * + k2p2zq_70876; + + if (slt64(gtid_87237, m_70861) && slt64(gtid_87238, k2p2zq_70876)) { + for (int64_t i_119924 = 0; i_119924 < k2p2zq_70876; i_119924++) { + double defunc_2_reduce_res_87406; + double redout_119926 = 0.0; + + for (int64_t i_119927 = 0; i_119927 < n_70864; i_119927++) { + double x_87410 = ((__global + double *) mem_124142)[gtid_87237 * + N_70860 + + i_119927]; + double x_87411 = ((__global double *) mem_120120)[i_119927 * + k2p2zq_70876 + + gtid_87238]; + double x_87412 = ((__global double *) mem_120124)[i_119927 * + k2p2zq_70876 + + i_119924]; + double x_87413 = x_87411 * x_87412; + bool isnan_res_87414; + + isnan_res_87414 = futrts_isnan64(x_87410); + + double y_87415; + + if (isnan_res_87414) { + y_87415 = 0.0; + } else { + y_87415 = 1.0; + } + + double defunc_2_f_res_87416 = x_87413 * y_87415; + double defunc_1_op_res_87409 = 
defunc_2_f_res_87416 + + redout_119926; + double redout_tmp_128741 = defunc_1_op_res_87409; + + redout_119926 = redout_tmp_128741; + } + defunc_2_reduce_res_87406 = redout_119926; + ((__global double *) mem_124194)[phys_tid_87239 + i_119924 * + num_threads_125814] = + defunc_2_reduce_res_87406; + } + for (int64_t i_128742 = 0; i_128742 < k2p2zq_70876; i_128742++) { + ((__global double *) mem_124210)[i_128742 * (k2p2zq_70876 * + m_70861) + + gtid_87237 * k2p2zq_70876 + + gtid_87238] = ((__global + double *) mem_124194)[phys_tid_87239 + + i_128742 * + num_threads_125814]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_87394 +} +__kernel void mainDetailedzisegmap_87609(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_72499, int64_t nm_72500, + int64_t gauss_jordan_res_r_ixfn_124361, + int64_t gauss_jordan_res_r_ixfn_124362, + int64_t gauss_jordan_res_r_ixfn_124364, + __global + unsigned char *gauss_jordan_res_r_mem_124366, + __global unsigned char *mem_124371) +{ + #define segmap_group_sizze_88112 (mainDetailedzisegmap_group_sizze_87613) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128892; + int32_t local_tid_128893; + int64_t group_sizze_128896; + int32_t wave_sizze_128895; + int32_t group_tid_128894; + + global_tid_128892 = get_global_id(0); + local_tid_128893 = get_local_id(0); + group_sizze_128896 = get_local_size(0); + wave_sizze_128895 = LOCKSTEP_WIDTH; + group_tid_128894 = get_group_id(0); + + int32_t phys_tid_87609; + + phys_tid_87609 = global_tid_128892; + + int64_t gtid_87606; + + gtid_87606 = squot64(sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + + sext_i32_i64(local_tid_128893), k2p2zq_70876 * + k2p2zq_70876); + + int64_t gtid_slice_87604; + + gtid_slice_87604 = squot64(sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + + 
sext_i32_i64(local_tid_128893) - + squot64(sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + + sext_i32_i64(local_tid_128893), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), k2p2zq_70876); + + int64_t gtid_slice_87605; + + gtid_slice_87605 = sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + sext_i32_i64(local_tid_128893) - + squot64(sext_i32_i64(group_tid_128894) * segmap_group_sizze_88112 + + sext_i32_i64(local_tid_128893), k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - squot64(sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + + sext_i32_i64(local_tid_128893) - + squot64(sext_i32_i64(group_tid_128894) * + segmap_group_sizze_88112 + + sext_i32_i64(local_tid_128893), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + if ((slt64(gtid_87606, m_70861) && slt64(gtid_slice_87604, k2p2zq_70876)) && + slt64(gtid_slice_87605, k2p2zq_70876)) { + int64_t slice_88116 = k2p2zq_70876 + gtid_slice_87605; + int64_t binop_x_115370 = nm_72500 * gtid_87606; + int64_t binop_y_115371 = m_72499 * gtid_slice_87604; + int64_t binop_x_115372 = binop_x_115370 + binop_y_115371; + int64_t binop_x_115373 = slice_88116 + binop_x_115372; + int64_t new_index_115374 = squot64(binop_x_115373, nm_72500); + int64_t binop_y_115386 = nm_72500 * new_index_115374; + int64_t new_index_115387 = binop_x_115373 - binop_y_115386; + double v_88117 = ((__global + double *) gauss_jordan_res_r_mem_124366)[gauss_jordan_res_r_ixfn_124361 + + (new_index_115374 * + gauss_jordan_res_r_ixfn_124362 + + new_index_115387 * + gauss_jordan_res_r_ixfn_124364)]; + + ((__global double *) mem_124371)[gtid_87606 * (k2p2zq_70876 * + k2p2zq_70876) + + gtid_slice_87604 * k2p2zq_70876 + + gtid_slice_87605] = v_88117; + } + + error_0: + return; + #undef segmap_group_sizze_88112 +} +__kernel void mainDetailedzisegmap_87720(__global int *global_failure, + int64_t m_70861, int64_t nm_72500, + int64_t 
ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124349) +{ + #define segmap_group_sizze_88102 (mainDetailedzisegmap_group_sizze_87723) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128886; + int32_t local_tid_128887; + int64_t group_sizze_128890; + int32_t wave_sizze_128889; + int32_t group_tid_128888; + + global_tid_128886 = get_global_id(0); + local_tid_128887 = get_local_id(0); + group_sizze_128890 = get_local_size(0); + wave_sizze_128889 = LOCKSTEP_WIDTH; + group_tid_128888 = get_group_id(0); + + int32_t phys_tid_87720; + + phys_tid_87720 = global_tid_128886; + + int64_t gtid_87718; + + gtid_87718 = squot64(sext_i32_i64(group_tid_128888) * + segmap_group_sizze_88102 + + sext_i32_i64(local_tid_128887), nm_72500); + + int64_t gtid_87719; + + gtid_87719 = sext_i32_i64(group_tid_128888) * segmap_group_sizze_88102 + + sext_i32_i64(local_tid_128887) - + squot64(sext_i32_i64(group_tid_128888) * segmap_group_sizze_88102 + + sext_i32_i64(local_tid_128887), nm_72500) * nm_72500; + if (slt64(gtid_87718, m_70861) && slt64(gtid_87719, nm_72500)) { + double write_value_88106 = ((__global double *) mem_124349)[gtid_87718 * + nm_72500 + + gtid_87719]; + + if ((sle64((int64_t) 0, gtid_87718) && slt64(gtid_87718, m_70861)) && + (sle64((int64_t) 0, gtid_87719) && slt64(gtid_87719, nm_72500))) { + ((__global double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87718 * + ctx_param_ext_124326 + + gtid_87719 * + ctx_param_ext_124328)] = + write_value_88106; + } + } + + error_0: + return; + #undef segmap_group_sizze_88102 +} +__kernel void mainDetailedzisegmap_87740(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_72499, int64_t nm_72500, + int64_t i_87986, + int64_t 
ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124345, + __global unsigned char *mem_124349) +{ + #define segmap_group_sizze_88057 (mainDetailedzisegmap_group_sizze_87743) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128881; + int32_t local_tid_128882; + int64_t group_sizze_128885; + int32_t wave_sizze_128884; + int32_t group_tid_128883; + + global_tid_128881 = get_global_id(0); + local_tid_128882 = get_local_id(0); + group_sizze_128885 = get_local_size(0); + wave_sizze_128884 = LOCKSTEP_WIDTH; + group_tid_128883 = get_group_id(0); + + int32_t phys_tid_87740; + + phys_tid_87740 = global_tid_128881; + + int64_t gtid_87738; + + gtid_87738 = squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_88057 + + sext_i32_i64(local_tid_128882), nm_72500); + + int64_t gtid_87739; + + gtid_87739 = sext_i32_i64(group_tid_128883) * segmap_group_sizze_88057 + + sext_i32_i64(local_tid_128882) - + squot64(sext_i32_i64(group_tid_128883) * segmap_group_sizze_88057 + + sext_i32_i64(local_tid_128882), nm_72500) * nm_72500; + if (slt64(gtid_87738, m_70861) && slt64(gtid_87739, nm_72500)) { + bool cond_88062 = ((__global bool *) mem_124345)[gtid_87738]; + int64_t defunc_0_f_res_88064 = sdiv64(gtid_87739, m_72499); + int64_t defunc_0_f_res_88065 = smod64(gtid_87739, m_72499); + double defunc_0_f_res_88066; + + if (cond_88062) { + int64_t x_88067 = mul64(m_72499, defunc_0_f_res_88064); + int64_t i_88068 = add64(defunc_0_f_res_88065, x_88067); + bool x_88069 = sle64((int64_t) 0, i_88068); + bool y_88070 = slt64(i_88068, nm_72500); + bool bounds_check_88071 = x_88069 && y_88070; + bool index_certs_88072; + + if (!bounds_check_88071) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 400) == + -1) { + global_failure_args[0] = i_88068; + global_failure_args[1] = 
nm_72500; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_88073 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87738 * + ctx_param_ext_124326 + + i_88068 * + ctx_param_ext_124328)]; + + defunc_0_f_res_88066 = defunc_0_f_res_t_res_88073; + } else { + double v1_88061 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87738 * + ctx_param_ext_124326 + + i_87986 * + ctx_param_ext_124328)]; + bool x_88074 = sle64((int64_t) 0, defunc_0_f_res_88065); + bool y_88075 = slt64(defunc_0_f_res_88065, nm_72500); + bool bounds_check_88076 = x_88074 && y_88075; + bool index_certs_88077; + + if (!bounds_check_88076) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 401) == + -1) { + global_failure_args[0] = defunc_0_f_res_88065; + global_failure_args[1] = nm_72500; + ; + } + return; + } + } + + double x_88078 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87738 * + ctx_param_ext_124326 + + defunc_0_f_res_88065 * + ctx_param_ext_124328)]; + double x_88079 = x_88078 / v1_88061; + int64_t y_88080 = sub64(k2p2zq_70876, (int64_t) 1); + bool cond_88081 = slt64(defunc_0_f_res_88064, y_88080); + double defunc_0_f_res_f_res_88082; + + if (cond_88081) { + int64_t x_88083 = add64((int64_t) 1, defunc_0_f_res_88064); + int64_t x_88084 = mul64(m_72499, x_88083); + int64_t i_88085 = add64(defunc_0_f_res_88065, x_88084); + bool x_88086 = sle64((int64_t) 0, i_88085); + bool y_88087 = slt64(i_88085, nm_72500); + bool bounds_check_88088 = x_88086 && y_88087; + bool index_certs_88089; + + if (!bounds_check_88088) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 402) == -1) { + global_failure_args[0] = i_88085; + global_failure_args[1] = nm_72500; + ; + } + return; + } + } + + double x_88090 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87738 * + ctx_param_ext_124326 + + i_88085 * + ctx_param_ext_124328)]; + int64_t i_88091 = add64(i_87986, x_88084); + bool x_88092 = 
sle64((int64_t) 0, i_88091); + bool y_88093 = slt64(i_88091, nm_72500); + bool bounds_check_88094 = x_88092 && y_88093; + bool index_certs_88095; + + if (!bounds_check_88094) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 403) == -1) { + global_failure_args[0] = i_88091; + global_failure_args[1] = nm_72500; + ; + } + return; + } + } + + double x_88096 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87738 * + ctx_param_ext_124326 + + i_88091 * + ctx_param_ext_124328)]; + double y_88097 = x_88079 * x_88096; + double defunc_0_f_res_f_res_t_res_88098 = x_88090 - y_88097; + + defunc_0_f_res_f_res_88082 = defunc_0_f_res_f_res_t_res_88098; + } else { + defunc_0_f_res_f_res_88082 = x_88079; + } + defunc_0_f_res_88066 = defunc_0_f_res_f_res_88082; + } + ((__global double *) mem_124349)[gtid_87738 * nm_72500 + gtid_87739] = + defunc_0_f_res_88066; + } + + error_0: + return; + #undef segmap_group_sizze_88057 +} +__kernel void mainDetailedzisegmap_87830(__global int *global_failure, + int64_t m_70861, int64_t i_87986, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124345) +{ + #define segmap_group_sizze_88044 (mainDetailedzisegmap_group_sizze_87832) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128876; + int32_t local_tid_128877; + int64_t group_sizze_128880; + int32_t wave_sizze_128879; + int32_t group_tid_128878; + + global_tid_128876 = get_global_id(0); + local_tid_128877 = get_local_id(0); + group_sizze_128880 = get_local_size(0); + wave_sizze_128879 = LOCKSTEP_WIDTH; + group_tid_128878 = get_group_id(0); + + int32_t phys_tid_87830; + + phys_tid_87830 = global_tid_128876; + + int64_t gtid_87829; + + gtid_87829 = sext_i32_i64(group_tid_128878) * segmap_group_sizze_88044 + + sext_i32_i64(local_tid_128877); + if 
(slt64(gtid_87829, m_70861)) { + double v1_88049 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87829 * + ctx_param_ext_124326 + + i_87986 * + ctx_param_ext_124328)]; + bool cond_88050 = v1_88049 == 0.0; + + ((__global bool *) mem_124345)[gtid_87829] = cond_88050; + } + + error_0: + return; + #undef segmap_group_sizze_88044 +} +__kernel void mainDetailedzisegmap_87913(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t m_72499, int64_t nm_72500, + __global + unsigned char *defunc_3_map_res_mem_124294, + __global unsigned char *mem_124322) +{ + #define segmap_group_sizze_87964 (mainDetailedzisegmap_group_sizze_87916) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128849; + int32_t local_tid_128850; + int64_t group_sizze_128853; + int32_t wave_sizze_128852; + int32_t group_tid_128851; + + global_tid_128849 = get_global_id(0); + local_tid_128850 = get_local_id(0); + group_sizze_128853 = get_local_size(0); + wave_sizze_128852 = LOCKSTEP_WIDTH; + group_tid_128851 = get_group_id(0); + + int32_t phys_tid_87913; + + phys_tid_87913 = global_tid_128849; + + int64_t gtid_87911; + + gtid_87911 = squot64(sext_i32_i64(group_tid_128851) * + segmap_group_sizze_87964 + + sext_i32_i64(local_tid_128850), nm_72500); + + int64_t gtid_87912; + + gtid_87912 = sext_i32_i64(group_tid_128851) * segmap_group_sizze_87964 + + sext_i32_i64(local_tid_128850) - + squot64(sext_i32_i64(group_tid_128851) * segmap_group_sizze_87964 + + sext_i32_i64(local_tid_128850), nm_72500) * nm_72500; + if (slt64(gtid_87911, m_70861) && slt64(gtid_87912, nm_72500)) { + int64_t defunc_0_f_res_87969 = sdiv64(gtid_87912, m_72499); + int64_t defunc_0_f_res_87970 = smod64(gtid_87912, m_72499); + bool cond_87971 = slt64(defunc_0_f_res_87970, k2p2zq_70876); + double defunc_0_f_res_87972; + + if 
(cond_87971) { + bool x_87973 = sle64((int64_t) 0, defunc_0_f_res_87969); + bool y_87974 = slt64(defunc_0_f_res_87969, k2p2zq_70876); + bool bounds_check_87975 = x_87973 && y_87974; + bool x_87976 = sle64((int64_t) 0, defunc_0_f_res_87970); + bool bounds_check_87977 = cond_87971 && x_87976; + bool index_ok_87978 = bounds_check_87975 && bounds_check_87977; + bool index_certs_87979; + + if (!index_ok_87978) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 395) == + -1) { + global_failure_args[0] = defunc_0_f_res_87969; + global_failure_args[1] = defunc_0_f_res_87970; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_87980 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_87911 * + (k2p2zq_70876 * + k2p2zq_70876) + + defunc_0_f_res_87969 * + k2p2zq_70876 + + defunc_0_f_res_87970]; + + defunc_0_f_res_87972 = defunc_0_f_res_t_res_87980; + } else { + int64_t y_87981 = add64(k2p2zq_70876, defunc_0_f_res_87969); + bool cond_87982 = defunc_0_f_res_87970 == y_87981; + double defunc_0_f_res_f_res_87983; + + if (cond_87982) { + defunc_0_f_res_f_res_87983 = 1.0; + } else { + defunc_0_f_res_f_res_87983 = 0.0; + } + defunc_0_f_res_87972 = defunc_0_f_res_f_res_87983; + } + ((__global double *) mem_124322)[gtid_87911 * nm_72500 + gtid_87912] = + defunc_0_f_res_87972; + } + + error_0: + return; + #undef segmap_group_sizze_87964 +} +__kernel void mainDetailedzisegmap_88122(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, int64_t k2p2zq_70876, + int64_t num_groups_88143, + int64_t num_threads_125825, __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124375, + __global unsigned char *mem_124378, + __global unsigned char *mem_124393) +{ + #define segmap_group_sizze_88142 (mainDetailedzisegmap_group_sizze_88124) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure 
>= 0) + return; + + int32_t global_tid_128900; + int32_t local_tid_128901; + int64_t group_sizze_128904; + int32_t wave_sizze_128903; + int32_t group_tid_128902; + + global_tid_128900 = get_global_id(0); + local_tid_128901 = get_local_id(0); + group_sizze_128904 = get_local_size(0); + wave_sizze_128903 = LOCKSTEP_WIDTH; + group_tid_128902 = get_group_id(0); + + int32_t phys_tid_88122; + + phys_tid_88122 = global_tid_128900; + + int32_t phys_group_id_128905; + + phys_group_id_128905 = get_group_id(0); + for (int32_t i_128906 = 0; i_128906 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_88142)) - + phys_group_id_128905, sext_i64_i32(num_groups_88143)); + i_128906++) { + int32_t virt_group_id_128907 = phys_group_id_128905 + i_128906 * + sext_i64_i32(num_groups_88143); + int64_t gtid_88121 = sext_i32_i64(virt_group_id_128907) * + segmap_group_sizze_88142 + sext_i32_i64(local_tid_128901); + + if (slt64(gtid_88121, m_70861)) { + for (int64_t i_119930 = 0; i_119930 < k2p2zq_70876; i_119930++) { + double defunc_2_reduce_res_88149; + double redout_119932 = 0.0; + + for (int64_t i_119933 = 0; i_119933 < n_70864; i_119933++) { + double x_88154 = ((__global double *) mem_124375)[i_119933 * + m_70861 + + gtid_88121]; + bool isnan_res_88155; + + isnan_res_88155 = futrts_isnan64(x_88154); + + double defunc_1_f_res_88156; + + if (isnan_res_88155) { + defunc_1_f_res_88156 = 0.0; + } else { + double x_88153 = ((__global + double *) binop_p_mem_120117)[i_119930 * + N_70860 + + i_119933]; + double defunc_1_f_res_f_res_88157 = x_88153 * x_88154; + + defunc_1_f_res_88156 = defunc_1_f_res_f_res_88157; + } + + double defunc_1_op_res_88152 = defunc_1_f_res_88156 + + redout_119932; + double redout_tmp_128909 = defunc_1_op_res_88152; + + redout_119932 = redout_tmp_128909; + } + defunc_2_reduce_res_88149 = redout_119932; + ((__global double *) mem_124378)[phys_tid_88122 + i_119930 * + num_threads_125825] = + defunc_2_reduce_res_88149; + } + for (int64_t i_128910 = 0; i_128910 
< k2p2zq_70876; i_128910++) { + ((__global double *) mem_124393)[i_128910 * m_70861 + + gtid_88121] = ((__global + double *) mem_124378)[phys_tid_88122 + + i_128910 * + num_threads_125825]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_88142 +} +__kernel void mainDetailedzisegmap_88263(__global int *global_failure, + int64_t m_70861, int64_t k2p2zq_70876, + int64_t num_groups_88283, + int64_t num_threads_125829, __global + unsigned char *mem_124597, __global + unsigned char *mem_124600, __global + unsigned char *mem_124603, __global + unsigned char *mem_124618) +{ + #define segmap_group_sizze_88282 (mainDetailedzisegmap_group_sizze_88265) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129032; + int32_t local_tid_129033; + int64_t group_sizze_129036; + int32_t wave_sizze_129035; + int32_t group_tid_129034; + + global_tid_129032 = get_global_id(0); + local_tid_129033 = get_local_id(0); + group_sizze_129036 = get_local_size(0); + wave_sizze_129035 = LOCKSTEP_WIDTH; + group_tid_129034 = get_group_id(0); + + int32_t phys_tid_88263; + + phys_tid_88263 = global_tid_129032; + + int32_t phys_group_id_129037; + + phys_group_id_129037 = get_group_id(0); + for (int32_t i_129038 = 0; i_129038 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_88282)) - + phys_group_id_129037, sext_i64_i32(num_groups_88283)); + i_129038++) { + int32_t virt_group_id_129039 = phys_group_id_129037 + i_129038 * + sext_i64_i32(num_groups_88283); + int64_t gtid_88262 = sext_i32_i64(virt_group_id_129039) * + segmap_group_sizze_88282 + sext_i32_i64(local_tid_129033); + + if (slt64(gtid_88262, m_70861)) { + for (int64_t i_119936 = 0; i_119936 < k2p2zq_70876; i_119936++) { + double defunc_0_f_res_88290; + double redout_119938 = 0.0; + + for (int64_t i_119939 = 0; i_119939 < k2p2zq_70876; + i_119939++) { + double x_88294 
= ((__global double *) mem_124600)[i_119939 * + m_70861 + + gtid_88262]; + double x_88295 = ((__global double *) mem_124597)[i_119936 * + (m_70861 * + k2p2zq_70876) + + i_119939 * + m_70861 + + gtid_88262]; + double defunc_1_f_res_88296 = x_88294 * x_88295; + double defunc_1_op_res_88293 = defunc_1_f_res_88296 + + redout_119938; + double redout_tmp_129041 = defunc_1_op_res_88293; + + redout_119938 = redout_tmp_129041; + } + defunc_0_f_res_88290 = redout_119938; + ((__global double *) mem_124603)[phys_tid_88263 + i_119936 * + num_threads_125829] = + defunc_0_f_res_88290; + } + for (int64_t i_129042 = 0; i_129042 < k2p2zq_70876; i_129042++) { + ((__global double *) mem_124618)[i_129042 * m_70861 + + gtid_88262] = ((__global + double *) mem_124603)[phys_tid_88263 + + i_129042 * + num_threads_125829]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_88282 +} +__kernel void mainDetailedzisegmap_88395(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_88414, + int64_t num_threads_125833, __global + unsigned char *mem_120124, __global + unsigned char *mem_124662, __global + unsigned char *mem_124665, __global + unsigned char *mem_124680) +{ + #define segmap_group_sizze_88413 (mainDetailedzisegmap_group_sizze_88397) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129120; + int32_t local_tid_129121; + int64_t group_sizze_129124; + int32_t wave_sizze_129123; + int32_t group_tid_129122; + + global_tid_129120 = get_global_id(0); + local_tid_129121 = get_local_id(0); + group_sizze_129124 = get_local_size(0); + wave_sizze_129123 = LOCKSTEP_WIDTH; + group_tid_129122 = get_group_id(0); + + int32_t phys_tid_88395; + + phys_tid_88395 = global_tid_129120; + + int32_t phys_group_id_129125; + + phys_group_id_129125 = get_group_id(0); + for (int32_t i_129126 = 
0; i_129126 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_88413)) - + phys_group_id_129125, sext_i64_i32(num_groups_88414)); + i_129126++) { + int32_t virt_group_id_129127 = phys_group_id_129125 + i_129126 * + sext_i64_i32(num_groups_88414); + int64_t gtid_88394 = sext_i32_i64(virt_group_id_129127) * + segmap_group_sizze_88413 + sext_i32_i64(local_tid_129121); + + if (slt64(gtid_88394, m_70861)) { + for (int64_t i_119946 = 0; i_119946 < N_70860; i_119946++) { + double defunc_0_f_res_88420; + double redout_119948 = 0.0; + + for (int64_t i_119949 = 0; i_119949 < k2p2zq_70876; + i_119949++) { + double x_88424 = ((__global double *) mem_124662)[i_119949 * + m_70861 + + gtid_88394]; + double x_88425 = ((__global double *) mem_120124)[i_119946 * + k2p2zq_70876 + + i_119949]; + double defunc_1_f_res_88426 = x_88424 * x_88425; + double defunc_1_op_res_88423 = defunc_1_f_res_88426 + + redout_119948; + double redout_tmp_129129 = defunc_1_op_res_88423; + + redout_119948 = redout_tmp_129129; + } + defunc_0_f_res_88420 = redout_119948; + ((__global double *) mem_124665)[phys_tid_88395 + i_119946 * + num_threads_125833] = + defunc_0_f_res_88420; + } + for (int64_t i_129130 = 0; i_129130 < N_70860; i_129130++) { + ((__global double *) mem_124680)[i_129130 * m_70861 + + gtid_88394] = ((__global + double *) mem_124665)[phys_tid_88395 + + i_129130 * + num_threads_125833]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_88413 +} +__kernel void mainDetailedzisegmap_88573(__global int *global_failure, + int64_t N_70860, int64_t m_70861, + __global unsigned char *mem_124906, + __global unsigned char *mem_124909, + __global unsigned char *mem_124914, + __global unsigned char *mem_124917) +{ + #define segmap_group_sizze_88717 (mainDetailedzisegmap_group_sizze_88576) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t 
global_tid_129329; + int32_t local_tid_129330; + int64_t group_sizze_129333; + int32_t wave_sizze_129332; + int32_t group_tid_129331; + + global_tid_129329 = get_global_id(0); + local_tid_129330 = get_local_id(0); + group_sizze_129333 = get_local_size(0); + wave_sizze_129332 = LOCKSTEP_WIDTH; + group_tid_129331 = get_group_id(0); + + int32_t phys_tid_88573; + + phys_tid_88573 = global_tid_129329; + + int64_t gtid_88571; + + gtid_88571 = squot64(sext_i32_i64(group_tid_129331) * + segmap_group_sizze_88717 + + sext_i32_i64(local_tid_129330), N_70860); + + int64_t gtid_88572; + + gtid_88572 = sext_i32_i64(group_tid_129331) * segmap_group_sizze_88717 + + sext_i32_i64(local_tid_129330) - + squot64(sext_i32_i64(group_tid_129331) * segmap_group_sizze_88717 + + sext_i32_i64(local_tid_129330), N_70860) * N_70860; + if (slt64(gtid_88571, m_70861) && slt64(gtid_88572, N_70860)) { + double x_88721 = ((__global double *) mem_124909)[gtid_88571 * N_70860 + + gtid_88572]; + bool isnan_res_88724; + + isnan_res_88724 = futrts_isnan64(x_88721); + + bool defunc_0_p_res_88725 = !isnan_res_88724; + int64_t defunc_1_f_res_88726; + + if (defunc_0_p_res_88725) { + int64_t x_88722 = ((__global int64_t *) mem_124906)[gtid_88571 * + N_70860 + + gtid_88572]; + int64_t defunc_1_f_res_t_res_88727 = sub64(x_88722, (int64_t) 1); + + defunc_1_f_res_88726 = defunc_1_f_res_t_res_88727; + } else { + defunc_1_f_res_88726 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_88571) && slt64(gtid_88571, m_70861)) && + (sle64((int64_t) 0, defunc_1_f_res_88726) && + slt64(defunc_1_f_res_88726, N_70860))) { + ((__global int64_t *) mem_124917)[gtid_88571 * N_70860 + + defunc_1_f_res_88726] = + gtid_88572; + } + if ((sle64((int64_t) 0, gtid_88571) && slt64(gtid_88571, m_70861)) && + (sle64((int64_t) 0, defunc_1_f_res_88726) && + slt64(defunc_1_f_res_88726, N_70860))) { + ((__global double *) mem_124914)[gtid_88571 * N_70860 + + defunc_1_f_res_88726] = x_88721; + } + } + + error_0: + return; + #undef 
segmap_group_sizze_88717 +} +__kernel void mainDetailedzisegmap_88744(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, double hfrac_70866, + int64_t k2p2_70874, __global + unsigned char *mem_124927, __global + unsigned char *mem_124930, __global + unsigned char *mem_124933, __global + unsigned char *mem_124935, __global + unsigned char *mem_124937) +{ + #define segmap_group_sizze_88780 (mainDetailedzisegmap_group_sizze_88746) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129337; + int32_t local_tid_129338; + int64_t group_sizze_129341; + int32_t wave_sizze_129340; + int32_t group_tid_129339; + + global_tid_129337 = get_global_id(0); + local_tid_129338 = get_local_id(0); + group_sizze_129341 = get_local_size(0); + wave_sizze_129340 = LOCKSTEP_WIDTH; + group_tid_129339 = get_group_id(0); + + int32_t phys_tid_88744; + + phys_tid_88744 = global_tid_129337; + + int64_t gtid_88743; + + gtid_88743 = sext_i32_i64(group_tid_129339) * segmap_group_sizze_88780 + + sext_i32_i64(local_tid_129338); + if (slt64(gtid_88743, m_70861)) { + int64_t defunc_0_f_res_88787; + int64_t redout_119950 = (int64_t) 0; + + for (int64_t i_119951 = 0; i_119951 < n_70864; i_119951++) { + double x_88791 = ((__global double *) mem_124927)[i_119951 * + m_70861 + + gtid_88743]; + bool isnan_res_88792; + + isnan_res_88792 = futrts_isnan64(x_88791); + + bool cond_88793 = !isnan_res_88792; + int64_t defunc_0_f_res_88794 = btoi_bool_i64(cond_88793); + int64_t defunc_1_op_res_88790 = add64(defunc_0_f_res_88794, + redout_119950); + int64_t redout_tmp_129342 = defunc_1_op_res_88790; + + redout_119950 = redout_tmp_129342; + } + defunc_0_f_res_88787 = redout_119950; + + double defunc_0_f_res_88795; + double redout_115314 = 0.0; + + for (int64_t i_115315 = 0; i_115315 < n_70864; i_115315++) { + bool 
cond_88800 = slt64(i_115315, defunc_0_f_res_88787); + double defunc_0_f_res_88801; + + if (cond_88800) { + bool y_88803 = slt64(i_115315, N_70860); + bool index_certs_88805; + + if (!y_88803) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 404) == -1) { + global_failure_args[0] = i_115315; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_88806 = ((__global + double *) mem_124930)[i_115315 * + m_70861 + + gtid_88743]; + + defunc_0_f_res_88801 = defunc_0_f_res_t_res_88806; + } else { + defunc_0_f_res_88801 = 0.0; + } + + double defunc_0_f_res_88807 = defunc_0_f_res_88801 * + defunc_0_f_res_88801; + double defunc_1_op_res_88798 = defunc_0_f_res_88807 + redout_115314; + double redout_tmp_129343 = defunc_1_op_res_88798; + + redout_115314 = redout_tmp_129343; + } + defunc_0_f_res_88795 = redout_115314; + + int64_t i64_arg_88808 = sub64(defunc_0_f_res_88787, k2p2_70874); + double i64_res_88809 = sitofp_i64_f64(i64_arg_88808); + double sqrt_arg_88810 = defunc_0_f_res_88795 / i64_res_88809; + double sqrt_res_88811; + + sqrt_res_88811 = futrts_sqrt64(sqrt_arg_88810); + + double i64_res_88812 = sitofp_i64_f64(defunc_0_f_res_88787); + double f64_arg_88813 = hfrac_70866 * i64_res_88812; + int64_t f64_res_88814 = fptosi_f64_i64(f64_arg_88813); + + ((__global int64_t *) mem_124933)[gtid_88743] = f64_res_88814; + ((__global int64_t *) mem_124935)[gtid_88743] = defunc_0_f_res_88787; + ((__global double *) mem_124937)[gtid_88743] = sqrt_res_88811; + } + + error_0: + return; + #undef segmap_group_sizze_88780 +} +__kernel void mainDetailedzisegmap_88857(__global int *global_failure, + int64_t m_70861, double hfrac_70866, + int64_t k2p2_70874, __global + unsigned char *mem_124949, __global + unsigned char *mem_124952, __global + unsigned char *mem_124955, __global + unsigned char *mem_124957) +{ + #define segmap_group_sizze_88948 (mainDetailedzisegmap_group_sizze_88859) + + const int block_dim0 = 0; + const int block_dim1 = 1; + 
const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129478; + int32_t local_tid_129479; + int64_t group_sizze_129482; + int32_t wave_sizze_129481; + int32_t group_tid_129480; + + global_tid_129478 = get_global_id(0); + local_tid_129479 = get_local_id(0); + group_sizze_129482 = get_local_size(0); + wave_sizze_129481 = LOCKSTEP_WIDTH; + group_tid_129480 = get_group_id(0); + + int32_t phys_tid_88857; + + phys_tid_88857 = global_tid_129478; + + int64_t gtid_88856; + + gtid_88856 = sext_i32_i64(group_tid_129480) * segmap_group_sizze_88948 + + sext_i32_i64(local_tid_129479); + if (slt64(gtid_88856, m_70861)) { + int64_t defunc_0_f_res_88952 = ((__global + int64_t *) mem_124949)[gtid_88856]; + double defunc_0_f_res_88953 = ((__global + double *) mem_124952)[gtid_88856]; + int64_t i64_arg_88954 = sub64(defunc_0_f_res_88952, k2p2_70874); + double i64_res_88955 = sitofp_i64_f64(i64_arg_88954); + double sqrt_arg_88956 = defunc_0_f_res_88953 / i64_res_88955; + double sqrt_res_88957; + + sqrt_res_88957 = futrts_sqrt64(sqrt_arg_88956); + + double i64_res_88958 = sitofp_i64_f64(defunc_0_f_res_88952); + double f64_arg_88959 = hfrac_70866 * i64_res_88958; + int64_t f64_res_88960 = fptosi_f64_i64(f64_arg_88959); + + ((__global int64_t *) mem_124955)[gtid_88856] = f64_res_88960; + ((__global double *) mem_124957)[gtid_88856] = sqrt_res_88957; + } + + error_0: + return; + #undef segmap_group_sizze_88948 +} +__kernel void mainDetailedzisegmap_88980(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t defunc_2_reduce_comm_res_72722, + __global unsigned char *mem_124924, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_124969) +{ + #define segmap_group_sizze_89004 (mainDetailedzisegmap_group_sizze_88982) + + const int block_dim0 = 0; + const int block_dim1 = 1; + 
const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129517; + int32_t local_tid_129518; + int64_t group_sizze_129521; + int32_t wave_sizze_129520; + int32_t group_tid_129519; + + global_tid_129517 = get_global_id(0); + local_tid_129518 = get_local_id(0); + group_sizze_129521 = get_local_size(0); + wave_sizze_129520 = LOCKSTEP_WIDTH; + group_tid_129519 = get_group_id(0); + + int32_t phys_tid_88980; + + phys_tid_88980 = global_tid_129517; + + int64_t gtid_88979; + + gtid_88979 = sext_i32_i64(group_tid_129519) * segmap_group_sizze_89004 + + sext_i32_i64(local_tid_129518); + if (slt64(gtid_88979, m_70861)) { + int64_t x_89008 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_88979]; + int64_t x_89009 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_88979]; + double defunc_0_f_res_89010; + double redout_115320 = 0.0; + + for (int64_t i_115321 = 0; i_115321 < defunc_2_reduce_comm_res_72722; + i_115321++) { + bool cond_89015 = slt64(i_115321, x_89009); + double defunc_0_f_res_89016; + + if (cond_89015) { + int64_t x_89017 = add64(x_89008, i_115321); + int64_t x_89018 = sub64(x_89017, x_89009); + int64_t i_89019 = add64((int64_t) 1, x_89018); + bool x_89020 = sle64((int64_t) 0, i_89019); + bool y_89021 = slt64(i_89019, N_70860); + bool bounds_check_89022 = x_89020 && y_89021; + bool index_certs_89023; + + if (!bounds_check_89022) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 408) == -1) { + global_failure_args[0] = i_89019; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_89024 = ((__global + double *) mem_124924)[gtid_88979 * + N_70860 + + i_89019]; + + defunc_0_f_res_89016 = defunc_0_f_res_t_res_89024; + } else { + defunc_0_f_res_89016 = 0.0; + } + + double defunc_1_op_res_89013 = defunc_0_f_res_89016 + redout_115320; + double redout_tmp_129522 = defunc_1_op_res_89013; + + redout_115320 = redout_tmp_129522; + } + defunc_0_f_res_89010 = redout_115320; + 
((__global double *) mem_124969)[gtid_88979] = defunc_0_f_res_89010; + } + + error_0: + return; + #undef segmap_group_sizze_89004 +} +__kernel void mainDetailedzisegmap_89077(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t n_70864, + double lam_70868, + int64_t iota_arg_72752, + double i64_res_72759, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_124976) +{ + #define segmap_group_sizze_89097 (mainDetailedzisegmap_group_sizze_89079) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129584; + int32_t local_tid_129585; + int64_t group_sizze_129588; + int32_t wave_sizze_129587; + int32_t group_tid_129586; + + global_tid_129584 = get_global_id(0); + local_tid_129585 = get_local_id(0); + group_sizze_129588 = get_local_size(0); + wave_sizze_129587 = LOCKSTEP_WIDTH; + group_tid_129586 = get_group_id(0); + + int32_t phys_tid_89077; + + phys_tid_89077 = global_tid_129584; + + int64_t gtid_89076; + + gtid_89076 = sext_i32_i64(group_tid_129586) * segmap_group_sizze_89097 + + sext_i32_i64(local_tid_129585); + if (slt64(gtid_89076, iota_arg_72752)) { + int64_t i_89101 = add64(n_70864, gtid_89076); + bool x_89102 = sle64((int64_t) 0, i_89101); + bool y_89103 = slt64(i_89101, N_70860); + bool bounds_check_89104 = x_89102 && y_89103; + bool index_certs_89105; + + if (!bounds_check_89104) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 411) == -1) { + global_failure_args[0] = i_89101; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + int64_t time_89106 = ((__global + int64_t *) mappingindices_mem_120107)[i_89101]; + double i64_res_89107 = sitofp_i64_f64(time_89106); + double logplus_arg_89108 = i64_res_89107 / i64_res_72759; + bool cond_89109 = 2.718281828459045 < logplus_arg_89108; + double logplus_res_89110; + + if (cond_89109) { + double 
log_res_89111; + + log_res_89111 = futrts_log64(logplus_arg_89108); + logplus_res_89110 = log_res_89111; + } else { + logplus_res_89110 = 1.0; + } + + double sqrt_res_89112; + + sqrt_res_89112 = futrts_sqrt64(logplus_res_89110); + + double defunc_0_f_res_89113 = lam_70868 * sqrt_res_89112; + + ((__global double *) mem_124976)[gtid_89076] = defunc_0_f_res_89113; + } + + error_0: + return; + #undef segmap_group_sizze_89097 +} +__kernel void mainDetailedzisegmap_89117(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_70861, + int64_t iota_arg_72752, + int64_t iota_arg_72776, + int64_t distance_72783, + int64_t num_threads_125851, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_124979, + __global unsigned char *mem_124986, + __global unsigned char *mem_124997, + __global unsigned char *mem_125017) +{ + #define segmap_group_sizze_89308 (mainDetailedzisegmap_group_sizze_89119) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129599; + int32_t local_tid_129600; + int64_t group_sizze_129603; + int32_t wave_sizze_129602; + int32_t group_tid_129601; + + global_tid_129599 = get_global_id(0); + local_tid_129600 = get_local_id(0); + group_sizze_129603 = get_local_size(0); + wave_sizze_129602 = LOCKSTEP_WIDTH; + group_tid_129601 = get_group_id(0); + + int32_t phys_tid_89117; + + phys_tid_89117 = global_tid_129599; + + int64_t gtid_89116; + + gtid_89116 = sext_i32_i64(group_tid_129601) * segmap_group_sizze_89308 + + sext_i32_i64(local_tid_129600); + if (slt64(gtid_89116, m_70861)) { + int64_t x_89311 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_89116]; + int64_t x_89312 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89116]; + int64_t y_89313 = ((__global int64_t *) mem_124979)[gtid_89116]; + + for 
(int64_t i_129604 = 0; i_129604 < iota_arg_72776; i_129604++) { + ((__global double *) mem_124997)[phys_tid_89117 + i_129604 * + num_threads_125851] = ((__global + double *) mem_124986)[gtid_89116 + + i_129604 * + m_70861]; + } + for (int64_t i_89316 = 0; i_89316 < distance_72783; i_89316++) { + int64_t index_primexp_89318 = add64((int64_t) 1, i_89316); + bool cond_89319 = slt64((int64_t) 0, index_primexp_89318); + bool loop_cond_89320; + + if (cond_89319) { + bool x_89321 = sle64((int64_t) 0, index_primexp_89318); + bool y_89322 = slt64(index_primexp_89318, iota_arg_72776); + bool bounds_check_89323 = x_89321 && y_89322; + bool index_certs_89324; + + if (!bounds_check_89323) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 414) == -1) { + global_failure_args[0] = index_primexp_89318; + global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double defunc_2_lifted_gt_arg_89325 = ((__global + double *) mem_124997)[phys_tid_89117 + + index_primexp_89318 * + num_threads_125851]; + bool y_89326 = slt64(i_89316, iota_arg_72776); + bool index_certs_89327; + + if (!y_89326) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 415) == -1) { + global_failure_args[0] = i_89316; + global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double defunc_1_lifted_gt_arg_89328 = ((__global + double *) mem_124997)[phys_tid_89117 + + i_89316 * + num_threads_125851]; + bool defunc_1_zlze_res_89329 = defunc_1_lifted_gt_arg_89328 <= + defunc_2_lifted_gt_arg_89325; + bool defunc_2_lifted_gt_res_89330 = !defunc_1_zlze_res_89329; + + loop_cond_89320 = defunc_2_lifted_gt_res_89330; + } else { + loop_cond_89320 = 0; + } + + bool xszq_89331; + int64_t xszq_89332; + bool loop_while_89334; + int64_t j_89335; + + loop_while_89334 = loop_cond_89320; + j_89335 = index_primexp_89318; + while (loop_while_89334) { + int64_t loopres_89337 = sub64(j_89335, (int64_t) 1); + bool x_89338 = sle64((int64_t) 0, j_89335); + bool y_89339 = slt64(j_89335, 
iota_arg_72776); + bool bounds_check_89340 = x_89338 && y_89339; + bool index_certs_89341; + + if (!bounds_check_89340) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 416) == -1) { + global_failure_args[0] = j_89335; + global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double copy_arg_89342 = ((__global + double *) mem_124997)[phys_tid_89117 + + j_89335 * + num_threads_125851]; + bool x_89343 = sle64((int64_t) 0, loopres_89337); + bool y_89344 = slt64(loopres_89337, iota_arg_72776); + bool bounds_check_89345 = x_89343 && y_89344; + bool index_certs_89346; + + if (!bounds_check_89345) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 417) == -1) { + global_failure_args[0] = loopres_89337; + global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double copy_arg_89347 = ((__global + double *) mem_124997)[phys_tid_89117 + + loopres_89337 * + num_threads_125851]; + + ((__global double *) mem_124997)[phys_tid_89117 + j_89335 * + num_threads_125851] = + copy_arg_89347; + ((__global double *) mem_124997)[phys_tid_89117 + + loopres_89337 * + num_threads_125851] = + copy_arg_89342; + + bool cond_89350 = slt64((int64_t) 0, loopres_89337); + bool loop_cond_89351; + + if (cond_89350) { + bool index_certs_89352; + + if (!bounds_check_89345) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 418) == -1) { + global_failure_args[0] = loopres_89337; + global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double defunc_2_lifted_gt_arg_89353 = ((__global + double *) mem_124997)[phys_tid_89117 + + loopres_89337 * + num_threads_125851]; + int64_t i_89354 = sub64(loopres_89337, (int64_t) 1); + bool x_89355 = sle64((int64_t) 0, i_89354); + bool y_89356 = slt64(i_89354, iota_arg_72776); + bool bounds_check_89357 = x_89355 && y_89356; + bool index_certs_89358; + + if (!bounds_check_89357) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 419) == -1) { + global_failure_args[0] = i_89354; + 
global_failure_args[1] = iota_arg_72776; + ; + } + return; + } + } + + double defunc_1_lifted_gt_arg_89359 = ((__global + double *) mem_124997)[phys_tid_89117 + + i_89354 * + num_threads_125851]; + bool defunc_1_zlze_res_89360 = + defunc_1_lifted_gt_arg_89359 <= + defunc_2_lifted_gt_arg_89353; + bool defunc_2_lifted_gt_res_89361 = + !defunc_1_zlze_res_89360; + + loop_cond_89351 = defunc_2_lifted_gt_res_89361; + } else { + loop_cond_89351 = 0; + } + + bool loop_while_tmp_129606 = loop_cond_89351; + int64_t j_tmp_129607 = loopres_89337; + + loop_while_89334 = loop_while_tmp_129606; + j_89335 = j_tmp_129607; + } + xszq_89331 = loop_while_89334; + xszq_89332 = j_89335; + } + + int64_t i_89362 = sdiv64(y_89313, (int64_t) 2); + int64_t j_89363 = sub64(i_89362, (int64_t) 1); + bool cond_89364 = x_89311 == x_89312; + double defunc_0_f_res_89365; + + if (cond_89364) { + defunc_0_f_res_89365 = 0.0; + } else { + int64_t x_89366 = smod64(y_89313, (int64_t) 2); + bool cond_89367 = x_89366 == (int64_t) 0; + double defunc_0_f_res_f_res_89368; + + if (cond_89367) { + bool x_89369 = sle64((int64_t) 0, j_89363); + bool y_89370 = slt64(j_89363, iota_arg_72752); + bool bounds_check_89371 = x_89369 && y_89370; + bool index_certs_89372; + + if (!bounds_check_89371) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 420) == -1) { + global_failure_args[0] = j_89363; + global_failure_args[1] = iota_arg_72752; + ; + } + return; + } + } + + double x_89373 = ((__global + double *) mem_124997)[phys_tid_89117 + + j_89363 * + num_threads_125851]; + bool x_89374 = sle64((int64_t) 0, i_89362); + bool y_89375 = slt64(i_89362, iota_arg_72752); + bool bounds_check_89376 = x_89374 && y_89375; + bool index_certs_89377; + + if (!bounds_check_89376) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 421) == -1) { + global_failure_args[0] = i_89362; + global_failure_args[1] = iota_arg_72752; + ; + } + return; + } + } + + double y_89378 = ((__global + double *) mem_124997)[phys_tid_89117 
+ + i_89362 * + num_threads_125851]; + double x_89379 = x_89373 + y_89378; + double defunc_0_f_res_f_res_t_res_89380 = x_89379 / 2.0; + + defunc_0_f_res_f_res_89368 = defunc_0_f_res_f_res_t_res_89380; + } else { + bool x_89381 = sle64((int64_t) 0, i_89362); + bool y_89382 = slt64(i_89362, iota_arg_72752); + bool bounds_check_89383 = x_89381 && y_89382; + bool index_certs_89384; + + if (!bounds_check_89383) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 422) == -1) { + global_failure_args[0] = i_89362; + global_failure_args[1] = iota_arg_72752; + ; + } + return; + } + } + + double defunc_0_f_res_f_res_f_res_89385 = ((__global + double *) mem_124997)[phys_tid_89117 + + i_89362 * + num_threads_125851]; + + defunc_0_f_res_f_res_89368 = defunc_0_f_res_f_res_f_res_89385; + } + defunc_0_f_res_89365 = defunc_0_f_res_f_res_89368; + } + ((__global double *) mem_125017)[gtid_89116] = defunc_0_f_res_89365; + } + + error_0: + return; + #undef segmap_group_sizze_89308 +} +__kernel void mainDetailedzisegmap_89207(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t iota_arg_72776, __global + unsigned char *defunc_4_map_res_mem_124920, + __global unsigned char *mem_124924, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_124979, + __global unsigned char *mem_124983) +{ + #define segmap_group_sizze_89282 (mainDetailedzisegmap_group_sizze_89210) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129594; + int32_t local_tid_129595; + int64_t group_sizze_129598; + int32_t wave_sizze_129597; + int32_t group_tid_129596; + + global_tid_129594 = get_global_id(0); + local_tid_129595 = get_local_id(0); + group_sizze_129598 = get_local_size(0); + wave_sizze_129597 = LOCKSTEP_WIDTH; + group_tid_129596 = get_group_id(0); + + int32_t phys_tid_89207; + + 
phys_tid_89207 = global_tid_129594; + + int64_t gtid_89205; + + gtid_89205 = squot64(sext_i32_i64(group_tid_129596) * + segmap_group_sizze_89282 + + sext_i32_i64(local_tid_129595), iota_arg_72776); + + int64_t gtid_89206; + + gtid_89206 = sext_i32_i64(group_tid_129596) * segmap_group_sizze_89282 + + sext_i32_i64(local_tid_129595) - + squot64(sext_i32_i64(group_tid_129596) * segmap_group_sizze_89282 + + sext_i32_i64(local_tid_129595), iota_arg_72776) * + iota_arg_72776; + if (slt64(gtid_89205, m_70861) && slt64(gtid_89206, iota_arg_72776)) { + int64_t x_89285 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89205]; + int64_t y_89287 = ((__global int64_t *) mem_124979)[gtid_89205]; + bool cond_89289 = slt64(gtid_89206, y_89287); + bool cond_89290; + + if (cond_89289) { + int64_t i_89291 = add64(gtid_89206, x_89285); + bool x_89292 = sle64((int64_t) 0, i_89291); + bool y_89293 = slt64(i_89291, N_70860); + bool bounds_check_89294 = x_89292 && y_89293; + bool index_certs_89295; + + if (!bounds_check_89294) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 412) == + -1) { + global_failure_args[0] = i_89291; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + double isnan_arg_89296 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89205 * + N_70860 + + i_89291]; + bool isnan_res_89297; + + isnan_res_89297 = futrts_isnan64(isnan_arg_89296); + + bool cond_t_res_89298 = !isnan_res_89297; + + cond_89290 = cond_t_res_89298; + } else { + cond_89290 = 0; + } + + double defunc_0_f_res_89299; + + if (cond_89290) { + int64_t i_89300 = add64(gtid_89206, x_89285); + bool x_89301 = sle64((int64_t) 0, i_89300); + bool y_89302 = slt64(i_89300, N_70860); + bool bounds_check_89303 = x_89301 && y_89302; + bool index_certs_89304; + + if (!bounds_check_89303) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 413) == + -1) { + global_failure_args[0] = i_89300; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + double 
defunc_0_f_res_t_res_89305 = ((__global + double *) mem_124924)[gtid_89205 * + N_70860 + + i_89300]; + + defunc_0_f_res_89299 = defunc_0_f_res_t_res_89305; + } else { + defunc_0_f_res_89299 = INFINITY; + } + ((__global double *) mem_124983)[gtid_89205 * iota_arg_72776 + + gtid_89206] = defunc_0_f_res_89299; + } + + error_0: + return; + #undef segmap_group_sizze_89282 +} +__kernel void mainDetailedzisegmap_89261(__global int *global_failure, + int64_t m_70861, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_124979) +{ + #define segmap_group_sizze_89270 (mainDetailedzisegmap_group_sizze_89263) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129589; + int32_t local_tid_129590; + int64_t group_sizze_129593; + int32_t wave_sizze_129592; + int32_t group_tid_129591; + + global_tid_129589 = get_global_id(0); + local_tid_129590 = get_local_id(0); + group_sizze_129593 = get_local_size(0); + wave_sizze_129592 = LOCKSTEP_WIDTH; + group_tid_129591 = get_group_id(0); + + int32_t phys_tid_89261; + + phys_tid_89261 = global_tid_129589; + + int64_t gtid_89260; + + gtid_89260 = sext_i32_i64(group_tid_129591) * segmap_group_sizze_89270 + + sext_i32_i64(local_tid_129590); + if (slt64(gtid_89260, m_70861)) { + int64_t x_89273 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_89260]; + int64_t x_89274 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89260]; + int64_t y_89275 = sub64(x_89273, x_89274); + + ((__global int64_t *) mem_124979)[gtid_89260] = y_89275; + } + + error_0: + return; + #undef segmap_group_sizze_89270 +} +__kernel void mainDetailedzisegmap_89402(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t iota_arg_72752, + int64_t iota_arg_72776, + 
int64_t num_groups_89512, + int64_t num_threads_125853, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *defunc_3_map_res_mem_124963, + __global + unsigned char *defunc_0_f_res_mem_124973, + __global unsigned char *mem_124976, + __global unsigned char *mem_125020, + __global unsigned char *mem_125034, + __global unsigned char *mem_125048, + __global unsigned char *mem_125063, + __global unsigned char *mem_125066, + __global unsigned char *mem_125068, + __global unsigned char *mem_125070) +{ + #define segmap_group_sizze_89511 (mainDetailedzisegmap_group_sizze_89404) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129613; + int32_t local_tid_129614; + int64_t group_sizze_129617; + int32_t wave_sizze_129616; + int32_t group_tid_129615; + + global_tid_129613 = get_global_id(0); + local_tid_129614 = get_local_id(0); + group_sizze_129617 = get_local_size(0); + wave_sizze_129616 = LOCKSTEP_WIDTH; + group_tid_129615 = get_group_id(0); + + int32_t phys_tid_89402; + + phys_tid_89402 = global_tid_129613; + + int32_t phys_group_id_129618; + + phys_group_id_129618 = get_group_id(0); + for (int32_t i_129619 = 0; i_129619 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, segmap_group_sizze_89511)) - + phys_group_id_129618, sext_i64_i32(num_groups_89512)); + i_129619++) { + int32_t virt_group_id_129620 = phys_group_id_129618 + i_129619 * + sext_i64_i32(num_groups_89512); + int64_t gtid_89401 = sext_i32_i64(virt_group_id_129620) * + segmap_group_sizze_89511 + 
sext_i32_i64(local_tid_129614); + + if (slt64(gtid_89401, m_70861)) { + int64_t x_89518 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_89401]; + int64_t x_89519 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89401]; + double x_89520 = ((__global + double *) defunc_3_map_res_mem_124963)[gtid_89401]; + int64_t x_89521 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_89401]; + double x_89522 = ((__global + double *) defunc_0_f_res_mem_124973)[gtid_89401]; + int64_t y_89525 = sub64(x_89518, x_89519); + double discard_119957; + double scanacc_119953 = 0.0; + + for (int64_t i_119955 = 0; i_119955 < iota_arg_72776; i_119955++) { + bool cond_89531 = sle64(y_89525, i_119955); + double defunc_0_f_res_89532; + + if (cond_89531) { + defunc_0_f_res_89532 = 0.0; + } else { + bool cond_89533 = i_119955 == (int64_t) 0; + double defunc_0_f_res_f_res_89534; + + if (cond_89533) { + defunc_0_f_res_f_res_89534 = x_89522; + } else { + int64_t i_89535 = add64(x_89519, i_119955); + bool x_89536 = sle64((int64_t) 0, i_89535); + bool y_89537 = slt64(i_89535, N_70860); + bool bounds_check_89538 = x_89536 && y_89537; + bool index_certs_89539; + + if (!bounds_check_89538) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 423) == -1) { + global_failure_args[0] = i_89535; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_89540 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89401 * + N_70860 + + i_89535]; + int64_t x_89541 = sub64(x_89519, x_89521); + int64_t i_89542 = add64(x_89541, i_119955); + bool x_89543 = sle64((int64_t) 0, i_89542); + bool y_89544 = slt64(i_89542, N_70860); + bool bounds_check_89545 = x_89543 && y_89544; + bool index_certs_89546; + + if (!bounds_check_89545) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 424) == -1) { + global_failure_args[0] = i_89542; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + 
double y_89547 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89401 * + N_70860 + + i_89542]; + double defunc_0_f_res_f_res_f_res_89548 = x_89540 - + y_89547; + + defunc_0_f_res_f_res_89534 = + defunc_0_f_res_f_res_f_res_89548; + } + defunc_0_f_res_89532 = defunc_0_f_res_f_res_89534; + } + + double defunc_1_op_res_89529 = defunc_0_f_res_89532 + + scanacc_119953; + + ((__global double *) mem_125020)[phys_tid_89402 + i_119955 * + num_threads_125853] = + defunc_1_op_res_89529; + + double scanacc_tmp_129621 = defunc_1_op_res_89529; + + scanacc_119953 = scanacc_tmp_129621; + } + discard_119957 = scanacc_119953; + + double i64_res_89549 = sitofp_i64_f64(x_89519); + double sqrt_res_89550; + + sqrt_res_89550 = futrts_sqrt64(i64_res_89549); + + double y_89551 = x_89520 * sqrt_res_89550; + bool defunc_0_f_res_89553; + int64_t defunc_0_f_res_89554; + double defunc_0_f_res_89555; + bool redout_119959; + int64_t redout_119960; + double redout_119961; + + redout_119959 = 0; + redout_119960 = (int64_t) -1; + redout_119961 = 0.0; + for (int64_t i_119963 = 0; i_119963 < iota_arg_72752; i_119963++) { + double x_89571 = ((__global + double *) mem_125020)[phys_tid_89402 + + i_119963 * + num_threads_125853]; + double x_89572 = ((__global double *) mem_124976)[i_119963]; + int64_t x_89573 = i_119963; + double defunc_0_f_res_89574 = x_89571 / y_89551; + bool cond_89575 = slt64(i_119963, y_89525); + bool isnan_res_89576; + + isnan_res_89576 = futrts_isnan64(defunc_0_f_res_89574); + + bool cond_t_res_89577 = !isnan_res_89576; + bool x_89578 = cond_89575 && cond_t_res_89577; + double abs_res_89579 = fabs(defunc_0_f_res_89574); + bool defunc_2_f_res_t_res_89580 = x_89572 < abs_res_89579; + bool x_89581 = x_89578 && defunc_2_f_res_t_res_89580; + double defunc_1_f_res_89582; + + if (cond_89575) { + defunc_1_f_res_89582 = defunc_0_f_res_89574; + } else { + defunc_1_f_res_89582 = 0.0; + } + + bool defunc_1_op_res_89561; + int64_t defunc_1_op_res_89562; + + if (redout_119959) { + 
defunc_1_op_res_89561 = redout_119959; + defunc_1_op_res_89562 = redout_119960; + } else { + bool x_89563 = x_89581 && x_89581; + bool x_89564 = !x_89581; + bool y_89565 = x_89564 && redout_119959; + bool defunc_1_op_res_f_res_89566 = x_89563 || y_89565; + int64_t defunc_1_op_res_f_res_89567; + + if (x_89581) { + defunc_1_op_res_f_res_89567 = x_89573; + } else { + defunc_1_op_res_f_res_89567 = redout_119960; + } + defunc_1_op_res_89561 = defunc_1_op_res_f_res_89566; + defunc_1_op_res_89562 = defunc_1_op_res_f_res_89567; + } + + double defunc_1_op_res_89570 = defunc_1_f_res_89582 + + redout_119961; + + ((__global double *) mem_125034)[phys_tid_89402 + i_119963 * + num_threads_125853] = + defunc_0_f_res_89574; + + bool redout_tmp_129623 = defunc_1_op_res_89561; + int64_t redout_tmp_129624 = defunc_1_op_res_89562; + double redout_tmp_129625 = defunc_1_op_res_89570; + + redout_119959 = redout_tmp_129623; + redout_119960 = redout_tmp_129624; + redout_119961 = redout_tmp_129625; + } + defunc_0_f_res_89553 = redout_119959; + defunc_0_f_res_89554 = redout_119960; + defunc_0_f_res_89555 = redout_119961; + + bool cond_89583 = y_89525 == (int64_t) 0; + double defunc_0_f_res_89584; + + if (cond_89583) { + defunc_0_f_res_89584 = 0.0; + } else { + double i64_res_89585 = sitofp_i64_f64(y_89525); + double defunc_0_f_res_f_res_89586 = defunc_0_f_res_89555 / + i64_res_89585; + + defunc_0_f_res_89584 = defunc_0_f_res_f_res_89586; + } + + bool cond_89587 = !defunc_0_f_res_89553; + int64_t fst_breakzq_89588; + + if (cond_89587) { + fst_breakzq_89588 = (int64_t) -1; + } else { + bool cond_89589 = slt64(defunc_0_f_res_89554, y_89525); + int64_t adjustValInds_res_89590; + + if (cond_89589) { + int64_t i_89591 = add64(x_89519, defunc_0_f_res_89554); + bool x_89592 = sle64((int64_t) 0, i_89591); + bool y_89593 = slt64(i_89591, N_70860); + bool bounds_check_89594 = x_89592 && y_89593; + bool index_certs_89595; + + if (!bounds_check_89594) { + { + if (atomic_cmpxchg_i32_global(global_failure, 
-1, + 425) == -1) { + global_failure_args[0] = i_89591; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t x_89596 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89401 * + N_70860 + + i_89591]; + int64_t adjustValInds_res_t_res_89597 = sub64(x_89596, + n_70864); + + adjustValInds_res_89590 = adjustValInds_res_t_res_89597; + } else { + adjustValInds_res_89590 = (int64_t) -1; + } + fst_breakzq_89588 = adjustValInds_res_89590; + } + + bool cond_89598 = sle64(x_89519, (int64_t) 5); + bool cond_f_res_89599 = sle64(y_89525, (int64_t) 5); + bool x_89600 = !cond_89598; + bool y_89601 = cond_f_res_89599 && x_89600; + bool cond_89602 = cond_89598 || y_89601; + int64_t fst_breakzq_89603; + + if (cond_89602) { + fst_breakzq_89603 = (int64_t) -2; + } else { + fst_breakzq_89603 = fst_breakzq_89588; + } + for (int64_t i_129627 = 0; i_129627 < iota_arg_72776; i_129627++) { + ((__global double *) mem_125048)[phys_tid_89402 + i_129627 * + num_threads_125853] = NAN; + } + for (int64_t write_iter_119965 = 0; write_iter_119965 < + iota_arg_72776; write_iter_119965++) { + bool cond_89609 = slt64(write_iter_119965, y_89525); + int64_t defunc_0_f_res_89610; + + if (cond_89609) { + int64_t i_89611 = add64(x_89519, write_iter_119965); + bool x_89612 = sle64((int64_t) 0, i_89611); + bool y_89613 = slt64(i_89611, N_70860); + bool bounds_check_89614 = x_89612 && y_89613; + bool index_certs_89615; + + if (!bounds_check_89614) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 426) == -1) { + global_failure_args[0] = i_89611; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t x_89616 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89401 * + N_70860 + + i_89611]; + int64_t defunc_0_f_res_t_res_89617 = sub64(x_89616, + n_70864); + + defunc_0_f_res_89610 = defunc_0_f_res_t_res_89617; + } else { + defunc_0_f_res_89610 = (int64_t) -1; + } + + bool 
less_than_zzero_119969 = slt64(defunc_0_f_res_89610, + (int64_t) 0); + bool greater_than_sizze_119970 = sle64(iota_arg_72776, + defunc_0_f_res_89610); + bool outside_bounds_dim_119971 = less_than_zzero_119969 || + greater_than_sizze_119970; + + if (!outside_bounds_dim_119971) { + for (int64_t i_129629 = 0; i_129629 < (int64_t) 1; + i_129629++) { + ((__global double *) mem_125048)[phys_tid_89402 + + (defunc_0_f_res_89610 + + i_129629) * + num_threads_125853] = + ((__global double *) mem_125034)[phys_tid_89402 + + num_threads_125853 * + write_iter_119965 + + i_129629 * + num_threads_125853]; + } + } + } + for (int64_t i_129630 = 0; i_129630 < iota_arg_72752; i_129630++) { + ((__global double *) mem_125063)[i_129630 * m_70861 + + gtid_89401] = ((__global + double *) mem_125048)[phys_tid_89402 + + i_129630 * + num_threads_125853]; + } + for (int64_t i_129631 = 0; i_129631 < iota_arg_72752; i_129631++) { + ((__global double *) mem_125066)[i_129631 * m_70861 + + gtid_89401] = ((__global + double *) mem_125034)[phys_tid_89402 + + i_129631 * + num_threads_125853]; + } + ((__global int64_t *) mem_125068)[gtid_89401] = fst_breakzq_89603; + ((__global double *) mem_125070)[gtid_89401] = defunc_0_f_res_89584; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_89511 +} +__kernel void mainDetailedzisegmap_89737(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t iota_arg_72752, + int64_t iota_arg_72776, __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_125093, + __global unsigned char *mem_125110, + __global unsigned char *mem_125118) +{ + #define segmap_group_sizze_90050 (mainDetailedzisegmap_group_sizze_89740) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if 
(*global_failure >= 0) + return; + + int32_t global_tid_129830; + int32_t local_tid_129831; + int64_t group_sizze_129834; + int32_t wave_sizze_129833; + int32_t group_tid_129832; + + global_tid_129830 = get_global_id(0); + local_tid_129831 = get_local_id(0); + group_sizze_129834 = get_local_size(0); + wave_sizze_129833 = LOCKSTEP_WIDTH; + group_tid_129832 = get_group_id(0); + + int32_t phys_tid_89737; + + phys_tid_89737 = global_tid_129830; + + int64_t gtid_89735; + + gtid_89735 = squot64(sext_i32_i64(group_tid_129832) * + segmap_group_sizze_90050 + + sext_i32_i64(local_tid_129831), iota_arg_72776); + + int64_t gtid_89736; + + gtid_89736 = sext_i32_i64(group_tid_129832) * segmap_group_sizze_90050 + + sext_i32_i64(local_tid_129831) - + squot64(sext_i32_i64(group_tid_129832) * segmap_group_sizze_90050 + + sext_i32_i64(local_tid_129831), iota_arg_72776) * + iota_arg_72776; + if (slt64(gtid_89735, m_70861) && slt64(gtid_89736, iota_arg_72776)) { + int64_t y_90055 = ((__global int64_t *) mem_125093)[gtid_89735]; + int64_t binop_x_115338 = iota_arg_72776 * gtid_89735; + int64_t binop_x_115339 = gtid_89736 + binop_x_115338; + int64_t new_index_115340 = squot64(binop_x_115339, iota_arg_72752); + int64_t binop_y_115346 = iota_arg_72752 * new_index_115340; + int64_t new_index_115347 = binop_x_115339 - binop_y_115346; + double write_value_90057 = ((__global + double *) mem_125110)[new_index_115340 * + iota_arg_72752 + + new_index_115347]; + bool cond_90058 = slt64(gtid_89736, y_90055); + int64_t defunc_0_f_res_90059; + + if (cond_90058) { + int64_t x_90053 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89735]; + int64_t i_90060 = add64(gtid_89736, x_90053); + bool x_90061 = sle64((int64_t) 0, i_90060); + bool y_90062 = slt64(i_90060, N_70860); + bool bounds_check_90063 = x_90061 && y_90062; + bool index_certs_90064; + + if (!bounds_check_90063) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 434) == + -1) { + global_failure_args[0] = i_90060; + 
global_failure_args[1] = N_70860; + ; + } + return; + } + } + + int64_t x_90065 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89735 * + N_70860 + + i_90060]; + int64_t defunc_0_f_res_t_res_90066 = sub64(x_90065, n_70864); + + defunc_0_f_res_90059 = defunc_0_f_res_t_res_90066; + } else { + defunc_0_f_res_90059 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_89735) && slt64(gtid_89735, m_70861)) && + (sle64((int64_t) 0, defunc_0_f_res_90059) && + slt64(defunc_0_f_res_90059, iota_arg_72776))) { + ((__global double *) mem_125118)[gtid_89735 * iota_arg_72776 + + defunc_0_f_res_90059] = + write_value_90057; + } + } + + error_0: + return; + #undef segmap_group_sizze_90050 +} +__kernel void mainDetailedzisegmap_89785(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_125093, + __global unsigned char *mem_125103, + __global unsigned char *mem_125105, + __global unsigned char *mem_125107, + __global unsigned char *mem_125113, + __global unsigned char *mem_125115) +{ + #define segmap_group_sizze_90005 (mainDetailedzisegmap_group_sizze_89787) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129825; + int32_t local_tid_129826; + int64_t group_sizze_129829; + int32_t wave_sizze_129828; + int32_t group_tid_129827; + + global_tid_129825 = get_global_id(0); + local_tid_129826 = get_local_id(0); + group_sizze_129829 = get_local_size(0); + wave_sizze_129828 = LOCKSTEP_WIDTH; + group_tid_129827 = get_group_id(0); + + int32_t phys_tid_89785; + + phys_tid_89785 = global_tid_129825; + + int64_t gtid_89784; + + gtid_89784 = sext_i32_i64(group_tid_129827) * segmap_group_sizze_90005 + + sext_i32_i64(local_tid_129826); + if 
(slt64(gtid_89784, m_70861)) { + int64_t x_90009 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89784]; + int64_t y_90011 = ((__global int64_t *) mem_125093)[gtid_89784]; + bool defunc_0_f_res_90012 = ((__global bool *) mem_125103)[gtid_89784]; + bool cond_90015 = y_90011 == (int64_t) 0; + double defunc_0_f_res_90016; + + if (cond_90015) { + defunc_0_f_res_90016 = 0.0; + } else { + double defunc_0_f_res_90014 = ((__global + double *) mem_125107)[gtid_89784]; + double i64_res_90017 = sitofp_i64_f64(y_90011); + double defunc_0_f_res_f_res_90018 = defunc_0_f_res_90014 / + i64_res_90017; + + defunc_0_f_res_90016 = defunc_0_f_res_f_res_90018; + } + + bool cond_90019 = !defunc_0_f_res_90012; + int64_t fst_breakzq_90020; + + if (cond_90019) { + fst_breakzq_90020 = (int64_t) -1; + } else { + int64_t defunc_0_f_res_90013 = ((__global + int64_t *) mem_125105)[gtid_89784]; + bool cond_90021 = slt64(defunc_0_f_res_90013, y_90011); + int64_t adjustValInds_res_90022; + + if (cond_90021) { + int64_t i_90023 = add64(x_90009, defunc_0_f_res_90013); + bool x_90024 = sle64((int64_t) 0, i_90023); + bool y_90025 = slt64(i_90023, N_70860); + bool bounds_check_90026 = x_90024 && y_90025; + bool index_certs_90027; + + if (!bounds_check_90026) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 433) == -1) { + global_failure_args[0] = i_90023; + global_failure_args[1] = N_70860; + ; + } + return; + } + } + + int64_t x_90028 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89784 * + N_70860 + + i_90023]; + int64_t adjustValInds_res_t_res_90029 = sub64(x_90028, n_70864); + + adjustValInds_res_90022 = adjustValInds_res_t_res_90029; + } else { + adjustValInds_res_90022 = (int64_t) -1; + } + fst_breakzq_90020 = adjustValInds_res_90022; + } + + bool cond_90030 = sle64(x_90009, (int64_t) 5); + bool cond_f_res_90031 = sle64(y_90011, (int64_t) 5); + bool x_90032 = !cond_90030; + bool y_90033 = cond_f_res_90031 && x_90032; + bool cond_90034 = cond_90030 || y_90033; + 
int64_t fst_breakzq_90035; + + if (cond_90034) { + fst_breakzq_90035 = (int64_t) -2; + } else { + fst_breakzq_90035 = fst_breakzq_90020; + } + ((__global int64_t *) mem_125113)[gtid_89784] = fst_breakzq_90035; + ((__global double *) mem_125115)[gtid_89784] = defunc_0_f_res_90016; + } + + error_0: + return; + #undef segmap_group_sizze_90005 +} +__kernel void mainDetailedzisegmap_89860(__global int *global_failure, + int64_t m_70861, __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *defunc_3_map_res_mem_124963, + __global unsigned char *mem_125100) +{ + #define segmap_group_sizze_89957 (mainDetailedzisegmap_group_sizze_89862) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129722; + int32_t local_tid_129723; + int64_t group_sizze_129726; + int32_t wave_sizze_129725; + int32_t group_tid_129724; + + global_tid_129722 = get_global_id(0); + local_tid_129723 = get_local_id(0); + group_sizze_129726 = get_local_size(0); + wave_sizze_129725 = LOCKSTEP_WIDTH; + group_tid_129724 = get_group_id(0); + + int32_t phys_tid_89860; + + phys_tid_89860 = global_tid_129722; + + int64_t gtid_89859; + + gtid_89859 = sext_i32_i64(group_tid_129724) * segmap_group_sizze_89957 + + sext_i32_i64(local_tid_129723); + if (slt64(gtid_89859, m_70861)) { + int64_t x_89960 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89859]; + double x_89961 = ((__global + double *) defunc_3_map_res_mem_124963)[gtid_89859]; + double i64_res_89962 = sitofp_i64_f64(x_89960); + double sqrt_res_89963; + + sqrt_res_89963 = futrts_sqrt64(i64_res_89962); + + double y_89964 = x_89961 * sqrt_res_89963; + + ((__global double *) mem_125100)[gtid_89859] = y_89964; + } + + error_0: + return; + #undef segmap_group_sizze_89957 +} +__kernel void mainDetailedzisegmap_89908(__global int *global_failure, + int64_t m_70861, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + 
unsigned char *defunc_3_map_res_mem_124962, + __global unsigned char *mem_125093) +{ + #define segmap_group_sizze_89917 (mainDetailedzisegmap_group_sizze_89910) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129661; + int32_t local_tid_129662; + int64_t group_sizze_129665; + int32_t wave_sizze_129664; + int32_t group_tid_129663; + + global_tid_129661 = get_global_id(0); + local_tid_129662 = get_local_id(0); + group_sizze_129665 = get_local_size(0); + wave_sizze_129664 = LOCKSTEP_WIDTH; + group_tid_129663 = get_group_id(0); + + int32_t phys_tid_89908; + + phys_tid_89908 = global_tid_129661; + + int64_t gtid_89907; + + gtid_89907 = sext_i32_i64(group_tid_129663) * segmap_group_sizze_89917 + + sext_i32_i64(local_tid_129662); + if (slt64(gtid_89907, m_70861)) { + int64_t x_89920 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_89907]; + int64_t x_89921 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89907]; + int64_t y_89922 = sub64(x_89920, x_89921); + + ((__global int64_t *) mem_125093)[gtid_89907] = y_89922; + } + + error_0: + return; + #undef segmap_group_sizze_89917 +} +__kernel void mainDetailedzisegmap_intragroup_115661(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125194_backing_aligned_0, + __local volatile + int64_t *mem_121428_backing_aligned_1, + __local volatile + int64_t *mem_121409_backing_aligned_2, + __local volatile + int64_t *mem_121400_backing_aligned_3, + __local volatile + int64_t *mem_121377_backing_aligned_4, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_y_115659, + int64_t ctx_val_121390, + int64_t num_threads_125686, + __global + unsigned char *mem_121359, + __global + unsigned char *mem_121363, + __global + unsigned char *mem_121366, + __global + unsigned char *mem_121368, + __global + unsigned char *mem_121446, 
+ __global + unsigned char *mem_125177) +{ + #define tile_sizze_115656 (mainDetailedzitile_sizze_115655) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_125194_backing_8 = (__local volatile + char *) mem_125194_backing_aligned_0; + __local volatile char *restrict mem_121428_backing_7 = (__local volatile + char *) mem_121428_backing_aligned_1; + __local volatile char *restrict mem_121409_backing_2 = (__local volatile + char *) mem_121409_backing_aligned_2; + __local volatile char *restrict mem_121400_backing_1 = (__local volatile + char *) mem_121400_backing_aligned_3; + __local volatile char *restrict mem_121377_backing_0 = (__local volatile + char *) mem_121377_backing_aligned_4; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126887; + int32_t local_tid_126888; + int64_t group_sizze_126891; + int32_t wave_sizze_126890; + int32_t group_tid_126889; + + global_tid_126887 = get_global_id(0); + local_tid_126888 = get_local_id(0); + group_sizze_126891 = get_local_size(0); + wave_sizze_126890 = LOCKSTEP_WIDTH; + group_tid_126889 = get_group_id(0); + + int32_t gid_flat_115661; + + gid_flat_115661 = group_tid_126889; + + int32_t ltid_pre_126892; + + ltid_pre_126892 = squot32(local_tid_126888, + sext_i64_i32(tile_sizze_115656)); + + int32_t ltid_pre_126893; + + ltid_pre_126893 = local_tid_126888 - squot32(local_tid_126888, + sext_i64_i32(tile_sizze_115656)) * + sext_i64_i32(tile_sizze_115656); + + int64_t gid_x_115653; + + gid_x_115653 = squot64(sext_i32_i64(group_tid_126889), num_groups_y_115659); + + int64_t gid_y_115654; + + gid_y_115654 = sext_i32_i64(group_tid_126889) - + squot64(sext_i32_i64(group_tid_126889), num_groups_y_115659) * + num_groups_y_115659; + + int64_t binop_x_115688; + + binop_x_115688 = gid_x_115653 * 
tile_sizze_115656; + + int64_t binop_x_115690 = gid_y_115654 * tile_sizze_115656; + __local char *mem_121377; + + mem_121377 = (__local char *) mem_121377_backing_0; + + int64_t ltid_y_115680 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115678 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115679 = local_tid_126888; + + if (slt64(ltid_y_115680, tile_sizze_115656) && slt64(ltid_x_115678, + tile_sizze_115656)) { + int64_t gtid_115689 = ltid_y_115680 + binop_x_115688; + int64_t gtid_115691 = ltid_x_115678 + binop_x_115690; + bool binop_x_115692 = slt64(gtid_115689, m_70861); + bool binop_y_115693 = slt64(gtid_115691, k2p2zq_70876); + bool cond_115694 = binop_x_115692 && binop_y_115693; + + if (cond_115694) { + for (int64_t i_126894 = 0; i_126894 < k2p2zq_70876; i_126894++) { + ((__global double *) mem_125177)[gid_flat_115661 + i_126894 * + num_threads_125686] = + ((__global double *) mem_121368)[i_126894]; + } + } + for (int64_t i_126895 = 0; i_126895 < k2p2zq_70876; i_126895++) { + ((__local double *) mem_121377)[ltid_y_115680 * (k2p2zq_70876 * + tile_sizze_115656) + + ltid_x_115678 * k2p2zq_70876 + + i_126895] = ((__global + double *) mem_125177)[gid_flat_115661 + + i_126895 * + num_threads_125686]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_121395[1]; + __local char *mem_121400; + + mem_121400 = (__local char *) mem_121400_backing_1; + + __local char *mem_121409; + + mem_121409 = (__local char *) mem_121409_backing_2; + + double mem_121413[1]; + double mem_125187[1]; + __local char *tiled_inside_loop_mem_121442; + __local char *mem_param_121388; + + mem_param_121388 = mem_121377; + for (int64_t i_81035 = 0; i_81035 < k2p2zq_70876; i_81035++) { + int64_t x_81037 = sub64(k2p2zq_70876, i_81035); + int64_t i_81038 = sub64(x_81037, (int64_t) 1); + bool x_81039 = sle64((int64_t) 0, i_81038); + bool y_81040 = slt64(i_81038, k2p2zq_70876); + bool bounds_check_81041 = x_81039 && y_81040; + int64_t j_m_i_81042 = sub64(k2p2zq_70876, x_81037); + bool 
empty_slice_81043 = j_m_i_81042 == (int64_t) 0; + int64_t m_81044 = sub64(j_m_i_81042, (int64_t) 1); + int64_t i_p_m_t_s_81045 = add64(x_81037, m_81044); + bool zzero_leq_i_p_m_t_s_81046 = sle64((int64_t) 0, i_p_m_t_s_81045); + bool i_p_m_t_s_leq_w_81047 = slt64(i_p_m_t_s_81045, k2p2zq_70876); + bool zzero_lte_i_81048 = sle64((int64_t) 0, x_81037); + bool i_lte_j_81049 = sle64(x_81037, k2p2zq_70876); + bool y_81050 = i_p_m_t_s_leq_w_81047 && zzero_lte_i_81048; + bool y_81051 = zzero_leq_i_p_m_t_s_81046 && y_81050; + bool y_81052 = i_lte_j_81049 && y_81051; + bool forwards_ok_81053 = zzero_lte_i_81048 && y_81052; + bool ok_or_empty_81054 = empty_slice_81043 || forwards_ok_81053; + bool index_ok_81055 = bounds_check_81041 && ok_or_empty_81054; + bool index_certs_81056; + + if (!index_ok_81055) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 288) == -1) { + global_failure_args[0] = i_81038; + global_failure_args[1] = x_81037; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + global_failure_args[4] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_1; + } + } + + bool index_certs_81057; + + if (!ok_or_empty_81054) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 289) == -1) { + global_failure_args[0] = x_81037; + global_failure_args[1] = k2p2zq_70876; + global_failure_args[2] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_1; + } + } + + int64_t num_whole_tiles_115714 = squot64(j_m_i_81042, + tile_sizze_115656); + int64_t ltid_y_115717 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115715 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115716 = local_tid_126888; + + if (slt64(ltid_y_115717, tile_sizze_115656) && slt64(ltid_x_115715, + tile_sizze_115656)) { + mem_121395[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_121405[1]; + double mem_param_121396[1]; + + for (int32_t i_3 = 0; 
i_3 < 1; i_3++) + mem_param_121396[i_3] = mem_121395[i_3]; + for (int64_t tile_id_115726 = 0; tile_id_115726 < + num_whole_tiles_115714; tile_id_115726++) { + int64_t binop_x_115802 = tile_sizze_115656 * tile_id_115726; + int64_t ltid_y_115729 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115727 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115728 = local_tid_126888; + int64_t j_115803 = ltid_x_115727 + binop_x_115802; + int64_t gtid_115805 = binop_x_115688 + ltid_y_115729; + bool binop_x_115811 = slt64(j_115803, j_m_i_81042); + bool binop_y_115812 = slt64(gtid_115805, m_70861); + bool cond_115813 = binop_x_115811 && binop_y_115812; + double pre_115814; + + if (cond_115813) { + int64_t slice_119565 = x_81037 + j_115803; + double x_115815 = ((__global + double *) mem_121359)[slice_119565 * + (k2p2zq_70876 * + m_70861) + + gtid_115805 * + k2p2zq_70876 + + i_81038]; + + pre_115814 = x_115815; + } else { + pre_115814 = 0.0; + } + ((__local double *) mem_121400)[ltid_y_115729 * tile_sizze_115656 + + ltid_x_115727] = pre_115814; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119566 = x_81037 + binop_x_115802; + double mem_121404[1]; + int64_t ltid_y_115762 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115760 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115761 = local_tid_126888; + int64_t gtid_115819 = binop_x_115688 + ltid_y_115762; + int64_t gtid_115821 = binop_x_115690 + ltid_x_115760; + double acc_115825 = mem_param_121396[(int64_t) 0]; + bool binop_x_115829 = slt64(gtid_115819, m_70861); + bool binop_y_115830 = slt64(gtid_115821, k2p2zq_70876); + bool cond_115831 = binop_x_115829 && binop_y_115830; + double acc_115832; + + if (cond_115831) { + double x_115833; + double redout_119719 = acc_115825; + + for (int64_t i_119720 = 0; i_119720 < tile_sizze_115656; + i_119720++) { + int64_t slice_120008 = slice_119566 + i_119720; + double x_115838 = ((__local + double *) mem_121400)[ltid_y_115762 * + tile_sizze_115656 + + i_119720]; + bool 
isnan_res_115839; + + isnan_res_115839 = futrts_isnan64(x_115838); + + double defunc_1_f_res_115840; + + if (isnan_res_115839) { + defunc_1_f_res_115840 = 0.0; + } else { + double x_115837 = ((__local + double *) mem_param_121388)[ltid_y_115762 * + ctx_val_121390 + + ltid_x_115760 * + k2p2zq_70876 + + slice_120008]; + double defunc_1_f_res_f_res_115841 = x_115837 * + x_115838; + + defunc_1_f_res_115840 = defunc_1_f_res_f_res_115841; + } + + double defunc_1_op_res_115836 = defunc_1_f_res_115840 + + redout_119719; + double redout_tmp_126900 = defunc_1_op_res_115836; + + redout_119719 = redout_tmp_126900; + } + x_115833 = redout_119719; + acc_115832 = x_115833; + } else { + acc_115832 = acc_115825; + } + mem_121404[(int64_t) 0] = acc_115832; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_126898[1]; + + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_126898[i_4] = mem_121404[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_121396[i_5] = mem_param_tmp_126898[i_5]; + } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_121405[i_6] = mem_param_121396[i_6]; + + int64_t residual_input_115851 = srem64(j_m_i_81042, tile_sizze_115656); + bool cond_115852 = residual_input_115851 == (int64_t) 0; + + if (cond_115852) { + mem_125187[(int64_t) 0] = accs_mem_121405[(int64_t) 0]; + } else { + int64_t binop_x_115929 = tile_sizze_115656 * num_whole_tiles_115714; + int64_t ltid_y_115855 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115853 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115854 = local_tid_126888; + int64_t j_115930 = ltid_x_115853 + binop_x_115929; + int64_t gtid_115932 = binop_x_115688 + ltid_y_115855; + bool binop_x_115938 = slt64(j_115930, j_m_i_81042); + bool binop_y_115939 = slt64(gtid_115932, m_70861); + bool cond_115940 = binop_x_115938 && binop_y_115939; + double pre_115941; + + if (cond_115940) { + int64_t slice_119567 = x_81037 + j_115930; + double x_115942 = ((__global + double *) mem_121359)[slice_119567 * + (k2p2zq_70876 * + 
m_70861) + + gtid_115932 * + k2p2zq_70876 + + i_81038]; + + pre_115941 = x_115942; + } else { + pre_115941 = 0.0; + } + ((__local double *) mem_121409)[ltid_y_115855 * tile_sizze_115656 + + ltid_x_115853] = pre_115941; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119568 = x_81037 + binop_x_115929; + int64_t ltid_y_115889 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115887 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115888 = local_tid_126888; + int64_t gtid_115947 = binop_x_115688 + ltid_y_115889; + int64_t gtid_115949 = binop_x_115690 + ltid_x_115887; + double acc_115953 = accs_mem_121405[(int64_t) 0]; + bool binop_x_115957 = slt64(gtid_115947, m_70861); + bool binop_y_115958 = slt64(gtid_115949, k2p2zq_70876); + bool cond_115959 = binop_x_115957 && binop_y_115958; + double acc_115960; + + if (cond_115959) { + double x_115961; + double redout_119721 = acc_115953; + + for (int64_t i_119722 = 0; i_119722 < residual_input_115851; + i_119722++) { + int64_t slice_120009 = slice_119568 + i_119722; + double x_115966 = ((__local + double *) mem_121409)[ltid_y_115889 * + tile_sizze_115656 + + i_119722]; + bool isnan_res_115967; + + isnan_res_115967 = futrts_isnan64(x_115966); + + double defunc_1_f_res_115968; + + if (isnan_res_115967) { + defunc_1_f_res_115968 = 0.0; + } else { + double x_115965 = ((__local + double *) mem_param_121388)[ltid_y_115889 * + ctx_val_121390 + + ltid_x_115887 * + k2p2zq_70876 + + slice_120009]; + double defunc_1_f_res_f_res_115969 = x_115965 * + x_115966; + + defunc_1_f_res_115968 = defunc_1_f_res_f_res_115969; + } + + double defunc_1_op_res_115964 = defunc_1_f_res_115968 + + redout_119721; + double redout_tmp_126901 = defunc_1_op_res_115964; + + redout_119721 = redout_tmp_126901; + } + x_115961 = redout_119721; + acc_115960 = x_115961; + } else { + acc_115960 = acc_115953; + } + mem_121413[(int64_t) 0] = acc_115960; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125187[(int64_t) 0] = mem_121413[(int64_t) 0]; + } + + __local char 
*mem_121428; + + mem_121428 = (__local char *) mem_121428_backing_7; + + int64_t ltid_y_115973 = sext_i32_i64(ltid_pre_126892); + int64_t ltid_x_115971 = sext_i32_i64(ltid_pre_126893); + int32_t ltid_flat_115972 = local_tid_126888; + + if (slt64(ltid_y_115973, tile_sizze_115656) && slt64(ltid_x_115971, + tile_sizze_115656)) { + int64_t gtid_115982 = binop_x_115688 + ltid_y_115973; + int64_t gtid_115984 = binop_x_115690 + ltid_x_115971; + bool binop_x_115986 = slt64(gtid_115982, m_70861); + bool binop_y_115987 = slt64(gtid_115984, k2p2zq_70876); + bool cond_115988 = binop_x_115986 && binop_y_115987; + __local char *mem_125194; + + mem_125194 = (__local char *) mem_125194_backing_8; + if (cond_115988) { + double defunc_2_reduce_res_115985 = mem_125187[(int64_t) 0]; + bool index_ok_115993 = bounds_check_81041 && bounds_check_81041; + bool index_certs_115994; + + if (!index_ok_115993) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 290) == -1) { + global_failure_args[0] = i_81038; + global_failure_args[1] = i_81038; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_6; + } + } + + double zs_arg_115995 = ((__global + double *) mem_121363)[i_81038 * + (k2p2zq_70876 * + m_70861) + + gtid_115982 * + k2p2zq_70876 + + i_81038]; + bool index_certs_115996; + + if (!bounds_check_81041) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 291) == -1) { + global_failure_args[0] = i_81038; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_6; + } + } + + double zm_arg_115997 = ((__global + double *) mem_121366)[i_81038 * + k2p2zq_70876 + + gtid_115984]; + double zm_res_115998 = zm_arg_115997 - + defunc_2_reduce_res_115985; + double zs_res_115999 = zm_res_115998 / zs_arg_115995; + + ((__local double *) mem_param_121388)[ltid_y_115973 * + ctx_val_121390 + + ltid_x_115971 * + k2p2zq_70876 + i_81038] = + zs_res_115999; + for (int64_t i_126902 = 0; i_126902 
< k2p2zq_70876; + i_126902++) { + ((__local double *) mem_125194)[i_126902] = ((__local + double *) mem_param_121388)[ltid_y_115973 * + ctx_val_121390 + + ltid_x_115971 * + k2p2zq_70876 + + i_126902]; + } + } + for (int64_t i_126903 = 0; i_126903 < k2p2zq_70876; i_126903++) { + ((__local double *) mem_121428)[ltid_y_115973 * (k2p2zq_70876 * + tile_sizze_115656) + + ltid_x_115971 * k2p2zq_70876 + + i_126903] = ((__local + double *) mem_125194)[i_126903]; + } + } + + error_6: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_param_tmp_126896; + + mem_param_tmp_126896 = mem_121428; + mem_param_121388 = mem_param_tmp_126896; + } + tiled_inside_loop_mem_121442 = mem_param_121388; + + int64_t thread_out_index_126904 = gid_x_115653 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126892); + int64_t thread_out_index_126905 = gid_y_115654 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126893); + + if (slt64(thread_out_index_126904, m_70861) && + slt64(thread_out_index_126905, k2p2zq_70876)) { + for (int64_t i_126906 = 0; i_126906 < k2p2zq_70876; i_126906++) { + ((__global double *) mem_121446)[thread_out_index_126904 * + (k2p2zq_70876 * k2p2zq_70876) + + thread_out_index_126905 * + k2p2zq_70876 + i_126906] = + ((__local + double *) tiled_inside_loop_mem_121442)[sext_i32_i64(ltid_pre_126892) * + ctx_val_121390 + + sext_i32_i64(ltid_pre_126893) * + k2p2zq_70876 + + i_126906]; + } + } + + error_7: + return; + #undef tile_sizze_115656 +} +__kernel void mainDetailedzisegmap_intragroup_116023(__global + int *global_failure, + __local volatile + int64_t *mem_121547_backing_aligned_0, + __local volatile + int64_t *mem_121531_backing_aligned_1, + __local volatile + int64_t *mem_121522_backing_aligned_2, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t x_81093, + int64_t i_81094, + int64_t j_m_i_81098, + int64_t num_groups_y_116021, + int64_t num_whole_tiles_116039, + int64_t residual_input_116172, + unsigned char 
cond_116173, + int64_t num_threads_125691, + __global + unsigned char *mem_120252, + __global + unsigned char *mem_121351, + __global + unsigned char *mem_121458, + __global + unsigned char *mem_121508, + __global + unsigned char *mem_121512, + __global + unsigned char *mem_121551, + __global + unsigned char *mem_125219) +{ + #define tile_sizze_116018 (mainDetailedzitile_sizze_116017) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_121547_backing_6 = (__local volatile + char *) mem_121547_backing_aligned_0; + __local volatile char *restrict mem_121531_backing_5 = (__local volatile + char *) mem_121531_backing_aligned_1; + __local volatile char *restrict mem_121522_backing_0 = (__local volatile + char *) mem_121522_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126924; + int32_t local_tid_126925; + int64_t group_sizze_126928; + int32_t wave_sizze_126927; + int32_t group_tid_126926; + + global_tid_126924 = get_global_id(0); + local_tid_126925 = get_local_id(0); + group_sizze_126928 = get_local_size(0); + wave_sizze_126927 = LOCKSTEP_WIDTH; + group_tid_126926 = get_group_id(0); + + int32_t gid_flat_116023; + + gid_flat_116023 = group_tid_126926; + + int32_t ltid_pre_126929; + + ltid_pre_126929 = squot32(local_tid_126925, + sext_i64_i32(tile_sizze_116018)); + + int32_t ltid_pre_126930; + + ltid_pre_126930 = local_tid_126925 - squot32(local_tid_126925, + sext_i64_i32(tile_sizze_116018)) * + sext_i64_i32(tile_sizze_116018); + + int64_t gid_x_116015; + + gid_x_116015 = squot64(sext_i32_i64(group_tid_126926), num_groups_y_116021); + + int64_t gid_y_116016; + + gid_y_116016 = sext_i32_i64(group_tid_126926) - + squot64(sext_i32_i64(group_tid_126926), num_groups_y_116021) * + num_groups_y_116021; + + double mem_121517[1]; + int64_t ltid_y_116042 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116040 = sext_i32_i64(ltid_pre_126930); + int32_t ltid_flat_116041 = 
local_tid_126925; + + if (slt64(ltid_y_116042, tile_sizze_116018) && slt64(ltid_x_116040, + tile_sizze_116018)) { + mem_121517[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_116127 = gid_x_116015 * tile_sizze_116018; + int64_t binop_x_116142 = gid_y_116016 * tile_sizze_116018; + __local char *mem_121522; + + mem_121522 = (__local char *) mem_121522_backing_0; + + double accs_mem_121527[1]; + double mem_param_121518[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_121518[i_1] = mem_121517[i_1]; + for (int64_t tile_id_116051 = 0; tile_id_116051 < num_whole_tiles_116039; + tile_id_116051++) { + int64_t binop_x_116125 = tile_sizze_116018 * tile_id_116051; + int64_t ltid_y_116054 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116052 = sext_i32_i64(ltid_pre_126930); + int32_t ltid_flat_116053 = local_tid_126925; + int64_t j_116126 = ltid_x_116052 + binop_x_116125; + int64_t gtid_116128 = ltid_y_116054 + binop_x_116127; + bool binop_x_116133 = slt64(j_116126, j_m_i_81098); + bool binop_y_116134 = slt64(gtid_116128, m_70861); + bool cond_116135 = binop_x_116133 && binop_y_116134; + double pre_116136; + + if (cond_116135) { + int64_t slice_119569 = x_81093 + j_116126; + double x_116137 = ((__global double *) mem_121458)[slice_119569 * + (k2p2zq_70876 * + m_70861) + + gtid_116128 * + k2p2zq_70876 + + i_81094]; + + pre_116136 = x_116137; + } else { + pre_116136 = 0.0; + } + ((__local double *) mem_121522)[ltid_y_116054 * tile_sizze_116018 + + ltid_x_116052] = pre_116136; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119570 = x_81093 + binop_x_116125; + double mem_121526[1]; + int64_t ltid_y_116086 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116084 = sext_i32_i64(ltid_pre_126930); + int32_t ltid_flat_116085 = local_tid_126925; + int64_t gtid_116141 = ltid_y_116086 + binop_x_116127; + int64_t gtid_116143 = ltid_x_116084 + binop_x_116142; + double acc_116146 = mem_param_121518[(int64_t) 0]; + bool binop_x_116150 = 
slt64(gtid_116141, m_70861); + bool binop_y_116151 = slt64(gtid_116143, k2p2zq_70876); + bool cond_116152 = binop_x_116150 && binop_y_116151; + double acc_116153; + + if (cond_116152) { + double x_116154; + double redout_119730 = acc_116146; + + for (int64_t i_119731 = 0; i_119731 < tile_sizze_116018; + i_119731++) { + int64_t slice_120012 = slice_119570 + i_119731; + double x_116159 = ((__local + double *) mem_121522)[ltid_y_116086 * + tile_sizze_116018 + + i_119731]; + bool isnan_res_116160; + + isnan_res_116160 = futrts_isnan64(x_116159); + + double defunc_1_f_res_116161; + + if (isnan_res_116160) { + defunc_1_f_res_116161 = 0.0; + } else { + double x_116158 = ((__global + double *) mem_121512)[slice_120012 * + (k2p2zq_70876 * + m_70861) + + gtid_116141 * + k2p2zq_70876 + + gtid_116143]; + double defunc_1_f_res_f_res_116162 = x_116158 * x_116159; + + defunc_1_f_res_116161 = defunc_1_f_res_f_res_116162; + } + + double defunc_1_op_res_116157 = defunc_1_f_res_116161 + + redout_119730; + double redout_tmp_126933 = defunc_1_op_res_116157; + + redout_119730 = redout_tmp_126933; + } + x_116154 = redout_119730; + acc_116153 = x_116154; + } else { + acc_116153 = acc_116146; + } + mem_121526[(int64_t) 0] = acc_116153; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_126931[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_126931[i_2] = mem_121526[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_121518[i_3] = mem_param_tmp_126931[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_121527[i_4] = mem_param_121518[i_4]; + + __local char *mem_121531; + + mem_121531 = (__local char *) mem_121531_backing_5; + + double mem_121535[1]; + double mem_125212[1]; + + if (cond_116173) { + mem_125212[(int64_t) 0] = accs_mem_121527[(int64_t) 0]; + } else { + int64_t binop_x_116248 = tile_sizze_116018 * num_whole_tiles_116039; + int64_t ltid_y_116176 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116174 = sext_i32_i64(ltid_pre_126930); + int32_t 
ltid_flat_116175 = local_tid_126925; + int64_t j_116249 = ltid_x_116174 + binop_x_116248; + int64_t gtid_116251 = binop_x_116127 + ltid_y_116176; + bool binop_x_116256 = slt64(j_116249, j_m_i_81098); + bool binop_y_116257 = slt64(gtid_116251, m_70861); + bool cond_116258 = binop_x_116256 && binop_y_116257; + double pre_116259; + + if (cond_116258) { + int64_t slice_119571 = x_81093 + j_116249; + double x_116260 = ((__global double *) mem_121458)[slice_119571 * + (k2p2zq_70876 * + m_70861) + + gtid_116251 * + k2p2zq_70876 + + i_81094]; + + pre_116259 = x_116260; + } else { + pre_116259 = 0.0; + } + ((__local double *) mem_121531)[ltid_y_116176 * tile_sizze_116018 + + ltid_x_116174] = pre_116259; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119572 = x_81093 + binop_x_116248; + int64_t ltid_y_116209 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116207 = sext_i32_i64(ltid_pre_126930); + int32_t ltid_flat_116208 = local_tid_126925; + int64_t gtid_116265 = binop_x_116127 + ltid_y_116209; + int64_t gtid_116267 = binop_x_116142 + ltid_x_116207; + double acc_116270 = accs_mem_121527[(int64_t) 0]; + bool binop_x_116274 = slt64(gtid_116265, m_70861); + bool binop_y_116275 = slt64(gtid_116267, k2p2zq_70876); + bool cond_116276 = binop_x_116274 && binop_y_116275; + double acc_116277; + + if (cond_116276) { + double x_116278; + double redout_119732 = acc_116270; + + for (int64_t i_119733 = 0; i_119733 < residual_input_116172; + i_119733++) { + int64_t slice_120013 = slice_119572 + i_119733; + double x_116283 = ((__local + double *) mem_121531)[ltid_y_116209 * + tile_sizze_116018 + + i_119733]; + bool isnan_res_116284; + + isnan_res_116284 = futrts_isnan64(x_116283); + + double defunc_1_f_res_116285; + + if (isnan_res_116284) { + defunc_1_f_res_116285 = 0.0; + } else { + double x_116282 = ((__global + double *) mem_121512)[slice_120013 * + (k2p2zq_70876 * + m_70861) + + gtid_116265 * + k2p2zq_70876 + + gtid_116267]; + double defunc_1_f_res_f_res_116286 = x_116282 * 
x_116283; + + defunc_1_f_res_116285 = defunc_1_f_res_f_res_116286; + } + + double defunc_1_op_res_116281 = defunc_1_f_res_116285 + + redout_119732; + double redout_tmp_126934 = defunc_1_op_res_116281; + + redout_119732 = redout_tmp_126934; + } + x_116278 = redout_119732; + acc_116277 = x_116278; + } else { + acc_116277 = acc_116270; + } + mem_121535[(int64_t) 0] = acc_116277; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125212[(int64_t) 0] = mem_121535[(int64_t) 0]; + } + + __local char *mem_121547; + + mem_121547 = (__local char *) mem_121547_backing_6; + + int64_t ltid_y_116290 = sext_i32_i64(ltid_pre_126929); + int64_t ltid_x_116288 = sext_i32_i64(ltid_pre_126930); + int32_t ltid_flat_116289 = local_tid_126925; + + if (slt64(ltid_y_116290, tile_sizze_116018) && slt64(ltid_x_116288, + tile_sizze_116018)) { + int64_t gtid_116299 = binop_x_116127 + ltid_y_116290; + int64_t gtid_116301 = binop_x_116142 + ltid_x_116288; + bool binop_x_116303 = slt64(gtid_116299, m_70861); + bool binop_y_116304 = slt64(gtid_116301, k2p2zq_70876); + bool cond_116305 = binop_x_116303 && binop_y_116304; + + if (cond_116305) { + double defunc_2_reduce_res_116302 = mem_125212[(int64_t) 0]; + double defunc_3_map_res_r_transformed_row_116309 = ((__global + double *) mem_121351)[gtid_116299 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_81094 * + k2p2zq_70876 + + i_81094]; + double defunc_2_map_res_transformed_row_116311 = ((__global + double *) mem_120252)[gtid_116301 * + k2p2zq_70876 + + i_81094]; + double zm_res_116312 = defunc_2_map_res_transformed_row_116311 - + defunc_2_reduce_res_116302; + double zs_res_116313 = zm_res_116312 / + defunc_3_map_res_r_transformed_row_116309; + + ((__global double *) mem_121508)[gtid_116299 * k2p2zq_70876 + + gtid_116301 + i_81094 * + (k2p2zq_70876 * m_70861)] = + zs_res_116313; + for (int64_t i_126935 = 0; i_126935 < k2p2zq_70876; i_126935++) { + ((__global double *) mem_125219)[gid_flat_116023 + i_126935 * + num_threads_125691] = + ((__global double *) 
mem_121508)[gtid_116299 * + k2p2zq_70876 + + gtid_116301 + i_126935 * + (k2p2zq_70876 * m_70861)]; + } + } + for (int64_t i_126936 = 0; i_126936 < k2p2zq_70876; i_126936++) { + ((__local double *) mem_121547)[ltid_y_116290 * (k2p2zq_70876 * + tile_sizze_116018) + + ltid_x_116288 * k2p2zq_70876 + + i_126936] = ((__global + double *) mem_125219)[gid_flat_116023 + + i_126936 * + num_threads_125691]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t thread_out_index_126937 = gid_x_116015 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126929); + int64_t thread_out_index_126938 = gid_y_116016 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126930); + + if (slt64(thread_out_index_126937, m_70861) && + slt64(thread_out_index_126938, k2p2zq_70876)) { + for (int64_t i_126939 = 0; i_126939 < k2p2zq_70876; i_126939++) { + ((__global double *) mem_121551)[thread_out_index_126937 * + (k2p2zq_70876 * k2p2zq_70876) + + thread_out_index_126938 * + k2p2zq_70876 + i_126939] = + ((__local double *) mem_121547)[sext_i32_i64(ltid_pre_126929) * + (k2p2zq_70876 * + tile_sizze_116018) + + sext_i32_i64(ltid_pre_126930) * + k2p2zq_70876 + i_126939]; + } + } + + error_6: + return; + #undef tile_sizze_116018 +} +__kernel void mainDetailedzisegmap_intragroup_116342(__global + int *global_failure, + __local volatile + int64_t *mem_121654_backing_aligned_0, + __local volatile + int64_t *mem_121652_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t gridDim_x_116335, + int64_t gridDim_y_116336, + int64_t full_tiles_116367, + int64_t kk_116570, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global + unsigned char *mem_121636, + __global + unsigned char *mem_121827) +{ + #define Ty_116322 (mainDetailedziTy_116319) + #define Ry_116323 (mainDetailedziRy_116321) + #define Tx_116324 (mainDetailedziTx_116318) + #define Rx_116325 (mainDetailedziRx_116320) + #define Tk_116326 (mainDetailedziTk_116317) + #define tk_div_tx_116327 
(sdiv_up64(mainDetailedziTk_116317, mainDetailedziTx_116318)) + #define tk_div_ty_116328 (sdiv_up64(mainDetailedziTk_116317, mainDetailedziTy_116319)) + #define TxRx_116329 (mainDetailedziTx_116318 * mainDetailedziRx_116320) + #define TyRy_116330 (mainDetailedziTy_116319 * mainDetailedziRy_116321) + #define a_loc_szz_116332 (mainDetailedziTk_116317 * (mainDetailedziTy_116319 * mainDetailedziRy_116321)) + #define b_loc_szz_116334 (mainDetailedziRx_116320 * (mainDetailedziTx_116318 * mainDetailedziTk_116317)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_121654_backing_1 = (__local volatile + char *) mem_121654_backing_aligned_0; + __local volatile char *restrict mem_121652_backing_0 = (__local volatile + char *) mem_121652_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127024; + int32_t local_tid_127025; + int64_t group_sizze_127028; + int32_t wave_sizze_127027; + int32_t group_tid_127026; + + global_tid_127024 = get_global_id(0); + local_tid_127025 = get_local_id(0); + group_sizze_127028 = get_local_size(0); + wave_sizze_127027 = LOCKSTEP_WIDTH; + group_tid_127026 = get_group_id(0); + + int32_t gid_flat_116342; + + gid_flat_116342 = group_tid_127026; + + int32_t ltid_pre_127029; + + ltid_pre_127029 = squot32(local_tid_127025, sext_i64_i32(Tx_116324)); + + int32_t ltid_pre_127030; + + ltid_pre_127030 = local_tid_127025 - squot32(local_tid_127025, + sext_i64_i32(Tx_116324)) * + sext_i64_i32(Tx_116324); + + int64_t gtid_79394; + + gtid_79394 = squot64(sext_i32_i64(group_tid_127026), gridDim_y_116336 * + gridDim_x_116335); + + int64_t gid_y_116341; + + gid_y_116341 = squot64(sext_i32_i64(group_tid_127026) - + squot64(sext_i32_i64(group_tid_127026), + gridDim_y_116336 * gridDim_x_116335) * + (gridDim_y_116336 * gridDim_x_116335), + gridDim_x_116335); + + int64_t gid_x_116340; + + gid_x_116340 = sext_i32_i64(group_tid_127026) - + 
squot64(sext_i32_i64(group_tid_127026), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * gridDim_x_116335) - + squot64(sext_i32_i64(group_tid_127026) - + squot64(sext_i32_i64(group_tid_127026), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * + gridDim_x_116335), + gridDim_x_116335) * gridDim_x_116335; + + int64_t iii_116343; + + iii_116343 = TyRy_116330 * gid_y_116341; + + int64_t jjj_116344 = TxRx_116329 * gid_x_116340; + double mem_121650[Ry_116323 * Rx_116325]; + int64_t ltid_y_116347 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_x_116345 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116346 = local_tid_127025; + double mem_121641[Ry_116323 * Rx_116325]; + + for (int64_t i_116358 = 0; i_116358 < Ry_116323; i_116358++) { + for (int64_t i_116361 = 0; i_116361 < Rx_116325; i_116361++) { + mem_121641[i_116358 * Rx_116325 + i_116361] = 0.0; + } + } + for (int64_t i_127033 = 0; i_127033 < Ry_116323; i_127033++) { + for (int64_t i_127034 = 0; i_127034 < Rx_116325; i_127034++) { + mem_121650[i_127033 * Rx_116325 + i_127034] = mem_121641[i_127033 * + Rx_116325 + + i_127034]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_121652; + + mem_121652 = (__local char *) mem_121652_backing_0; + + __local char *mem_121654; + + mem_121654 = (__local char *) mem_121654_backing_1; + + double mem_121725[Ry_116323]; + double mem_121729[Rx_116325]; + double loop_mem_121741[Ry_116323 * Rx_116325]; + double mem_param_121655[Ry_116323 * Rx_116325]; + + for (int32_t i_2 = 0; i_2 < Ry_116323 * Rx_116325; i_2++) + mem_param_121655[i_2] = mem_121650[i_2]; + for (int64_t i_116368 = 0; i_116368 < full_tiles_116367; i_116368++) { + int64_t kk_116372 = Tk_116326 * i_116368; + + for (int64_t i_116373 = 0; i_116373 < Ry_116323; i_116373++) { + int64_t binop_y_116396 = Ty_116322 * i_116373; + + for (int64_t i_116375 = 0; i_116375 < tk_div_tx_116327; + i_116375++) { + int64_t binop_y_116394 = Tx_116324 * i_116375; + int64_t ltid_x_116377 = 
sext_i32_i64(ltid_pre_127029); + int64_t ltid_y_116378 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116379 = local_tid_127025; + int64_t k_116395 = ltid_y_116378 + binop_y_116394; + int64_t i_116397 = ltid_x_116377 + binop_y_116396; + int64_t gtid_116398 = iii_116343 + i_116397; + int64_t A_col_idx_116399 = kk_116372 + k_116395; + bool cond_116400 = slt64(gtid_116398, k2p2zq_70876); + double A_elem_116401; + + if (cond_116400) { + double A_elem_116403 = ((__global + double *) mem_121636)[gtid_79394 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_116398 * + k2p2zq_70876 + + A_col_idx_116399]; + + A_elem_116401 = A_elem_116403; + } else { + A_elem_116401 = 0.0; + } + + bool cond_116405 = slt64(k_116395, Tk_116326); + int64_t a_loc_ind_116406; + + if (cond_116405) { + int64_t binop_y_116407 = Tk_116326 * i_116397; + int64_t loc_fi_116408 = k_116395 + binop_y_116407; + + a_loc_ind_116406 = loc_fi_116408; + } else { + a_loc_ind_116406 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_116406) && + slt64(a_loc_ind_116406, a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116406] = + A_elem_116401; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_116413 = 0; i_116413 < tk_div_ty_116328; i_116413++) { + int64_t binop_y_116434 = Ty_116322 * i_116413; + + for (int64_t i_116415 = 0; i_116415 < Rx_116325; i_116415++) { + int64_t binop_y_116436 = Tx_116324 * i_116415; + int64_t ltid_x_116417 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_y_116418 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116419 = local_tid_127025; + int64_t k_116435 = ltid_x_116417 + binop_y_116434; + int64_t j_116437 = ltid_y_116418 + binop_y_116436; + int64_t gtid_116438 = jjj_116344 + j_116437; + int64_t B_row_idx_116439 = kk_116372 + k_116435; + bool cond_116440 = slt64(gtid_116438, k2p2zq_70876); + double B_elem_116441; + + if (cond_116440) { + double B_elem_116443 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_79394 * + binop_x_120251 + + 
B_row_idx_116439 * + k2p2zq_70876 + + gtid_116438]; + + B_elem_116441 = B_elem_116443; + } else { + B_elem_116441 = 0.0; + } + + bool cond_116445 = slt64(k_116435, Tk_116326); + int64_t b_loc_ind_116446; + + if (cond_116445) { + int64_t binop_y_116447 = TxRx_116329 * k_116435; + int64_t loc_fi_116448 = j_116437 + binop_y_116447; + + b_loc_ind_116446 = loc_fi_116448; + } else { + b_loc_ind_116446 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_116446) && + slt64(b_loc_ind_116446, b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116446] = + B_elem_116441; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_121740[Ry_116323 * Rx_116325]; + double mem_param_121712[Ry_116323 * Rx_116325]; + + for (int32_t i_3 = 0; i_3 < Ry_116323 * Rx_116325; i_3++) + mem_param_121712[i_3] = mem_param_121655[i_3]; + for (int64_t i_116453 = 0; i_116453 < Tk_116326; i_116453++) { + int64_t binop_y_116492 = TxRx_116329 * i_116453; + int64_t ltid_y_116457 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_x_116455 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116456 = local_tid_127025; + double mem_121715[Ry_116323]; + double mem_121717[Rx_116325]; + int64_t binop_x_116483 = Ry_116323 * ltid_y_116457; + + for (int64_t i_116481 = 0; i_116481 < Ry_116323; i_116481++) { + int64_t binop_x_116484 = i_116481 + binop_x_116483; + int64_t binop_y_116485 = Tk_116326 * binop_x_116484; + int64_t a_loc_ind_116486 = i_116453 + binop_y_116485; + + for (int64_t i_127046 = 0; i_127046 < (int64_t) 1; i_127046++) { + mem_121715[i_116481 + i_127046] = ((__local + double *) mem_121652)[a_loc_ind_116486 + + i_127046]; + } + } + + int64_t binop_y_116494 = Rx_116325 * ltid_x_116455; + + for (int64_t i_116490 = 0; i_116490 < Rx_116325; i_116490++) { + int64_t binop_x_116493 = i_116490 + binop_y_116492; + int64_t b_loc_ind_116495 = binop_x_116493 + binop_y_116494; + + for (int64_t i_127048 = 0; i_127048 < (int64_t) 1; i_127048++) { + mem_121717[i_116490 + i_127048] = ((__local 
+ double *) mem_121654)[b_loc_ind_116495 + + i_127048]; + } + } + for (int64_t i_127049 = 0; i_127049 < Ry_116323; i_127049++) { + mem_121725[i_127049] = mem_121715[i_127049]; + } + for (int64_t i_127050 = 0; i_127050 < Rx_116325; i_127050++) { + mem_121729[i_127050] = mem_121717[i_127050]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_121739[Ry_116323 * Rx_116325]; + int64_t ltid_y_116502 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_x_116500 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116501 = local_tid_127025; + int64_t binop_y_116543 = Ry_116323 * ltid_y_116502; + int64_t binop_y_116547 = Rx_116325 * ltid_x_116500; + + for (int64_t i_116537 = 0; i_116537 < Ry_116323; i_116537++) { + int64_t binop_x_116542 = iii_116343 + i_116537; + int64_t cmpop_x_116544 = binop_x_116542 + binop_y_116543; + bool binop_x_116545 = slt64(cmpop_x_116544, k2p2zq_70876); + + for (int64_t i_116540 = 0; i_116540 < Rx_116325; i_116540++) { + int64_t binop_x_116546 = jjj_116344 + i_116540; + int64_t cmpop_x_116548 = binop_x_116546 + binop_y_116547; + bool binop_y_116549 = slt64(cmpop_x_116548, k2p2zq_70876); + bool cond_116550 = binop_x_116545 && binop_y_116549; + + if (cond_116550) { + double a_116552 = mem_121725[i_116537]; + double b_116553 = mem_121729[i_116540]; + double c_116554 = mem_param_121712[i_116537 * + Rx_116325 + + i_116540]; + double defunc_1_f_res_116557 = a_116552 * b_116553; + double defunc_1_op_res_116561 = c_116554 + + defunc_1_f_res_116557; + + mem_param_121712[i_116537 * Rx_116325 + i_116540] = + defunc_1_op_res_116561; + } + } + } + for (int64_t i_127053 = 0; i_127053 < Ry_116323; i_127053++) { + for (int64_t i_127054 = 0; i_127054 < Rx_116325; i_127054++) { + mem_121739[i_127053 * Rx_116325 + i_127054] = + mem_param_121712[i_127053 * Rx_116325 + i_127054]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127043[Ry_116323 * Rx_116325]; + + for (int32_t i_4 = 0; i_4 < Ry_116323 * Rx_116325; i_4++) + mem_param_tmp_127043[i_4] = 
mem_121739[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_116323 * Rx_116325; i_5++) + mem_param_121712[i_5] = mem_param_tmp_127043[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_116323 * Rx_116325; i_6++) + loop_mem_121740[i_6] = mem_param_121712[i_6]; + + double mem_param_tmp_127035[Ry_116323 * Rx_116325]; + + for (int32_t i_7 = 0; i_7 < Ry_116323 * Rx_116325; i_7++) + mem_param_tmp_127035[i_7] = loop_mem_121740[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_116323 * Rx_116325; i_8++) + mem_param_121655[i_8] = mem_param_tmp_127035[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_116323 * Rx_116325; i_9++) + loop_mem_121741[i_9] = mem_param_121655[i_9]; + for (int64_t i_116571 = 0; i_116571 < Ry_116323; i_116571++) { + int64_t binop_y_116596 = Ty_116322 * i_116571; + + for (int64_t i_116573 = 0; i_116573 < tk_div_tx_116327; i_116573++) { + int64_t binop_y_116594 = Tx_116324 * i_116573; + int64_t ltid_x_116575 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_y_116576 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116577 = local_tid_127025; + int64_t k_116595 = ltid_y_116576 + binop_y_116594; + int64_t i_116597 = ltid_x_116575 + binop_y_116596; + int64_t gtid_116598 = iii_116343 + i_116597; + int64_t A_col_idx_116599 = kk_116570 + k_116595; + bool binop_x_116600 = slt64(gtid_116598, k2p2zq_70876); + bool binop_y_116601 = slt64(A_col_idx_116599, k2p2zq_70876); + bool cond_116602 = binop_x_116600 && binop_y_116601; + double A_elem_116603; + + if (cond_116602) { + double A_elem_116605 = ((__global + double *) mem_121636)[gtid_79394 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_116598 * + k2p2zq_70876 + + A_col_idx_116599]; + + A_elem_116603 = A_elem_116605; + } else { + A_elem_116603 = 0.0; + } + + bool cond_116607 = slt64(k_116595, Tk_116326); + int64_t a_loc_ind_116608; + + if (cond_116607) { + int64_t binop_y_116609 = Tk_116326 * i_116597; + int64_t loc_fi_116610 = k_116595 + binop_y_116609; + + a_loc_ind_116608 = loc_fi_116610; + } else { + a_loc_ind_116608 = (int64_t) -1; + } + if 
(sle64((int64_t) 0, a_loc_ind_116608) && slt64(a_loc_ind_116608, + a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116608] = + A_elem_116603; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_116615 = 0; i_116615 < tk_div_ty_116328; i_116615++) { + int64_t binop_y_116638 = Ty_116322 * i_116615; + + for (int64_t i_116617 = 0; i_116617 < Rx_116325; i_116617++) { + int64_t binop_y_116640 = Tx_116324 * i_116617; + int64_t ltid_x_116619 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_y_116620 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116621 = local_tid_127025; + int64_t k_116639 = ltid_x_116619 + binop_y_116638; + int64_t j_116641 = ltid_y_116620 + binop_y_116640; + int64_t gtid_116642 = jjj_116344 + j_116641; + int64_t B_row_idx_116643 = kk_116570 + k_116639; + bool binop_x_116644 = slt64(gtid_116642, k2p2zq_70876); + bool binop_y_116645 = slt64(B_row_idx_116643, k2p2zq_70876); + bool cond_116646 = binop_x_116644 && binop_y_116645; + double B_elem_116647; + + if (cond_116646) { + double B_elem_116649 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_79394 * + binop_x_120251 + + B_row_idx_116643 * + k2p2zq_70876 + + gtid_116642]; + + B_elem_116647 = B_elem_116649; + } else { + B_elem_116647 = 0.0; + } + + bool cond_116651 = slt64(k_116639, Tk_116326); + int64_t b_loc_ind_116652; + + if (cond_116651) { + int64_t binop_y_116653 = TxRx_116329 * k_116639; + int64_t loc_fi_116654 = j_116641 + binop_y_116653; + + b_loc_ind_116652 = loc_fi_116654; + } else { + b_loc_ind_116652 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_116652) && slt64(b_loc_ind_116652, + b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116652] = + B_elem_116647; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_121807[Ry_116323]; + double mem_121811[Rx_116325]; + double mem_121821[Ry_116323 * Rx_116325]; + double loop_mem_121823[Ry_116323 * Rx_116325]; + double mem_param_121794[Ry_116323 * Rx_116325]; + + for (int32_t i_10 
= 0; i_10 < Ry_116323 * Rx_116325; i_10++) + mem_param_121794[i_10] = loop_mem_121741[i_10]; + for (int64_t i_116659 = 0; i_116659 < Tk_116326; i_116659++) { + int64_t cmpop_x_116661 = kk_116570 + i_116659; + bool cond_116662 = slt64(cmpop_x_116661, k2p2zq_70876); + double mem_125235[Ry_116323 * Rx_116325]; + + if (cond_116662) { + int64_t binop_y_116700 = TxRx_116329 * i_116659; + int64_t bytes_121796 = (int64_t) 8 * Ry_116323; + int64_t bytes_121798 = (int64_t) 8 * Rx_116325; + int64_t ltid_y_116665 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_x_116663 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116664 = local_tid_127025; + double mem_121797[Ry_116323]; + double mem_121799[Rx_116325]; + int64_t binop_x_116691 = Ry_116323 * ltid_y_116665; + + for (int64_t i_116689 = 0; i_116689 < Ry_116323; i_116689++) { + int64_t binop_x_116692 = i_116689 + binop_x_116691; + int64_t binop_y_116693 = Tk_116326 * binop_x_116692; + int64_t a_loc_ind_116694 = i_116659 + binop_y_116693; + + for (int64_t i_127062 = 0; i_127062 < (int64_t) 1; i_127062++) { + mem_121797[i_116689 + i_127062] = ((__local + double *) mem_121652)[a_loc_ind_116694 + + i_127062]; + } + } + + int64_t binop_y_116702 = Rx_116325 * ltid_x_116663; + + for (int64_t i_116698 = 0; i_116698 < Rx_116325; i_116698++) { + int64_t binop_x_116701 = i_116698 + binop_y_116700; + int64_t b_loc_ind_116703 = binop_x_116701 + binop_y_116702; + + for (int64_t i_127064 = 0; i_127064 < (int64_t) 1; i_127064++) { + mem_121799[i_116698 + i_127064] = ((__local + double *) mem_121654)[b_loc_ind_116703 + + i_127064]; + } + } + for (int64_t i_127065 = 0; i_127065 < Ry_116323; i_127065++) { + mem_121807[i_127065] = mem_121797[i_127065]; + } + for (int64_t i_127066 = 0; i_127066 < Rx_116325; i_127066++) { + mem_121811[i_127066] = mem_121799[i_127066]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_116710 = sext_i32_i64(ltid_pre_127029); + int64_t ltid_x_116708 = sext_i32_i64(ltid_pre_127030); + int32_t ltid_flat_116709 = 
local_tid_127025; + int64_t binop_y_116751 = Ry_116323 * ltid_y_116710; + int64_t binop_y_116755 = Rx_116325 * ltid_x_116708; + + for (int64_t i_116745 = 0; i_116745 < Ry_116323; i_116745++) { + int64_t binop_x_116750 = iii_116343 + i_116745; + int64_t cmpop_x_116752 = binop_x_116750 + binop_y_116751; + bool binop_x_116753 = slt64(cmpop_x_116752, k2p2zq_70876); + + for (int64_t i_116748 = 0; i_116748 < Rx_116325; i_116748++) { + int64_t binop_x_116754 = jjj_116344 + i_116748; + int64_t cmpop_x_116756 = binop_x_116754 + binop_y_116755; + bool binop_y_116757 = slt64(cmpop_x_116756, k2p2zq_70876); + bool cond_116758 = binop_x_116753 && binop_y_116757; + + if (cond_116758) { + double a_116760 = mem_121807[i_116745]; + double b_116761 = mem_121811[i_116748]; + double c_116762 = mem_param_121794[i_116745 * + Rx_116325 + + i_116748]; + double defunc_1_f_res_116765 = a_116760 * b_116761; + double defunc_1_op_res_116769 = c_116762 + + defunc_1_f_res_116765; + + mem_param_121794[i_116745 * Rx_116325 + i_116748] = + defunc_1_op_res_116769; + } + } + } + for (int64_t i_127069 = 0; i_127069 < Ry_116323; i_127069++) { + for (int64_t i_127070 = 0; i_127070 < Rx_116325; i_127070++) { + mem_121821[i_127069 * Rx_116325 + i_127070] = + mem_param_121794[i_127069 * Rx_116325 + i_127070]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127071 = 0; i_127071 < Ry_116323; i_127071++) { + for (int64_t i_127072 = 0; i_127072 < Rx_116325; i_127072++) { + mem_125235[i_127071 * Rx_116325 + i_127072] = + mem_121821[i_127071 * Rx_116325 + i_127072]; + } + } + } else { + for (int64_t i_127073 = 0; i_127073 < Ry_116323; i_127073++) { + for (int64_t i_127074 = 0; i_127074 < Rx_116325; i_127074++) { + mem_125235[i_127073 * Rx_116325 + i_127074] = + mem_param_121794[i_127073 * Rx_116325 + i_127074]; + } + } + } + + double mem_param_tmp_127059[Ry_116323 * Rx_116325]; + + for (int32_t i_11 = 0; i_11 < Ry_116323 * Rx_116325; i_11++) + mem_param_tmp_127059[i_11] = mem_125235[i_11]; + for (int32_t 
i_12 = 0; i_12 < Ry_116323 * Rx_116325; i_12++) + mem_param_121794[i_12] = mem_param_tmp_127059[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_116323 * Rx_116325; i_13++) + loop_mem_121823[i_13] = mem_param_121794[i_13]; + + int64_t reg_tile_i_127075 = squot64(sext_i32_i64(local_tid_127025), + Ty_116322 * Tx_116324); + int64_t reg_tile_i_127076 = squot64(sext_i32_i64(local_tid_127025) - + squot64(sext_i32_i64(local_tid_127025), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), Tx_116324); + int64_t reg_tile_i_127077 = sext_i32_i64(local_tid_127025) - + squot64(sext_i32_i64(local_tid_127025), Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324) - squot64(sext_i32_i64(local_tid_127025) - + squot64(sext_i32_i64(local_tid_127025), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), + Tx_116324) * Tx_116324; + int64_t tile_dim_start_127078 = gtid_79394 + reg_tile_i_127075; + int64_t tile_dim_start_127079 = Ry_116323 * (Ty_116322 * gid_y_116341 + + reg_tile_i_127076); + int64_t tile_dim_start_127080 = Rx_116325 * (Tx_116324 * gid_x_116340 + + reg_tile_i_127077); + + for (int64_t nest_i_127081 = 0; nest_i_127081 < (int64_t) 1; + nest_i_127081++) { + for (int64_t nest_i_127082 = 0; nest_i_127082 < Ry_116323; + nest_i_127082++) { + for (int64_t nest_i_127083 = 0; nest_i_127083 < Rx_116325; + nest_i_127083++) { + if ((slt64(tile_dim_start_127078 + nest_i_127081, m_70861) && + slt64(tile_dim_start_127079 + nest_i_127082, + k2p2zq_70876)) && slt64(tile_dim_start_127080 + + nest_i_127083, + k2p2zq_70876)) { + ((__global double *) mem_121827)[(tile_dim_start_127078 + + nest_i_127081) * + (k2p2zq_70876 * + k2p2zq_70876) + + (tile_dim_start_127079 + + nest_i_127082) * + k2p2zq_70876 + + (tile_dim_start_127080 + + nest_i_127083)] = + loop_mem_121823[squot64(nest_i_127082 * Rx_116325 + + nest_i_127083 - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127082 * + Rx_116325 + 
+ nest_i_127083 - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325 + + (nest_i_127082 * Rx_116325 + + nest_i_127083 - squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * Rx_116325) - + squot64(nest_i_127082 * Rx_116325 + + nest_i_127083 - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * (Ry_116323 * + Rx_116325) - + squot64(nest_i_127082 * Rx_116325 + + nest_i_127083 - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083 - + squot64(nest_i_127082 * + Rx_116325 + + nest_i_127083, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * + Ry_116323 * + Rx_116325), + Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325)]; + } + } + } + } + + error_9: + return; + #undef Ty_116322 + #undef Ry_116323 + #undef Tx_116324 + #undef Rx_116325 + #undef Tk_116326 + #undef tk_div_tx_116327 + #undef tk_div_ty_116328 + #undef TxRx_116329 + #undef TyRy_116330 + #undef a_loc_szz_116332 + #undef b_loc_szz_116334 +} +__kernel void mainDetailedzisegmap_intragroup_116784(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125292_backing_aligned_0, + __local volatile + int64_t *mem_123215_backing_aligned_1, + __local volatile + int64_t *mem_123196_backing_aligned_2, + __local volatile + int64_t *mem_123187_backing_aligned_3, + __local volatile + int64_t *mem_123164_backing_aligned_4, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_y_116782, + int64_t ctx_val_123177, + 
int64_t num_threads_125756, + __global + unsigned char *mem_121944, + __global + unsigned char *mem_121946, + __global + unsigned char *mem_123151, + __global + unsigned char *mem_123155, + __global + unsigned char *mem_123233, + __global + unsigned char *mem_125275) +{ + #define tile_sizze_116779 (mainDetailedzitile_sizze_116778) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_125292_backing_8 = (__local volatile + char *) mem_125292_backing_aligned_0; + __local volatile char *restrict mem_123215_backing_7 = (__local volatile + char *) mem_123215_backing_aligned_1; + __local volatile char *restrict mem_123196_backing_2 = (__local volatile + char *) mem_123196_backing_aligned_2; + __local volatile char *restrict mem_123187_backing_1 = (__local volatile + char *) mem_123187_backing_aligned_3; + __local volatile char *restrict mem_123164_backing_0 = (__local volatile + char *) mem_123164_backing_aligned_4; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127671; + int32_t local_tid_127672; + int64_t group_sizze_127675; + int32_t wave_sizze_127674; + int32_t group_tid_127673; + + global_tid_127671 = get_global_id(0); + local_tid_127672 = get_local_id(0); + group_sizze_127675 = get_local_size(0); + wave_sizze_127674 = LOCKSTEP_WIDTH; + group_tid_127673 = get_group_id(0); + + int32_t gid_flat_116784; + + gid_flat_116784 = group_tid_127673; + + int32_t ltid_pre_127676; + + ltid_pre_127676 = squot32(local_tid_127672, + sext_i64_i32(tile_sizze_116779)); + + int32_t ltid_pre_127677; + + ltid_pre_127677 = local_tid_127672 - squot32(local_tid_127672, + sext_i64_i32(tile_sizze_116779)) * + sext_i64_i32(tile_sizze_116779); + + int64_t gid_x_116776; + + gid_x_116776 = squot64(sext_i32_i64(group_tid_127673), num_groups_y_116782); + + 
int64_t gid_y_116777; + + gid_y_116777 = sext_i32_i64(group_tid_127673) - + squot64(sext_i32_i64(group_tid_127673), num_groups_y_116782) * + num_groups_y_116782; + + int64_t binop_x_116811; + + binop_x_116811 = gid_x_116776 * tile_sizze_116779; + + int64_t binop_x_116813 = gid_y_116777 * tile_sizze_116779; + __local char *mem_123164; + + mem_123164 = (__local char *) mem_123164_backing_0; + + int64_t ltid_y_116803 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_116801 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_116802 = local_tid_127672; + + if (slt64(ltid_y_116803, tile_sizze_116779) && slt64(ltid_x_116801, + tile_sizze_116779)) { + int64_t gtid_116812 = ltid_y_116803 + binop_x_116811; + int64_t gtid_116814 = ltid_x_116801 + binop_x_116813; + bool binop_x_116815 = slt64(gtid_116812, m_70861); + bool binop_y_116816 = slt64(gtid_116814, k2p2zq_70876); + bool cond_116817 = binop_x_116815 && binop_y_116816; + + if (cond_116817) { + for (int64_t i_127678 = 0; i_127678 < k2p2zq_70876; i_127678++) { + ((__global double *) mem_125275)[gid_flat_116784 + i_127678 * + num_threads_125756] = + ((__global double *) mem_121946)[i_127678]; + } + } + for (int64_t i_127679 = 0; i_127679 < k2p2zq_70876; i_127679++) { + ((__local double *) mem_123164)[ltid_y_116803 * (k2p2zq_70876 * + tile_sizze_116779) + + ltid_x_116801 * k2p2zq_70876 + + i_127679] = ((__global + double *) mem_125275)[gid_flat_116784 + + i_127679 * + num_threads_125756]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_123182[1]; + __local char *mem_123187; + + mem_123187 = (__local char *) mem_123187_backing_1; + + __local char *mem_123196; + + mem_123196 = (__local char *) mem_123196_backing_2; + + double mem_123200[1]; + double mem_125285[1]; + __local char *tiled_inside_loop_mem_123229; + __local char *mem_param_123175; + + mem_param_123175 = mem_123164; + for (int64_t i_84935 = 0; i_84935 < k2p2zq_70876; i_84935++) { + int64_t x_84937 = sub64(k2p2zq_70876, i_84935); + int64_t i_84938 = 
sub64(x_84937, (int64_t) 1); + bool x_84939 = sle64((int64_t) 0, i_84938); + bool y_84940 = slt64(i_84938, k2p2zq_70876); + bool bounds_check_84941 = x_84939 && y_84940; + int64_t j_m_i_84942 = sub64(k2p2zq_70876, x_84937); + bool empty_slice_84943 = j_m_i_84942 == (int64_t) 0; + int64_t m_84944 = sub64(j_m_i_84942, (int64_t) 1); + int64_t i_p_m_t_s_84945 = add64(x_84937, m_84944); + bool zzero_leq_i_p_m_t_s_84946 = sle64((int64_t) 0, i_p_m_t_s_84945); + bool i_p_m_t_s_leq_w_84947 = slt64(i_p_m_t_s_84945, k2p2zq_70876); + bool zzero_lte_i_84948 = sle64((int64_t) 0, x_84937); + bool i_lte_j_84949 = sle64(x_84937, k2p2zq_70876); + bool y_84950 = i_p_m_t_s_leq_w_84947 && zzero_lte_i_84948; + bool y_84951 = zzero_leq_i_p_m_t_s_84946 && y_84950; + bool y_84952 = i_lte_j_84949 && y_84951; + bool forwards_ok_84953 = zzero_lte_i_84948 && y_84952; + bool ok_or_empty_84954 = empty_slice_84943 || forwards_ok_84953; + bool index_ok_84955 = bounds_check_84941 && ok_or_empty_84954; + bool index_certs_84956; + + if (!index_ok_84955) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 376) == -1) { + global_failure_args[0] = i_84938; + global_failure_args[1] = x_84937; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + global_failure_args[4] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_1; + } + } + + bool index_certs_84957; + + if (!ok_or_empty_84954) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 377) == -1) { + global_failure_args[0] = x_84937; + global_failure_args[1] = k2p2zq_70876; + global_failure_args[2] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_1; + } + } + + int64_t num_whole_tiles_116837 = squot64(j_m_i_84942, + tile_sizze_116779); + int64_t ltid_y_116840 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_116838 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_116839 = local_tid_127672; + + if (slt64(ltid_y_116840, tile_sizze_116779) && slt64(ltid_x_116838, + tile_sizze_116779)) { + 
mem_123182[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_123192[1]; + double mem_param_123183[1]; + + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_123183[i_3] = mem_123182[i_3]; + for (int64_t tile_id_116849 = 0; tile_id_116849 < + num_whole_tiles_116837; tile_id_116849++) { + int64_t binop_x_116925 = tile_sizze_116779 * tile_id_116849; + int64_t ltid_y_116852 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_116850 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_116851 = local_tid_127672; + int64_t j_116926 = ltid_x_116850 + binop_x_116925; + int64_t gtid_116928 = binop_x_116811 + ltid_y_116852; + bool binop_x_116934 = slt64(j_116926, j_m_i_84942); + bool binop_y_116935 = slt64(gtid_116928, m_70861); + bool cond_116936 = binop_x_116934 && binop_y_116935; + double pre_116937; + + if (cond_116936) { + int64_t slice_119577 = x_84937 + j_116926; + double x_116938 = ((__global + double *) mem_123151)[slice_119577 * + (k2p2zq_70876 * + m_70861) + + gtid_116928 * + k2p2zq_70876 + + i_84938]; + + pre_116937 = x_116938; + } else { + pre_116937 = 0.0; + } + ((__local double *) mem_123187)[ltid_y_116852 * tile_sizze_116779 + + ltid_x_116850] = pre_116937; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119578 = x_84937 + binop_x_116925; + double mem_123191[1]; + int64_t ltid_y_116885 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_116883 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_116884 = local_tid_127672; + int64_t gtid_116942 = binop_x_116811 + ltid_y_116885; + int64_t gtid_116944 = binop_x_116813 + ltid_x_116883; + double acc_116948 = mem_param_123183[(int64_t) 0]; + bool binop_x_116952 = slt64(gtid_116942, m_70861); + bool binop_y_116953 = slt64(gtid_116944, k2p2zq_70876); + bool cond_116954 = binop_x_116952 && binop_y_116953; + double acc_116955; + + if (cond_116954) { + double x_116956; + double redout_119837 = acc_116948; + + for (int64_t 
i_119838 = 0; i_119838 < tile_sizze_116779; + i_119838++) { + int64_t slice_120037 = slice_119578 + i_119838; + double x_116961 = ((__local + double *) mem_123187)[ltid_y_116885 * + tile_sizze_116779 + + i_119838]; + bool isnan_res_116962; + + isnan_res_116962 = futrts_isnan64(x_116961); + + double defunc_1_f_res_116963; + + if (isnan_res_116962) { + defunc_1_f_res_116963 = 0.0; + } else { + double x_116960 = ((__local + double *) mem_param_123175)[ltid_y_116885 * + ctx_val_123177 + + ltid_x_116883 * + k2p2zq_70876 + + slice_120037]; + double defunc_1_f_res_f_res_116964 = x_116960 * + x_116961; + + defunc_1_f_res_116963 = defunc_1_f_res_f_res_116964; + } + + double defunc_1_op_res_116959 = defunc_1_f_res_116963 + + redout_119837; + double redout_tmp_127684 = defunc_1_op_res_116959; + + redout_119837 = redout_tmp_127684; + } + x_116956 = redout_119837; + acc_116955 = x_116956; + } else { + acc_116955 = acc_116948; + } + mem_123191[(int64_t) 0] = acc_116955; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127682[1]; + + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_127682[i_4] = mem_123191[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_123183[i_5] = mem_param_tmp_127682[i_5]; + } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_123192[i_6] = mem_param_123183[i_6]; + + int64_t residual_input_116974 = srem64(j_m_i_84942, tile_sizze_116779); + bool cond_116975 = residual_input_116974 == (int64_t) 0; + + if (cond_116975) { + mem_125285[(int64_t) 0] = accs_mem_123192[(int64_t) 0]; + } else { + int64_t binop_x_117052 = tile_sizze_116779 * num_whole_tiles_116837; + int64_t ltid_y_116978 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_116976 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_116977 = local_tid_127672; + int64_t j_117053 = ltid_x_116976 + binop_x_117052; + int64_t gtid_117055 = binop_x_116811 + ltid_y_116978; + bool binop_x_117061 = slt64(j_117053, j_m_i_84942); + bool binop_y_117062 = slt64(gtid_117055, m_70861); + bool 
cond_117063 = binop_x_117061 && binop_y_117062; + double pre_117064; + + if (cond_117063) { + int64_t slice_119579 = x_84937 + j_117053; + double x_117065 = ((__global + double *) mem_123151)[slice_119579 * + (k2p2zq_70876 * + m_70861) + + gtid_117055 * + k2p2zq_70876 + + i_84938]; + + pre_117064 = x_117065; + } else { + pre_117064 = 0.0; + } + ((__local double *) mem_123196)[ltid_y_116978 * tile_sizze_116779 + + ltid_x_116976] = pre_117064; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119580 = x_84937 + binop_x_117052; + int64_t ltid_y_117012 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_117010 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_117011 = local_tid_127672; + int64_t gtid_117070 = binop_x_116811 + ltid_y_117012; + int64_t gtid_117072 = binop_x_116813 + ltid_x_117010; + double acc_117076 = accs_mem_123192[(int64_t) 0]; + bool binop_x_117080 = slt64(gtid_117070, m_70861); + bool binop_y_117081 = slt64(gtid_117072, k2p2zq_70876); + bool cond_117082 = binop_x_117080 && binop_y_117081; + double acc_117083; + + if (cond_117082) { + double x_117084; + double redout_119839 = acc_117076; + + for (int64_t i_119840 = 0; i_119840 < residual_input_116974; + i_119840++) { + int64_t slice_120038 = slice_119580 + i_119840; + double x_117089 = ((__local + double *) mem_123196)[ltid_y_117012 * + tile_sizze_116779 + + i_119840]; + bool isnan_res_117090; + + isnan_res_117090 = futrts_isnan64(x_117089); + + double defunc_1_f_res_117091; + + if (isnan_res_117090) { + defunc_1_f_res_117091 = 0.0; + } else { + double x_117088 = ((__local + double *) mem_param_123175)[ltid_y_117012 * + ctx_val_123177 + + ltid_x_117010 * + k2p2zq_70876 + + slice_120038]; + double defunc_1_f_res_f_res_117092 = x_117088 * + x_117089; + + defunc_1_f_res_117091 = defunc_1_f_res_f_res_117092; + } + + double defunc_1_op_res_117087 = defunc_1_f_res_117091 + + redout_119839; + double redout_tmp_127685 = defunc_1_op_res_117087; + + redout_119839 = redout_tmp_127685; + } + x_117084 = 
redout_119839; + acc_117083 = x_117084; + } else { + acc_117083 = acc_117076; + } + mem_123200[(int64_t) 0] = acc_117083; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125285[(int64_t) 0] = mem_123200[(int64_t) 0]; + } + + __local char *mem_123215; + + mem_123215 = (__local char *) mem_123215_backing_7; + + int64_t ltid_y_117096 = sext_i32_i64(ltid_pre_127676); + int64_t ltid_x_117094 = sext_i32_i64(ltid_pre_127677); + int32_t ltid_flat_117095 = local_tid_127672; + + if (slt64(ltid_y_117096, tile_sizze_116779) && slt64(ltid_x_117094, + tile_sizze_116779)) { + int64_t gtid_117105 = binop_x_116811 + ltid_y_117096; + int64_t gtid_117107 = binop_x_116813 + ltid_x_117094; + bool binop_x_117109 = slt64(gtid_117105, m_70861); + bool binop_y_117110 = slt64(gtid_117107, k2p2zq_70876); + bool cond_117111 = binop_x_117109 && binop_y_117110; + __local char *mem_125292; + + mem_125292 = (__local char *) mem_125292_backing_8; + if (cond_117111) { + double defunc_2_reduce_res_117108 = mem_125285[(int64_t) 0]; + bool index_ok_117116 = bounds_check_84941 && bounds_check_84941; + bool index_certs_117117; + + if (!index_ok_117116) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 378) == -1) { + global_failure_args[0] = i_84938; + global_failure_args[1] = i_84938; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_6; + } + } + + double zs_arg_117118 = ((__global + double *) mem_123155)[i_84938 * + (k2p2zq_70876 * + m_70861) + + gtid_117105 * + k2p2zq_70876 + + i_84938]; + bool index_certs_117119; + + if (!bounds_check_84941) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 379) == -1) { + global_failure_args[0] = i_84938; + global_failure_args[1] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_6; + } + } + + double zm_arg_117120 = ((__global + double *) mem_121944)[i_84938 * + k2p2zq_70876 + + gtid_117107]; + double zm_res_117121 = zm_arg_117120 - + defunc_2_reduce_res_117108; + 
double zs_res_117122 = zm_res_117121 / zs_arg_117118; + + ((__local double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_70876 + i_84938] = + zs_res_117122; + for (int64_t i_127686 = 0; i_127686 < k2p2zq_70876; + i_127686++) { + ((__local double *) mem_125292)[i_127686] = ((__local + double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_70876 + + i_127686]; + } + } + for (int64_t i_127687 = 0; i_127687 < k2p2zq_70876; i_127687++) { + ((__local double *) mem_123215)[ltid_y_117096 * (k2p2zq_70876 * + tile_sizze_116779) + + ltid_x_117094 * k2p2zq_70876 + + i_127687] = ((__local + double *) mem_125292)[i_127687]; + } + } + + error_6: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_param_tmp_127680; + + mem_param_tmp_127680 = mem_123215; + mem_param_123175 = mem_param_tmp_127680; + } + tiled_inside_loop_mem_123229 = mem_param_123175; + + int64_t thread_out_index_127688 = gid_x_116776 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127676); + int64_t thread_out_index_127689 = gid_y_116777 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127677); + + if (slt64(thread_out_index_127688, m_70861) && + slt64(thread_out_index_127689, k2p2zq_70876)) { + for (int64_t i_127690 = 0; i_127690 < k2p2zq_70876; i_127690++) { + ((__global double *) mem_123233)[thread_out_index_127688 * + (k2p2zq_70876 * k2p2zq_70876) + + thread_out_index_127689 * + k2p2zq_70876 + i_127690] = + ((__local + double *) tiled_inside_loop_mem_123229)[sext_i32_i64(ltid_pre_127676) * + ctx_val_123177 + + sext_i32_i64(ltid_pre_127677) * + k2p2zq_70876 + + i_127690]; + } + } + + error_7: + return; + #undef tile_sizze_116779 +} +__kernel void mainDetailedzisegmap_intragroup_117146(__global + int *global_failure, + __local volatile + int64_t *mem_123330_backing_aligned_0, + __local volatile + int64_t *mem_123314_backing_aligned_1, + __local volatile + int64_t 
*mem_123305_backing_aligned_2, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t x_84993, + int64_t i_84994, + int64_t j_m_i_84998, + int64_t num_groups_y_117144, + int64_t num_whole_tiles_117162, + int64_t residual_input_117295, + unsigned char cond_117296, + int64_t num_threads_125761, + __global + unsigned char *mem_121938, + __global + unsigned char *mem_123143, + __global + unsigned char *mem_123241, + __global + unsigned char *mem_123291, + __global + unsigned char *mem_123295, + __global + unsigned char *mem_123334, + __global + unsigned char *mem_125317) +{ + #define tile_sizze_117141 (mainDetailedzitile_sizze_117140) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_123330_backing_6 = (__local volatile + char *) mem_123330_backing_aligned_0; + __local volatile char *restrict mem_123314_backing_5 = (__local volatile + char *) mem_123314_backing_aligned_1; + __local volatile char *restrict mem_123305_backing_0 = (__local volatile + char *) mem_123305_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127708; + int32_t local_tid_127709; + int64_t group_sizze_127712; + int32_t wave_sizze_127711; + int32_t group_tid_127710; + + global_tid_127708 = get_global_id(0); + local_tid_127709 = get_local_id(0); + group_sizze_127712 = get_local_size(0); + wave_sizze_127711 = LOCKSTEP_WIDTH; + group_tid_127710 = get_group_id(0); + + int32_t gid_flat_117146; + + gid_flat_117146 = group_tid_127710; + + int32_t ltid_pre_127713; + + ltid_pre_127713 = squot32(local_tid_127709, + sext_i64_i32(tile_sizze_117141)); + + int32_t ltid_pre_127714; + + ltid_pre_127714 = local_tid_127709 - squot32(local_tid_127709, + sext_i64_i32(tile_sizze_117141)) * + sext_i64_i32(tile_sizze_117141); + + int64_t gid_x_117138; + + gid_x_117138 = squot64(sext_i32_i64(group_tid_127710), num_groups_y_117144); + + int64_t gid_y_117139; + + gid_y_117139 = sext_i32_i64(group_tid_127710) - + 
squot64(sext_i32_i64(group_tid_127710), num_groups_y_117144) * + num_groups_y_117144; + + double mem_123300[1]; + int64_t ltid_y_117165 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117163 = sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117164 = local_tid_127709; + + if (slt64(ltid_y_117165, tile_sizze_117141) && slt64(ltid_x_117163, + tile_sizze_117141)) { + mem_123300[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_117250 = gid_x_117138 * tile_sizze_117141; + int64_t binop_x_117265 = gid_y_117139 * tile_sizze_117141; + __local char *mem_123305; + + mem_123305 = (__local char *) mem_123305_backing_0; + + double accs_mem_123310[1]; + double mem_param_123301[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_123301[i_1] = mem_123300[i_1]; + for (int64_t tile_id_117174 = 0; tile_id_117174 < num_whole_tiles_117162; + tile_id_117174++) { + int64_t binop_x_117248 = tile_sizze_117141 * tile_id_117174; + int64_t ltid_y_117177 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117175 = sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117176 = local_tid_127709; + int64_t j_117249 = ltid_x_117175 + binop_x_117248; + int64_t gtid_117251 = ltid_y_117177 + binop_x_117250; + bool binop_x_117256 = slt64(j_117249, j_m_i_84998); + bool binop_y_117257 = slt64(gtid_117251, m_70861); + bool cond_117258 = binop_x_117256 && binop_y_117257; + double pre_117259; + + if (cond_117258) { + int64_t slice_119581 = x_84993 + j_117249; + double x_117260 = ((__global double *) mem_123241)[slice_119581 * + (k2p2zq_70876 * + m_70861) + + gtid_117251 * + k2p2zq_70876 + + i_84994]; + + pre_117259 = x_117260; + } else { + pre_117259 = 0.0; + } + ((__local double *) mem_123305)[ltid_y_117177 * tile_sizze_117141 + + ltid_x_117175] = pre_117259; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119582 = x_84993 + binop_x_117248; + double mem_123309[1]; + int64_t ltid_y_117209 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117207 = 
sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117208 = local_tid_127709; + int64_t gtid_117264 = ltid_y_117209 + binop_x_117250; + int64_t gtid_117266 = ltid_x_117207 + binop_x_117265; + double acc_117269 = mem_param_123301[(int64_t) 0]; + bool binop_x_117273 = slt64(gtid_117264, m_70861); + bool binop_y_117274 = slt64(gtid_117266, k2p2zq_70876); + bool cond_117275 = binop_x_117273 && binop_y_117274; + double acc_117276; + + if (cond_117275) { + double x_117277; + double redout_119848 = acc_117269; + + for (int64_t i_119849 = 0; i_119849 < tile_sizze_117141; + i_119849++) { + int64_t slice_120041 = slice_119582 + i_119849; + double x_117282 = ((__local + double *) mem_123305)[ltid_y_117209 * + tile_sizze_117141 + + i_119849]; + bool isnan_res_117283; + + isnan_res_117283 = futrts_isnan64(x_117282); + + double defunc_1_f_res_117284; + + if (isnan_res_117283) { + defunc_1_f_res_117284 = 0.0; + } else { + double x_117281 = ((__global + double *) mem_123295)[slice_120041 * + (k2p2zq_70876 * + m_70861) + + gtid_117264 * + k2p2zq_70876 + + gtid_117266]; + double defunc_1_f_res_f_res_117285 = x_117281 * x_117282; + + defunc_1_f_res_117284 = defunc_1_f_res_f_res_117285; + } + + double defunc_1_op_res_117280 = defunc_1_f_res_117284 + + redout_119848; + double redout_tmp_127717 = defunc_1_op_res_117280; + + redout_119848 = redout_tmp_127717; + } + x_117277 = redout_119848; + acc_117276 = x_117277; + } else { + acc_117276 = acc_117269; + } + mem_123309[(int64_t) 0] = acc_117276; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127715[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_127715[i_2] = mem_123309[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_123301[i_3] = mem_param_tmp_127715[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_123310[i_4] = mem_param_123301[i_4]; + + __local char *mem_123314; + + mem_123314 = (__local char *) mem_123314_backing_5; + + double mem_123318[1]; + double mem_125310[1]; + + if (cond_117296) { 
+ mem_125310[(int64_t) 0] = accs_mem_123310[(int64_t) 0]; + } else { + int64_t binop_x_117371 = tile_sizze_117141 * num_whole_tiles_117162; + int64_t ltid_y_117299 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117297 = sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117298 = local_tid_127709; + int64_t j_117372 = ltid_x_117297 + binop_x_117371; + int64_t gtid_117374 = binop_x_117250 + ltid_y_117299; + bool binop_x_117379 = slt64(j_117372, j_m_i_84998); + bool binop_y_117380 = slt64(gtid_117374, m_70861); + bool cond_117381 = binop_x_117379 && binop_y_117380; + double pre_117382; + + if (cond_117381) { + int64_t slice_119583 = x_84993 + j_117372; + double x_117383 = ((__global double *) mem_123241)[slice_119583 * + (k2p2zq_70876 * + m_70861) + + gtid_117374 * + k2p2zq_70876 + + i_84994]; + + pre_117382 = x_117383; + } else { + pre_117382 = 0.0; + } + ((__local double *) mem_123314)[ltid_y_117299 * tile_sizze_117141 + + ltid_x_117297] = pre_117382; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119584 = x_84993 + binop_x_117371; + int64_t ltid_y_117332 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117330 = sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117331 = local_tid_127709; + int64_t gtid_117388 = binop_x_117250 + ltid_y_117332; + int64_t gtid_117390 = binop_x_117265 + ltid_x_117330; + double acc_117393 = accs_mem_123310[(int64_t) 0]; + bool binop_x_117397 = slt64(gtid_117388, m_70861); + bool binop_y_117398 = slt64(gtid_117390, k2p2zq_70876); + bool cond_117399 = binop_x_117397 && binop_y_117398; + double acc_117400; + + if (cond_117399) { + double x_117401; + double redout_119850 = acc_117393; + + for (int64_t i_119851 = 0; i_119851 < residual_input_117295; + i_119851++) { + int64_t slice_120042 = slice_119584 + i_119851; + double x_117406 = ((__local + double *) mem_123314)[ltid_y_117332 * + tile_sizze_117141 + + i_119851]; + bool isnan_res_117407; + + isnan_res_117407 = futrts_isnan64(x_117406); + + double defunc_1_f_res_117408; + + if 
(isnan_res_117407) { + defunc_1_f_res_117408 = 0.0; + } else { + double x_117405 = ((__global + double *) mem_123295)[slice_120042 * + (k2p2zq_70876 * + m_70861) + + gtid_117388 * + k2p2zq_70876 + + gtid_117390]; + double defunc_1_f_res_f_res_117409 = x_117405 * x_117406; + + defunc_1_f_res_117408 = defunc_1_f_res_f_res_117409; + } + + double defunc_1_op_res_117404 = defunc_1_f_res_117408 + + redout_119850; + double redout_tmp_127718 = defunc_1_op_res_117404; + + redout_119850 = redout_tmp_127718; + } + x_117401 = redout_119850; + acc_117400 = x_117401; + } else { + acc_117400 = acc_117393; + } + mem_123318[(int64_t) 0] = acc_117400; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125310[(int64_t) 0] = mem_123318[(int64_t) 0]; + } + + __local char *mem_123330; + + mem_123330 = (__local char *) mem_123330_backing_6; + + int64_t ltid_y_117413 = sext_i32_i64(ltid_pre_127713); + int64_t ltid_x_117411 = sext_i32_i64(ltid_pre_127714); + int32_t ltid_flat_117412 = local_tid_127709; + + if (slt64(ltid_y_117413, tile_sizze_117141) && slt64(ltid_x_117411, + tile_sizze_117141)) { + int64_t gtid_117422 = binop_x_117250 + ltid_y_117413; + int64_t gtid_117424 = binop_x_117265 + ltid_x_117411; + bool binop_x_117426 = slt64(gtid_117422, m_70861); + bool binop_y_117427 = slt64(gtid_117424, k2p2zq_70876); + bool cond_117428 = binop_x_117426 && binop_y_117427; + + if (cond_117428) { + double defunc_2_reduce_res_117425 = mem_125310[(int64_t) 0]; + double defunc_3_map_res_r_transformed_row_117432 = ((__global + double *) mem_123143)[gtid_117422 * + (k2p2zq_70876 * + k2p2zq_70876) + + i_84994 * + k2p2zq_70876 + + i_84994]; + double defunc_2_map_res_transformed_row_117434 = ((__global + double *) mem_121938)[gtid_117424 * + k2p2zq_70876 + + i_84994]; + double zm_res_117435 = defunc_2_map_res_transformed_row_117434 - + defunc_2_reduce_res_117425; + double zs_res_117436 = zm_res_117435 / + defunc_3_map_res_r_transformed_row_117432; + + ((__global double *) mem_123291)[gtid_117422 * k2p2zq_70876 + + 
gtid_117424 + i_84994 * + (k2p2zq_70876 * m_70861)] = + zs_res_117436; + for (int64_t i_127719 = 0; i_127719 < k2p2zq_70876; i_127719++) { + ((__global double *) mem_125317)[gid_flat_117146 + i_127719 * + num_threads_125761] = + ((__global double *) mem_123291)[gtid_117422 * + k2p2zq_70876 + + gtid_117424 + i_127719 * + (k2p2zq_70876 * m_70861)]; + } + } + for (int64_t i_127720 = 0; i_127720 < k2p2zq_70876; i_127720++) { + ((__local double *) mem_123330)[ltid_y_117413 * (k2p2zq_70876 * + tile_sizze_117141) + + ltid_x_117411 * k2p2zq_70876 + + i_127720] = ((__global + double *) mem_125317)[gid_flat_117146 + + i_127720 * + num_threads_125761]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t thread_out_index_127721 = gid_x_117138 * tile_sizze_117141 + + sext_i32_i64(ltid_pre_127713); + int64_t thread_out_index_127722 = gid_y_117139 * tile_sizze_117141 + + sext_i32_i64(ltid_pre_127714); + + if (slt64(thread_out_index_127721, m_70861) && + slt64(thread_out_index_127722, k2p2zq_70876)) { + for (int64_t i_127723 = 0; i_127723 < k2p2zq_70876; i_127723++) { + ((__global double *) mem_123334)[thread_out_index_127721 * + (k2p2zq_70876 * k2p2zq_70876) + + thread_out_index_127722 * + k2p2zq_70876 + i_127723] = + ((__local double *) mem_123330)[sext_i32_i64(ltid_pre_127713) * + (k2p2zq_70876 * + tile_sizze_117141) + + sext_i32_i64(ltid_pre_127714) * + k2p2zq_70876 + i_127723]; + } + } + + error_6: + return; + #undef tile_sizze_117141 +} +__kernel void mainDetailedzisegmap_intragroup_117465(__global + int *global_failure, + __local volatile + int64_t *mem_123437_backing_aligned_0, + __local volatile + int64_t *mem_123435_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t gridDim_x_117458, + int64_t gridDim_y_117459, + int64_t full_tiles_117490, + int64_t kk_117693, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global + unsigned char *mem_123419, + __global + unsigned char *mem_123610) +{ + #define Ty_117445 
(mainDetailedziTy_117442) + #define Ry_117446 (mainDetailedziRy_117444) + #define Tx_117447 (mainDetailedziTx_117441) + #define Rx_117448 (mainDetailedziRx_117443) + #define Tk_117449 (mainDetailedziTk_117440) + #define tk_div_tx_117450 (sdiv_up_safe64(mainDetailedziTk_117440, mainDetailedziTx_117441)) + #define tk_div_ty_117451 (sdiv_up_safe64(mainDetailedziTk_117440, mainDetailedziTy_117442)) + #define TxRx_117452 (mainDetailedziTx_117441 * mainDetailedziRx_117443) + #define TyRy_117453 (mainDetailedziTy_117442 * mainDetailedziRy_117444) + #define a_loc_szz_117455 (mainDetailedziTk_117440 * (mainDetailedziTy_117442 * mainDetailedziRy_117444)) + #define b_loc_szz_117457 (mainDetailedziRx_117443 * (mainDetailedziTx_117441 * mainDetailedziTk_117440)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_123437_backing_1 = (__local volatile + char *) mem_123437_backing_aligned_0; + __local volatile char *restrict mem_123435_backing_0 = (__local volatile + char *) mem_123435_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127808; + int32_t local_tid_127809; + int64_t group_sizze_127812; + int32_t wave_sizze_127811; + int32_t group_tid_127810; + + global_tid_127808 = get_global_id(0); + local_tid_127809 = get_local_id(0); + group_sizze_127812 = get_local_size(0); + wave_sizze_127811 = LOCKSTEP_WIDTH; + group_tid_127810 = get_group_id(0); + + int32_t gid_flat_117465; + + gid_flat_117465 = group_tid_127810; + + int32_t ltid_pre_127813; + + ltid_pre_127813 = squot32(local_tid_127809, sext_i64_i32(Tx_117447)); + + int32_t ltid_pre_127814; + + ltid_pre_127814 = local_tid_127809 - squot32(local_tid_127809, + sext_i64_i32(Tx_117447)) * + sext_i64_i32(Tx_117447); + + int64_t gtid_83144; + + gtid_83144 = squot64(sext_i32_i64(group_tid_127810), gridDim_y_117459 * + gridDim_x_117458); + + int64_t gid_y_117464; + + gid_y_117464 = squot64(sext_i32_i64(group_tid_127810) - + 
squot64(sext_i32_i64(group_tid_127810), + gridDim_y_117459 * gridDim_x_117458) * + (gridDim_y_117459 * gridDim_x_117458), + gridDim_x_117458); + + int64_t gid_x_117463; + + gid_x_117463 = sext_i32_i64(group_tid_127810) - + squot64(sext_i32_i64(group_tid_127810), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * gridDim_x_117458) - + squot64(sext_i32_i64(group_tid_127810) - + squot64(sext_i32_i64(group_tid_127810), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * + gridDim_x_117458), + gridDim_x_117458) * gridDim_x_117458; + + int64_t iii_117466; + + iii_117466 = TyRy_117453 * gid_y_117464; + + int64_t jjj_117467 = TxRx_117452 * gid_x_117463; + double mem_123433[Ry_117446 * Rx_117448]; + int64_t ltid_y_117470 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_x_117468 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117469 = local_tid_127809; + double mem_123424[Ry_117446 * Rx_117448]; + + for (int64_t i_117481 = 0; i_117481 < Ry_117446; i_117481++) { + for (int64_t i_117484 = 0; i_117484 < Rx_117448; i_117484++) { + mem_123424[i_117481 * Rx_117448 + i_117484] = 0.0; + } + } + for (int64_t i_127817 = 0; i_127817 < Ry_117446; i_127817++) { + for (int64_t i_127818 = 0; i_127818 < Rx_117448; i_127818++) { + mem_123433[i_127817 * Rx_117448 + i_127818] = mem_123424[i_127817 * + Rx_117448 + + i_127818]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_123435; + + mem_123435 = (__local char *) mem_123435_backing_0; + + __local char *mem_123437; + + mem_123437 = (__local char *) mem_123437_backing_1; + + double mem_123508[Ry_117446]; + double mem_123512[Rx_117448]; + double loop_mem_123524[Ry_117446 * Rx_117448]; + double mem_param_123438[Ry_117446 * Rx_117448]; + + for (int32_t i_2 = 0; i_2 < Ry_117446 * Rx_117448; i_2++) + mem_param_123438[i_2] = mem_123433[i_2]; + for (int64_t i_117491 = 0; i_117491 < full_tiles_117490; i_117491++) { + int64_t kk_117495 = Tk_117449 * i_117491; + + for (int64_t i_117496 = 0; i_117496 < Ry_117446; 
i_117496++) { + int64_t binop_y_117519 = Ty_117445 * i_117496; + + for (int64_t i_117498 = 0; i_117498 < tk_div_tx_117450; + i_117498++) { + int64_t binop_y_117517 = Tx_117447 * i_117498; + int64_t ltid_x_117500 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_y_117501 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117502 = local_tid_127809; + int64_t k_117518 = ltid_y_117501 + binop_y_117517; + int64_t i_117520 = ltid_x_117500 + binop_y_117519; + int64_t gtid_117521 = iii_117466 + i_117520; + int64_t A_col_idx_117522 = kk_117495 + k_117518; + bool cond_117523 = slt64(gtid_117521, k2p2zq_70876); + double A_elem_117524; + + if (cond_117523) { + double A_elem_117526 = ((__global + double *) mem_123419)[gtid_83144 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_117521 * + k2p2zq_70876 + + A_col_idx_117522]; + + A_elem_117524 = A_elem_117526; + } else { + A_elem_117524 = 0.0; + } + + bool cond_117528 = slt64(k_117518, Tk_117449); + int64_t a_loc_ind_117529; + + if (cond_117528) { + int64_t binop_y_117530 = Tk_117449 * i_117520; + int64_t loc_fi_117531 = k_117518 + binop_y_117530; + + a_loc_ind_117529 = loc_fi_117531; + } else { + a_loc_ind_117529 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117529) && + slt64(a_loc_ind_117529, a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117529] = + A_elem_117524; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_117536 = 0; i_117536 < tk_div_ty_117451; i_117536++) { + int64_t binop_y_117557 = Ty_117445 * i_117536; + + for (int64_t i_117538 = 0; i_117538 < Rx_117448; i_117538++) { + int64_t binop_y_117559 = Tx_117447 * i_117538; + int64_t ltid_x_117540 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_y_117541 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117542 = local_tid_127809; + int64_t k_117558 = ltid_x_117540 + binop_y_117557; + int64_t j_117560 = ltid_y_117541 + binop_y_117559; + int64_t gtid_117561 = jjj_117467 + j_117560; + int64_t B_row_idx_117562 = kk_117495 + k_117558; + bool 
cond_117563 = slt64(gtid_117561, k2p2zq_70876); + double B_elem_117564; + + if (cond_117563) { + double B_elem_117566 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_83144 * + binop_x_120251 + + B_row_idx_117562 * + k2p2zq_70876 + + gtid_117561]; + + B_elem_117564 = B_elem_117566; + } else { + B_elem_117564 = 0.0; + } + + bool cond_117568 = slt64(k_117558, Tk_117449); + int64_t b_loc_ind_117569; + + if (cond_117568) { + int64_t binop_y_117570 = TxRx_117452 * k_117558; + int64_t loc_fi_117571 = j_117560 + binop_y_117570; + + b_loc_ind_117569 = loc_fi_117571; + } else { + b_loc_ind_117569 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117569) && + slt64(b_loc_ind_117569, b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117569] = + B_elem_117564; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_123523[Ry_117446 * Rx_117448]; + double mem_param_123495[Ry_117446 * Rx_117448]; + + for (int32_t i_3 = 0; i_3 < Ry_117446 * Rx_117448; i_3++) + mem_param_123495[i_3] = mem_param_123438[i_3]; + for (int64_t i_117576 = 0; i_117576 < Tk_117449; i_117576++) { + int64_t binop_y_117615 = TxRx_117452 * i_117576; + int64_t ltid_y_117580 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_x_117578 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117579 = local_tid_127809; + double mem_123498[Ry_117446]; + double mem_123500[Rx_117448]; + int64_t binop_x_117606 = Ry_117446 * ltid_y_117580; + + for (int64_t i_117604 = 0; i_117604 < Ry_117446; i_117604++) { + int64_t binop_x_117607 = i_117604 + binop_x_117606; + int64_t binop_y_117608 = Tk_117449 * binop_x_117607; + int64_t a_loc_ind_117609 = i_117576 + binop_y_117608; + + for (int64_t i_127830 = 0; i_127830 < (int64_t) 1; i_127830++) { + mem_123498[i_117604 + i_127830] = ((__local + double *) mem_123435)[a_loc_ind_117609 + + i_127830]; + } + } + + int64_t binop_y_117617 = Rx_117448 * ltid_x_117578; + + for (int64_t i_117613 = 0; i_117613 < Rx_117448; i_117613++) { + int64_t 
binop_x_117616 = i_117613 + binop_y_117615; + int64_t b_loc_ind_117618 = binop_x_117616 + binop_y_117617; + + for (int64_t i_127832 = 0; i_127832 < (int64_t) 1; i_127832++) { + mem_123500[i_117613 + i_127832] = ((__local + double *) mem_123437)[b_loc_ind_117618 + + i_127832]; + } + } + for (int64_t i_127833 = 0; i_127833 < Ry_117446; i_127833++) { + mem_123508[i_127833] = mem_123498[i_127833]; + } + for (int64_t i_127834 = 0; i_127834 < Rx_117448; i_127834++) { + mem_123512[i_127834] = mem_123500[i_127834]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_123522[Ry_117446 * Rx_117448]; + int64_t ltid_y_117625 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_x_117623 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117624 = local_tid_127809; + int64_t binop_y_117666 = Ry_117446 * ltid_y_117625; + int64_t binop_y_117670 = Rx_117448 * ltid_x_117623; + + for (int64_t i_117660 = 0; i_117660 < Ry_117446; i_117660++) { + int64_t binop_x_117665 = iii_117466 + i_117660; + int64_t cmpop_x_117667 = binop_x_117665 + binop_y_117666; + bool binop_x_117668 = slt64(cmpop_x_117667, k2p2zq_70876); + + for (int64_t i_117663 = 0; i_117663 < Rx_117448; i_117663++) { + int64_t binop_x_117669 = jjj_117467 + i_117663; + int64_t cmpop_x_117671 = binop_x_117669 + binop_y_117670; + bool binop_y_117672 = slt64(cmpop_x_117671, k2p2zq_70876); + bool cond_117673 = binop_x_117668 && binop_y_117672; + + if (cond_117673) { + double a_117675 = mem_123508[i_117660]; + double b_117676 = mem_123512[i_117663]; + double c_117677 = mem_param_123495[i_117660 * + Rx_117448 + + i_117663]; + double defunc_1_f_res_117680 = a_117675 * b_117676; + double defunc_1_op_res_117684 = c_117677 + + defunc_1_f_res_117680; + + mem_param_123495[i_117660 * Rx_117448 + i_117663] = + defunc_1_op_res_117684; + } + } + } + for (int64_t i_127837 = 0; i_127837 < Ry_117446; i_127837++) { + for (int64_t i_127838 = 0; i_127838 < Rx_117448; i_127838++) { + mem_123522[i_127837 * Rx_117448 + i_127838] = + 
mem_param_123495[i_127837 * Rx_117448 + i_127838]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127827[Ry_117446 * Rx_117448]; + + for (int32_t i_4 = 0; i_4 < Ry_117446 * Rx_117448; i_4++) + mem_param_tmp_127827[i_4] = mem_123522[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_117446 * Rx_117448; i_5++) + mem_param_123495[i_5] = mem_param_tmp_127827[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_117446 * Rx_117448; i_6++) + loop_mem_123523[i_6] = mem_param_123495[i_6]; + + double mem_param_tmp_127819[Ry_117446 * Rx_117448]; + + for (int32_t i_7 = 0; i_7 < Ry_117446 * Rx_117448; i_7++) + mem_param_tmp_127819[i_7] = loop_mem_123523[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_117446 * Rx_117448; i_8++) + mem_param_123438[i_8] = mem_param_tmp_127819[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_117446 * Rx_117448; i_9++) + loop_mem_123524[i_9] = mem_param_123438[i_9]; + for (int64_t i_117694 = 0; i_117694 < Ry_117446; i_117694++) { + int64_t binop_y_117719 = Ty_117445 * i_117694; + + for (int64_t i_117696 = 0; i_117696 < tk_div_tx_117450; i_117696++) { + int64_t binop_y_117717 = Tx_117447 * i_117696; + int64_t ltid_x_117698 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_y_117699 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117700 = local_tid_127809; + int64_t k_117718 = ltid_y_117699 + binop_y_117717; + int64_t i_117720 = ltid_x_117698 + binop_y_117719; + int64_t gtid_117721 = iii_117466 + i_117720; + int64_t A_col_idx_117722 = kk_117693 + k_117718; + bool binop_x_117723 = slt64(gtid_117721, k2p2zq_70876); + bool binop_y_117724 = slt64(A_col_idx_117722, k2p2zq_70876); + bool cond_117725 = binop_x_117723 && binop_y_117724; + double A_elem_117726; + + if (cond_117725) { + double A_elem_117728 = ((__global + double *) mem_123419)[gtid_83144 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_117721 * + k2p2zq_70876 + + A_col_idx_117722]; + + A_elem_117726 = A_elem_117728; + } else { + A_elem_117726 = 0.0; + } + + bool cond_117730 = slt64(k_117718, Tk_117449); + int64_t 
a_loc_ind_117731; + + if (cond_117730) { + int64_t binop_y_117732 = Tk_117449 * i_117720; + int64_t loc_fi_117733 = k_117718 + binop_y_117732; + + a_loc_ind_117731 = loc_fi_117733; + } else { + a_loc_ind_117731 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117731) && slt64(a_loc_ind_117731, + a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117731] = + A_elem_117726; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_117738 = 0; i_117738 < tk_div_ty_117451; i_117738++) { + int64_t binop_y_117761 = Ty_117445 * i_117738; + + for (int64_t i_117740 = 0; i_117740 < Rx_117448; i_117740++) { + int64_t binop_y_117763 = Tx_117447 * i_117740; + int64_t ltid_x_117742 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_y_117743 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117744 = local_tid_127809; + int64_t k_117762 = ltid_x_117742 + binop_y_117761; + int64_t j_117764 = ltid_y_117743 + binop_y_117763; + int64_t gtid_117765 = jjj_117467 + j_117764; + int64_t B_row_idx_117766 = kk_117693 + k_117762; + bool binop_x_117767 = slt64(gtid_117765, k2p2zq_70876); + bool binop_y_117768 = slt64(B_row_idx_117766, k2p2zq_70876); + bool cond_117769 = binop_x_117767 && binop_y_117768; + double B_elem_117770; + + if (cond_117769) { + double B_elem_117772 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_83144 * + binop_x_120251 + + B_row_idx_117766 * + k2p2zq_70876 + + gtid_117765]; + + B_elem_117770 = B_elem_117772; + } else { + B_elem_117770 = 0.0; + } + + bool cond_117774 = slt64(k_117762, Tk_117449); + int64_t b_loc_ind_117775; + + if (cond_117774) { + int64_t binop_y_117776 = TxRx_117452 * k_117762; + int64_t loc_fi_117777 = j_117764 + binop_y_117776; + + b_loc_ind_117775 = loc_fi_117777; + } else { + b_loc_ind_117775 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117775) && slt64(b_loc_ind_117775, + b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117775] = + B_elem_117770; + } + barrier(CLK_LOCAL_MEM_FENCE); + } 
+ } + + double mem_123590[Ry_117446]; + double mem_123594[Rx_117448]; + double mem_123604[Ry_117446 * Rx_117448]; + double loop_mem_123606[Ry_117446 * Rx_117448]; + double mem_param_123577[Ry_117446 * Rx_117448]; + + for (int32_t i_10 = 0; i_10 < Ry_117446 * Rx_117448; i_10++) + mem_param_123577[i_10] = loop_mem_123524[i_10]; + for (int64_t i_117782 = 0; i_117782 < Tk_117449; i_117782++) { + int64_t cmpop_x_117784 = kk_117693 + i_117782; + bool cond_117785 = slt64(cmpop_x_117784, k2p2zq_70876); + double mem_125333[Ry_117446 * Rx_117448]; + + if (cond_117785) { + int64_t binop_y_117823 = TxRx_117452 * i_117782; + int64_t bytes_123579 = (int64_t) 8 * Ry_117446; + int64_t bytes_123581 = (int64_t) 8 * Rx_117448; + int64_t ltid_y_117788 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_x_117786 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117787 = local_tid_127809; + double mem_123580[Ry_117446]; + double mem_123582[Rx_117448]; + int64_t binop_x_117814 = Ry_117446 * ltid_y_117788; + + for (int64_t i_117812 = 0; i_117812 < Ry_117446; i_117812++) { + int64_t binop_x_117815 = i_117812 + binop_x_117814; + int64_t binop_y_117816 = Tk_117449 * binop_x_117815; + int64_t a_loc_ind_117817 = i_117782 + binop_y_117816; + + for (int64_t i_127846 = 0; i_127846 < (int64_t) 1; i_127846++) { + mem_123580[i_117812 + i_127846] = ((__local + double *) mem_123435)[a_loc_ind_117817 + + i_127846]; + } + } + + int64_t binop_y_117825 = Rx_117448 * ltid_x_117786; + + for (int64_t i_117821 = 0; i_117821 < Rx_117448; i_117821++) { + int64_t binop_x_117824 = i_117821 + binop_y_117823; + int64_t b_loc_ind_117826 = binop_x_117824 + binop_y_117825; + + for (int64_t i_127848 = 0; i_127848 < (int64_t) 1; i_127848++) { + mem_123582[i_117821 + i_127848] = ((__local + double *) mem_123437)[b_loc_ind_117826 + + i_127848]; + } + } + for (int64_t i_127849 = 0; i_127849 < Ry_117446; i_127849++) { + mem_123590[i_127849] = mem_123580[i_127849]; + } + for (int64_t i_127850 = 0; i_127850 < Rx_117448; 
i_127850++) { + mem_123594[i_127850] = mem_123582[i_127850]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_117833 = sext_i32_i64(ltid_pre_127813); + int64_t ltid_x_117831 = sext_i32_i64(ltid_pre_127814); + int32_t ltid_flat_117832 = local_tid_127809; + int64_t binop_y_117874 = Ry_117446 * ltid_y_117833; + int64_t binop_y_117878 = Rx_117448 * ltid_x_117831; + + for (int64_t i_117868 = 0; i_117868 < Ry_117446; i_117868++) { + int64_t binop_x_117873 = iii_117466 + i_117868; + int64_t cmpop_x_117875 = binop_x_117873 + binop_y_117874; + bool binop_x_117876 = slt64(cmpop_x_117875, k2p2zq_70876); + + for (int64_t i_117871 = 0; i_117871 < Rx_117448; i_117871++) { + int64_t binop_x_117877 = jjj_117467 + i_117871; + int64_t cmpop_x_117879 = binop_x_117877 + binop_y_117878; + bool binop_y_117880 = slt64(cmpop_x_117879, k2p2zq_70876); + bool cond_117881 = binop_x_117876 && binop_y_117880; + + if (cond_117881) { + double a_117883 = mem_123590[i_117868]; + double b_117884 = mem_123594[i_117871]; + double c_117885 = mem_param_123577[i_117868 * + Rx_117448 + + i_117871]; + double defunc_1_f_res_117888 = a_117883 * b_117884; + double defunc_1_op_res_117892 = c_117885 + + defunc_1_f_res_117888; + + mem_param_123577[i_117868 * Rx_117448 + i_117871] = + defunc_1_op_res_117892; + } + } + } + for (int64_t i_127853 = 0; i_127853 < Ry_117446; i_127853++) { + for (int64_t i_127854 = 0; i_127854 < Rx_117448; i_127854++) { + mem_123604[i_127853 * Rx_117448 + i_127854] = + mem_param_123577[i_127853 * Rx_117448 + i_127854]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127855 = 0; i_127855 < Ry_117446; i_127855++) { + for (int64_t i_127856 = 0; i_127856 < Rx_117448; i_127856++) { + mem_125333[i_127855 * Rx_117448 + i_127856] = + mem_123604[i_127855 * Rx_117448 + i_127856]; + } + } + } else { + for (int64_t i_127857 = 0; i_127857 < Ry_117446; i_127857++) { + for (int64_t i_127858 = 0; i_127858 < Rx_117448; i_127858++) { + mem_125333[i_127857 * Rx_117448 + i_127858] = + 
mem_param_123577[i_127857 * Rx_117448 + i_127858]; + } + } + } + + double mem_param_tmp_127843[Ry_117446 * Rx_117448]; + + for (int32_t i_11 = 0; i_11 < Ry_117446 * Rx_117448; i_11++) + mem_param_tmp_127843[i_11] = mem_125333[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_117446 * Rx_117448; i_12++) + mem_param_123577[i_12] = mem_param_tmp_127843[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_117446 * Rx_117448; i_13++) + loop_mem_123606[i_13] = mem_param_123577[i_13]; + + int64_t reg_tile_i_127859 = squot64(sext_i32_i64(local_tid_127809), + Ty_117445 * Tx_117447); + int64_t reg_tile_i_127860 = squot64(sext_i32_i64(local_tid_127809) - + squot64(sext_i32_i64(local_tid_127809), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), Tx_117447); + int64_t reg_tile_i_127861 = sext_i32_i64(local_tid_127809) - + squot64(sext_i32_i64(local_tid_127809), Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447) - squot64(sext_i32_i64(local_tid_127809) - + squot64(sext_i32_i64(local_tid_127809), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), + Tx_117447) * Tx_117447; + int64_t tile_dim_start_127862 = gtid_83144 + reg_tile_i_127859; + int64_t tile_dim_start_127863 = Ry_117446 * (Ty_117445 * gid_y_117464 + + reg_tile_i_127860); + int64_t tile_dim_start_127864 = Rx_117448 * (Tx_117447 * gid_x_117463 + + reg_tile_i_127861); + + for (int64_t nest_i_127865 = 0; nest_i_127865 < (int64_t) 1; + nest_i_127865++) { + for (int64_t nest_i_127866 = 0; nest_i_127866 < Ry_117446; + nest_i_127866++) { + for (int64_t nest_i_127867 = 0; nest_i_127867 < Rx_117448; + nest_i_127867++) { + if ((slt64(tile_dim_start_127862 + nest_i_127865, m_70861) && + slt64(tile_dim_start_127863 + nest_i_127866, + k2p2zq_70876)) && slt64(tile_dim_start_127864 + + nest_i_127867, + k2p2zq_70876)) { + ((__global double *) mem_123610)[(tile_dim_start_127862 + + nest_i_127865) * + (k2p2zq_70876 * + k2p2zq_70876) + + (tile_dim_start_127863 + + nest_i_127866) * + k2p2zq_70876 + + (tile_dim_start_127864 + + nest_i_127867)] = + 
loop_mem_123606[squot64(nest_i_127866 * Rx_117448 + + nest_i_127867 - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867 - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448 + + (nest_i_127866 * Rx_117448 + + nest_i_127867 - squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * Rx_117448) - + squot64(nest_i_127866 * Rx_117448 + + nest_i_127867 - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * (Ry_117446 * + Rx_117448) - + squot64(nest_i_127866 * Rx_117448 + + nest_i_127867 - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867 - + squot64(nest_i_127866 * + Rx_117448 + + nest_i_127867, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * + Ry_117446 * + Rx_117448), + Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448)]; + } + } + } + } + + error_9: + return; + #undef Ty_117445 + #undef Ry_117446 + #undef Tx_117447 + #undef Rx_117448 + #undef Tk_117449 + #undef tk_div_tx_117450 + #undef tk_div_ty_117451 + #undef TxRx_117452 + #undef TyRy_117453 + #undef a_loc_szz_117455 + #undef b_loc_szz_117457 +} +__kernel void mainDetailedzisegmap_intragroup_117900(__global + int *global_failure, + __local volatile + int64_t *mem_124104_backing_aligned_0, + __local volatile + int64_t *mem_124097_backing_aligned_1, + int64_t m_70861, + double level_70867, + int64_t num_recresids_padded_71534, + int64_t 
num_whole_tiles_117920, + int64_t residual_input_118032, + unsigned char cond_118033, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global + unsigned char *mem_124081, + __global + unsigned char *mem_124084, + __global + unsigned char *mem_124113) +{ + #define segmap_group_sizze_86533 (mainDetailedzisegmap_group_sizze_86398) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124104_backing_5 = (__local volatile + char *) mem_124104_backing_aligned_0; + __local volatile char *restrict mem_124097_backing_0 = (__local volatile + char *) mem_124097_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128549; + int32_t local_tid_128550; + int64_t group_sizze_128553; + int32_t wave_sizze_128552; + int32_t group_tid_128551; + + global_tid_128549 = get_global_id(0); + local_tid_128550 = get_local_id(0); + group_sizze_128553 = get_local_size(0); + wave_sizze_128552 = LOCKSTEP_WIDTH; + group_tid_128551 = get_group_id(0); + + int32_t gid_flat_117900; + + gid_flat_117900 = group_tid_128551; + + int32_t ltid_pre_128554; + + ltid_pre_128554 = local_tid_128550; + + int64_t gid_117899; + + gid_117899 = sext_i32_i64(group_tid_128551); + + int64_t binop_x_117909; + + binop_x_117909 = segmap_group_sizze_86533 * gid_117899; + + int64_t mem_124088[1]; + double mem_124090[1]; + int64_t ltid_117901 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_117902 = local_tid_128550; + int64_t gtid_117910 = ltid_117901 + binop_x_117909; + bool cond_117911 = slt64(gtid_117910, m_70861); + int64_t pre_117912; + double pre_117913; + + if (cond_117911) { + int64_t x_117914 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_117910]; + double i64_res_117915 = sitofp_i64_f64(x_117914); + + pre_117912 = x_117914; + pre_117913 = i64_res_117915; + } else { + pre_117912 = (int64_t) 0; + pre_117913 = 0.0; + } + mem_124088[(int64_t) 0] = pre_117912; + mem_124090[(int64_t) 0] = 
pre_117913; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124093[1]; + int64_t ltid_117921 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_117922 = local_tid_128550; + + mem_124093[(int64_t) 0] = -INFINITY; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124097; + + mem_124097 = (__local char *) mem_124097_backing_0; + + double accs_mem_124101[1]; + double mem_param_124094[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_124094[i_1] = mem_124093[i_1]; + for (int64_t tile_id_117928 = 0; tile_id_117928 < num_whole_tiles_117920; + tile_id_117928++) { + int64_t binop_x_117984 = segmap_group_sizze_86533 * tile_id_117928; + int64_t ltid_117929 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_117930 = local_tid_128550; + int64_t j_117985 = ltid_117929 + binop_x_117984; + bool cond_117991 = slt64(j_117985, num_recresids_padded_71534); + int64_t pre_117992; + + if (cond_117991) { + pre_117992 = j_117985; + } else { + pre_117992 = (int64_t) 0; + } + ((__local int64_t *) mem_124097)[ltid_117929] = pre_117992; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119591 = (int64_t) 1 + binop_x_117984; + double mem_124100[1]; + int64_t ltid_117950 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_117951 = local_tid_128550; + int64_t gtid_117997 = binop_x_117909 + ltid_117950; + double acc_118001 = mem_param_124094[(int64_t) 0]; + bool cond_118004 = slt64(gtid_117997, m_70861); + double acc_118005; + + if (cond_118004) { + double i64_res_117999 = mem_124090[(int64_t) 0]; + double x_118006; + double redout_119906 = acc_118001; + + for (int64_t i_119907 = 0; i_119907 < segmap_group_sizze_86533; + i_119907++) { + int64_t slice_120045 = slice_119591 + i_119907; + double x_118010 = ((__global + double *) mem_124081)[slice_120045 * + m_70861 + + gtid_117997]; + int64_t x_118011 = ((__local int64_t *) mem_124097)[i_119907]; + int64_t x_118012 = mul64((int64_t) 2, x_118011); + int64_t i64_arg_118013 = add64((int64_t) 2, x_118012); + double i64_res_118014 = 
sitofp_i64_f64(i64_arg_118013); + double y_118015 = i64_res_118014 / i64_res_117999; + double lifted_div_res_118016 = 1.0 + y_118015; + double abs_arg_118017 = x_118010 / lifted_div_res_118016; + double abs_res_118018 = fabs(abs_arg_118017); + double defunc_1_op_res_118009 = fmax64(abs_res_118018, + redout_119906); + double redout_tmp_128557 = defunc_1_op_res_118009; + + redout_119906 = redout_tmp_128557; + } + x_118006 = redout_119906; + acc_118005 = x_118006; + } else { + acc_118005 = acc_118001; + } + mem_124100[(int64_t) 0] = acc_118005; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128555[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_128555[i_2] = mem_124100[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_124094[i_3] = mem_param_tmp_128555[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_124101[i_4] = mem_param_124094[i_4]; + + __local char *mem_124104; + + mem_124104 = (__local char *) mem_124104_backing_5; + + double mem_124107[1]; + double mem_125358[1]; + + if (cond_118033) { + mem_125358[(int64_t) 0] = accs_mem_124101[(int64_t) 0]; + } else { + int64_t binop_x_118043 = segmap_group_sizze_86533 * + num_whole_tiles_117920; + int64_t ltid_118034 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_118035 = local_tid_128550; + int64_t j_118044 = ltid_118034 + binop_x_118043; + bool cond_118050 = slt64(j_118044, num_recresids_padded_71534); + int64_t pre_118051; + + if (cond_118050) { + pre_118051 = j_118044; + } else { + pre_118051 = (int64_t) 0; + } + ((__local int64_t *) mem_124104)[ltid_118034] = pre_118051; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_offset_118071 = num_whole_tiles_117920 * + residual_input_118032; + int64_t slice_119594 = (int64_t) 1 + slice_offset_118071; + int64_t ltid_118056 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_118057 = local_tid_128550; + int64_t gtid_118066 = binop_x_117909 + ltid_118056; + double acc_118070 = accs_mem_124101[(int64_t) 0]; + bool cond_118073 = 
slt64(gtid_118066, m_70861); + double acc_118074; + + if (cond_118073) { + double i64_res_118068 = mem_124090[(int64_t) 0]; + double x_118075; + double redout_119908 = acc_118070; + + for (int64_t i_119909 = 0; i_119909 < residual_input_118032; + i_119909++) { + int64_t slice_120046 = slice_119594 + i_119909; + double x_118079 = ((__global + double *) mem_124081)[slice_120046 * + m_70861 + + gtid_118066]; + int64_t x_118080 = ((__local int64_t *) mem_124104)[i_119909]; + int64_t x_118081 = mul64((int64_t) 2, x_118080); + int64_t i64_arg_118082 = add64((int64_t) 2, x_118081); + double i64_res_118083 = sitofp_i64_f64(i64_arg_118082); + double y_118084 = i64_res_118083 / i64_res_118068; + double lifted_div_res_118085 = 1.0 + y_118084; + double abs_arg_118086 = x_118079 / lifted_div_res_118085; + double abs_res_118087 = fabs(abs_arg_118086); + double defunc_1_op_res_118078 = fmax64(abs_res_118087, + redout_119908); + double redout_tmp_128558 = defunc_1_op_res_118078; + + redout_119908 = redout_tmp_128558; + } + x_118075 = redout_119908; + acc_118074 = x_118075; + } else { + acc_118074 = acc_118070; + } + mem_124107[(int64_t) 0] = acc_118074; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125358[(int64_t) 0] = mem_124107[(int64_t) 0]; + } + + int64_t mem_124111[1]; + int64_t ltid_118090 = sext_i32_i64(ltid_pre_128554); + int32_t ltid_flat_118091 = local_tid_128550; + int64_t gtid_118097 = binop_x_117909 + ltid_118090; + bool cond_118099 = slt64(gtid_118097, m_70861); + int64_t postlude_118100; + + if (cond_118099) { + double defunc_2_reduce_res_118098 = mem_125358[(int64_t) 0]; + double defunc_0_Q_arg_118105 = 3.0 * defunc_2_reduce_res_118098; + double zs_res_118106 = defunc_0_Q_arg_118105 / 1.4142135623730951; + double abs_res_118107 = fabs(zs_res_118106); + double zs_res_118108 = abs_res_118107 / 2.0; + double zp_res_118109 = 1.0 + zs_res_118108; + double zs_res_118110 = 1.0 / zp_res_118109; + double zt_res_118111 = zs_res_118110 * zs_res_118110; + double zt_res_118112 = 
zs_res_118110 * zt_res_118111; + double zt_res_118113 = zt_res_118111 * zt_res_118111; + double zt_res_118114 = zt_res_118111 * zt_res_118112; + double zt_res_118115 = zt_res_118112 * zt_res_118112; + double zt_res_118116 = zt_res_118112 * zt_res_118113; + double zt_res_118117 = zt_res_118113 * zt_res_118113; + double zt_res_118118 = zt_res_118113 * zt_res_118114; + double zt_res_118119 = 0.17087277 * zt_res_118118; + double zt_res_118120 = 0.82215223 * zt_res_118117; + double zt_res_118121 = 1.48851587 * zt_res_118116; + double zt_res_118122 = 1.13520398 * zt_res_118115; + double zt_res_118123 = 0.27886807 * zt_res_118114; + double zt_res_118124 = 0.18628806 * zt_res_118113; + double zt_res_118125 = 9.678418e-2 * zt_res_118112; + double zt_res_118126 = 0.37409196 * zt_res_118111; + double zt_res_118127 = 1.00002368 * zs_res_118110; + double zt_res_118128 = zs_res_118106 * zs_res_118106; + double zm_res_118129 = 0.0 - zt_res_118128; + double zm_res_118130 = zm_res_118129 - 1.26551223; + double zp_res_118131 = zt_res_118127 + zm_res_118130; + double zp_res_118132 = zt_res_118126 + zp_res_118131; + double zp_res_118133 = zt_res_118125 + zp_res_118132; + double zm_res_118134 = zp_res_118133 - zt_res_118124; + double zp_res_118135 = zt_res_118123 + zm_res_118134; + double zm_res_118136 = zp_res_118135 - zt_res_118122; + double zp_res_118137 = zt_res_118121 + zm_res_118136; + double zm_res_118138 = zp_res_118137 - zt_res_118120; + double zp_res_118139 = zt_res_118119 + zm_res_118138; + double exp_res_118140; + + exp_res_118140 = futrts_exp64(zp_res_118139); + + double zt_res_118141 = zs_res_118110 * exp_res_118140; + bool zgze_res_118142 = 0.0 <= zs_res_118106; + double erf_res_118143; + + if (zgze_res_118142) { + double zm_res_118144 = 1.0 - zt_res_118141; + + erf_res_118143 = zm_res_118144; + } else { + double zm_res_118145 = zt_res_118141 - 1.0; + + erf_res_118143 = zm_res_118145; + } + + double zp_res_118146 = 1.0 + erf_res_118143; + double zs_res_118147 = 
zp_res_118146 / 2.0; + double defunc_0_Q_res_118148 = 1.0 - zs_res_118147; + double y_118149 = fpow64(defunc_2_reduce_res_118098, 2.0); + double negate_arg_118150 = 4.0 * y_118149; + double defunc_0_exp_arg_118151 = 0.0 - negate_arg_118150; + double defunc_0_exp_res_118152 = fpow64(2.718281828459045, + defunc_0_exp_arg_118151); + double x_118153 = defunc_0_Q_res_118148 + defunc_0_exp_res_118152; + double zs_res_118154 = defunc_2_reduce_res_118098 / 1.4142135623730951; + double abs_res_118155 = fabs(zs_res_118154); + double zs_res_118156 = abs_res_118155 / 2.0; + double zp_res_118157 = 1.0 + zs_res_118156; + double zs_res_118158 = 1.0 / zp_res_118157; + double zt_res_118159 = zs_res_118158 * zs_res_118158; + double zt_res_118160 = zs_res_118158 * zt_res_118159; + double zt_res_118161 = zt_res_118159 * zt_res_118159; + double zt_res_118162 = zt_res_118159 * zt_res_118160; + double zt_res_118163 = zt_res_118160 * zt_res_118160; + double zt_res_118164 = zt_res_118160 * zt_res_118161; + double zt_res_118165 = zt_res_118161 * zt_res_118161; + double zt_res_118166 = zt_res_118161 * zt_res_118162; + double zt_res_118167 = 0.17087277 * zt_res_118166; + double zt_res_118168 = 0.82215223 * zt_res_118165; + double zt_res_118169 = 1.48851587 * zt_res_118164; + double zt_res_118170 = 1.13520398 * zt_res_118163; + double zt_res_118171 = 0.27886807 * zt_res_118162; + double zt_res_118172 = 0.18628806 * zt_res_118161; + double zt_res_118173 = 9.678418e-2 * zt_res_118160; + double zt_res_118174 = 0.37409196 * zt_res_118159; + double zt_res_118175 = 1.00002368 * zs_res_118158; + double zt_res_118176 = zs_res_118154 * zs_res_118154; + double zm_res_118177 = 0.0 - zt_res_118176; + double zm_res_118178 = zm_res_118177 - 1.26551223; + double zp_res_118179 = zt_res_118175 + zm_res_118178; + double zp_res_118180 = zt_res_118174 + zp_res_118179; + double zp_res_118181 = zt_res_118173 + zp_res_118180; + double zm_res_118182 = zp_res_118181 - zt_res_118172; + double zp_res_118183 = 
zt_res_118171 + zm_res_118182; + double zm_res_118184 = zp_res_118183 - zt_res_118170; + double zp_res_118185 = zt_res_118169 + zm_res_118184; + double zm_res_118186 = zp_res_118185 - zt_res_118168; + double zp_res_118187 = zt_res_118167 + zm_res_118186; + double exp_res_118188; + + exp_res_118188 = futrts_exp64(zp_res_118187); + + double zt_res_118189 = zs_res_118158 * exp_res_118188; + bool zgze_res_118190 = 0.0 <= zs_res_118154; + double erf_res_118191; + + if (zgze_res_118190) { + double zm_res_118192 = 1.0 - zt_res_118189; + + erf_res_118191 = zm_res_118192; + } else { + double zm_res_118193 = zt_res_118189 - 1.0; + + erf_res_118191 = zm_res_118193; + } + + double zp_res_118194 = 1.0 + erf_res_118191; + double zs_res_118195 = zp_res_118194 / 2.0; + double defunc_0_Q_res_118196 = 1.0 - zs_res_118195; + double y_118197 = defunc_0_exp_res_118152 * defunc_0_Q_res_118196; + double y_118198 = x_118153 - y_118197; + double pval_brownian_motion_max_res_118199 = 2.0 * y_118198; + int64_t defunc_0_f_res_118200; + int64_t redout_119910 = (int64_t) 9223372036854775807; + + for (int64_t i_119911 = 0; i_119911 < num_recresids_padded_71534; + i_119911++) { + int64_t slice_120048 = (int64_t) 1 + i_119911; + double x_118205 = ((__global double *) mem_124081)[slice_120048 * + m_70861 + + gtid_118097]; + double x_118206 = ((__global double *) mem_124084)[slice_120048 * + m_70861 + + gtid_118097]; + double abs_res_118207 = fabs(x_118205); + bool cond_118208 = x_118206 < abs_res_118207; + int64_t defunc_2_f_res_118209; + + if (cond_118208) { + defunc_2_f_res_118209 = i_119911; + } else { + defunc_2_f_res_118209 = (int64_t) 9223372036854775807; + } + + int64_t defunc_1_op_res_118203 = smin64(defunc_2_f_res_118209, + redout_119910); + int64_t redout_tmp_128559 = defunc_1_op_res_118203; + + redout_119910 = redout_tmp_128559; + } + defunc_0_f_res_118200 = redout_119910; + + bool isnan_res_118210; + + isnan_res_118210 = futrts_isnan64(pval_brownian_motion_max_res_118199); + + bool 
cond_118211 = !isnan_res_118210; + bool cond_t_res_118212 = pval_brownian_motion_max_res_118199 < + level_70867; + bool x_118213 = cond_118211 && cond_t_res_118212; + bool chk_t_res_118214 = defunc_0_f_res_118200 == + (int64_t) 9223372036854775807; + bool chk_t_res_118215 = !chk_t_res_118214; + bool x_118216 = x_118213 && chk_t_res_118215; + int64_t y_start_118217; + + if (x_118216) { + int64_t x_118101 = mem_124088[(int64_t) 0]; + int64_t y_start_t_res_118218 = sub64(x_118101, + defunc_0_f_res_118200); + + y_start_118217 = y_start_t_res_118218; + } else { + y_start_118217 = (int64_t) 0; + } + postlude_118100 = y_start_118217; + } else { + postlude_118100 = (int64_t) 0; + } + mem_124111[(int64_t) 0] = postlude_118100; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64(sext_i32_i64(local_tid_128550) + segmap_group_sizze_86533 * + sext_i32_i64(group_tid_128551), m_70861)) { + ((__global int64_t *) mem_124113)[sext_i32_i64(local_tid_128550) + + segmap_group_sizze_86533 * + sext_i32_i64(group_tid_128551)] = + mem_124111[(int64_t) 0]; + } + + error_7: + return; + #undef segmap_group_sizze_86533 +} +__kernel void mainDetailedzisegmap_intragroup_118238(__global + int *global_failure, + __local volatile + int64_t *mem_124225_backing_aligned_0, + int64_t m_70861, + int64_t n_70864, + int64_t k2p2zq_70876, + int64_t Ty_118226, + int64_t Tx_118227, + int64_t gridDim_x_118228, + int64_t gridDim_y_118229, + int64_t group_sizze_tile3d_118233, + int64_t count_shmem_118234, + __global + unsigned char *mem_120120, + __global + unsigned char *mem_120124, + __global + unsigned char *mem_124213, + __global + unsigned char *mem_124273) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124225_backing_0 = (__local volatile + char *) mem_124225_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128743; + int32_t local_tid_128744; + int64_t group_sizze_128747; + int32_t wave_sizze_128746; + 
int32_t group_tid_128745; + + global_tid_128743 = get_global_id(0); + local_tid_128744 = get_local_id(0); + group_sizze_128747 = get_local_size(0); + wave_sizze_128746 = LOCKSTEP_WIDTH; + group_tid_128745 = get_group_id(0); + + int32_t gid_flat_118238; + + gid_flat_118238 = group_tid_128745; + + int32_t ltid_pre_128748; + + ltid_pre_128748 = squot32(local_tid_128744, sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128749; + + ltid_pre_128749 = squot32(local_tid_128744 - squot32(local_tid_128744, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128750; + + ltid_pre_128750 = local_tid_128744 - squot32(local_tid_128744, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)) - + squot32(local_tid_128744 - squot32(local_tid_128744, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)) * sext_i64_i32(Tx_118227); + + int32_t ltid_pre_128751; + + ltid_pre_128751 = squot32(local_tid_128744, sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128752; + + ltid_pre_128752 = local_tid_128744 - squot32(local_tid_128744, + sext_i64_i32(Tx_118227)) * + sext_i64_i32(Tx_118227); + + int32_t ltid_pre_128753; + + ltid_pre_128753 = local_tid_128744; + + int64_t gid_zz_118237; + + gid_zz_118237 = squot64(sext_i32_i64(group_tid_128745), gridDim_y_118229 * + gridDim_x_118228); + + int64_t gid_y_118236; + + gid_y_118236 = squot64(sext_i32_i64(group_tid_128745) - + squot64(sext_i32_i64(group_tid_128745), + gridDim_y_118229 * gridDim_x_118228) * + (gridDim_y_118229 * gridDim_x_118228), + gridDim_x_118228); + + int64_t gid_x_118235; + + gid_x_118235 = sext_i32_i64(group_tid_128745) - + squot64(sext_i32_i64(group_tid_128745), gridDim_y_118229 * + gridDim_x_118228) * (gridDim_y_118229 * gridDim_x_118228) - + 
squot64(sext_i32_i64(group_tid_128745) - + squot64(sext_i32_i64(group_tid_128745), gridDim_y_118229 * + gridDim_x_118228) * (gridDim_y_118229 * + gridDim_x_118228), + gridDim_x_118228) * gridDim_x_118228; + + int64_t ii_118239; + + ii_118239 = (int64_t) 30 * gid_zz_118237; + + int64_t jj1_118240 = Ty_118226 * gid_y_118236; + int64_t jj2_118241 = Tx_118227 * gid_x_118235; + double mem_124223[30]; + int64_t ltid_y_118244 = sext_i32_i64(ltid_pre_128751); + int64_t ltid_x_118242 = sext_i32_i64(ltid_pre_128752); + int32_t ltid_flat_118243 = local_tid_128744; + double mem_124217[30]; + + for (int32_t i_119595 = 0; i_119595 < 30; i_119595++) { + int64_t i_118252 = sext_i32_i64(i_119595); + + mem_124217[i_118252] = 0.0; + } + for (int64_t i_128755 = 0; i_128755 < (int64_t) 30; i_128755++) { + mem_124223[i_128755] = mem_124217[i_128755]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124225; + + mem_124225 = (__local char *) mem_124225_backing_0; + + double loop_mem_124255[30]; + double mem_param_124226[30]; + + for (int32_t i_1 = 0; i_1 < 30; i_1++) + mem_param_124226[i_1] = mem_124223[i_1]; + for (int64_t i_118257 = 0; i_118257 < n_70864; i_118257++) { + for (int64_t i_118260 = 0; i_118260 < count_shmem_118234; i_118260++) { + int64_t offs_118273 = group_sizze_tile3d_118233 * i_118260; + int64_t ltid_118263 = sext_i32_i64(ltid_pre_128753); + int32_t ltid_flat_118262 = local_tid_128744; + int64_t loc_ind_118274 = ltid_118263 + offs_118273; + int64_t gtid_118275 = ii_118239 + loc_ind_118274; + bool cond_118276 = slt64(gtid_118275, m_70861); + double y_elem_118277; + + if (cond_118276) { + double Y_elem_118279 = ((__global + double *) mem_124213)[i_118257 * + m_70861 + + gtid_118275]; + + y_elem_118277 = Y_elem_118279; + } else { + y_elem_118277 = 0.0; + } + + bool cond_118281 = slt64(loc_ind_118274, (int64_t) 30); + int64_t y_loc_ind_118282; + + if (cond_118281) { + y_loc_ind_118282 = loc_ind_118274; + } else { + y_loc_ind_118282 = (int64_t) -1; + } + if 
(sle64((int64_t) 0, y_loc_ind_118282) && slt64(y_loc_ind_118282, + (int64_t) 30)) { + ((__local double *) mem_124225)[y_loc_ind_118282] = + y_elem_118277; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + double mem_124254[30]; + int64_t ltid_y_118288 = sext_i32_i64(ltid_pre_128751); + int64_t ltid_x_118286 = sext_i32_i64(ltid_pre_128752); + int32_t ltid_flat_118287 = local_tid_128744; + int64_t gtid_118315 = jj1_118240 + ltid_y_118288; + int64_t gtid_118316 = jj2_118241 + ltid_x_118286; + bool binop_x_118318 = slt64(gtid_118315, k2p2zq_70876); + bool binop_y_118319 = slt64(gtid_118316, k2p2zq_70876); + bool cond_118320 = binop_x_118318 && binop_y_118319; + double mem_125364[30]; + + if (cond_118320) { + double x_118323 = ((__global double *) mem_120120)[i_118257 * + k2p2zq_70876 + + gtid_118315]; + double x_118325 = ((__global double *) mem_120124)[i_118257 * + k2p2zq_70876 + + gtid_118316]; + + for (int32_t i_119596 = 0; i_119596 < 30; i_119596++) { + int64_t i_118327 = sext_i32_i64(i_119596); + int64_t gtid_118329 = ii_118239 + i_118327; + bool cond_118330 = slt64(gtid_118329, m_70861); + + if (cond_118330) { + double inp_reg_var2zz_118332 = ((__local + double *) mem_124225)[i_118327]; + double res_reg_var2zz_118333 = mem_param_124226[i_118327]; + double x_118337 = x_118323 * x_118325; + bool isnan_res_118338; + + isnan_res_118338 = futrts_isnan64(inp_reg_var2zz_118332); + + double y_118339; + + if (isnan_res_118338) { + y_118339 = 0.0; + } else { + y_118339 = 1.0; + } + + double defunc_2_f_res_118340 = x_118337 * y_118339; + double defunc_1_op_res_118344 = res_reg_var2zz_118333 + + defunc_2_f_res_118340; + + mem_param_124226[i_118327] = defunc_1_op_res_118344; + } + } + for (int64_t i_128761 = 0; i_128761 < (int64_t) 30; i_128761++) { + mem_125364[i_128761] = mem_param_124226[i_128761]; + } + } else { + for (int64_t i_128762 = 0; i_128762 < (int64_t) 30; i_128762++) { + mem_125364[i_128762] = mem_param_124226[i_128762]; + } + } + for (int64_t i_128763 = 0; i_128763 < 
(int64_t) 30; i_128763++) { + mem_124254[i_128763] = mem_125364[i_128763]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128756[30]; + + for (int32_t i_2 = 0; i_2 < 30; i_2++) + mem_param_tmp_128756[i_2] = mem_124254[i_2]; + for (int32_t i_3 = 0; i_3 < 30; i_3++) + mem_param_124226[i_3] = mem_param_tmp_128756[i_3]; + } + for (int32_t i_4 = 0; i_4 < 30; i_4++) + loop_mem_124255[i_4] = mem_param_124226[i_4]; + + double mem_124269[30 * 1 * 1]; + int64_t ltid_zz_118353 = sext_i32_i64(ltid_pre_128748); + int64_t ltid_y_118352 = sext_i32_i64(ltid_pre_128749); + int64_t ltid_x_118350 = sext_i32_i64(ltid_pre_128750); + int32_t ltid_flat_118351 = local_tid_128744; + double mem_124263[30 * 1 * 1]; + + for (int32_t i_119598 = 0; i_119598 < 30; i_119598++) { + int64_t i_118362 = sext_i32_i64(i_119598); + + for (int64_t i_128765 = 0; i_128765 < (int64_t) 1; i_128765++) { + mem_124263[i_118362 + i_128765] = loop_mem_124255[i_118362 + + i_128765]; + } + } + for (int64_t i_128766 = 0; i_128766 < (int64_t) 30; i_128766++) { + for (int64_t i_128767 = 0; i_128767 < (int64_t) 1; i_128767++) { + for (int64_t i_128768 = 0; i_128768 < (int64_t) 1; i_128768++) { + mem_124269[i_128766 + i_128767 + i_128768] = + mem_124263[i_128766 + i_128767 + i_128768]; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t reg_tile_i_128769 = squot64(sext_i32_i64(local_tid_128744), + Ty_118226 * Tx_118227); + int64_t reg_tile_i_128770 = squot64(sext_i32_i64(local_tid_128744) - + squot64(sext_i32_i64(local_tid_128744), + Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227), Tx_118227); + int64_t reg_tile_i_128771 = sext_i32_i64(local_tid_128744) - + squot64(sext_i32_i64(local_tid_128744), Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227) - squot64(sext_i32_i64(local_tid_128744) - + squot64(sext_i32_i64(local_tid_128744), + Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227), + Tx_118227) * Tx_118227; + int64_t tile_dim_start_128772 = (int64_t) 30 * (gid_zz_118237 + + reg_tile_i_128769); + 
int64_t tile_dim_start_128773 = Ty_118226 * gid_y_118236 + + reg_tile_i_128770; + int64_t tile_dim_start_128774 = Tx_118227 * gid_x_118235 + + reg_tile_i_128771; + + for (int64_t nest_i_128775 = 0; nest_i_128775 < (int64_t) 30; + nest_i_128775++) { + for (int64_t nest_i_128776 = 0; nest_i_128776 < (int64_t) 1; + nest_i_128776++) { + for (int64_t nest_i_128777 = 0; nest_i_128777 < (int64_t) 1; + nest_i_128777++) { + if ((slt64(tile_dim_start_128772 + nest_i_128775, m_70861) && + slt64(tile_dim_start_128773 + nest_i_128776, + k2p2zq_70876)) && slt64(tile_dim_start_128774 + + nest_i_128777, + k2p2zq_70876)) { + ((__global double *) mem_124273)[(tile_dim_start_128772 + + nest_i_128775) * + (k2p2zq_70876 * + k2p2zq_70876) + + (tile_dim_start_128773 + + nest_i_128776) * + k2p2zq_70876 + + (tile_dim_start_128774 + + nest_i_128777)] = + mem_124269[nest_i_128775 + nest_i_128776 + + nest_i_128777]; + } + } + } + } + + error_4: + return; +} +__kernel void mainDetailedzisegmap_intragroup_118391(__global + int *global_failure, + __local volatile + int64_t *mem_124411_backing_aligned_0, + __local volatile + int64_t *mem_124409_backing_aligned_1, + int64_t N_70860, + int64_t m_70861, + int64_t n_70864, + int64_t k2p2zq_70876, + int64_t gridDim_x_118385, + int64_t full_tiles_118416, + int64_t kk_118623, __global + unsigned char *mem_120120, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124583) +{ + #define Ty_118372 (mainDetailedziTy_118369) + #define Ry_118373 (mainDetailedziRy_118371) + #define Tx_118374 (mainDetailedziTx_118368) + #define Rx_118375 (mainDetailedziRx_118370) + #define Tk_118376 (mainDetailedziTk_118367) + #define tk_div_tx_118377 (sdiv_up64(mainDetailedziTk_118367, mainDetailedziTx_118368)) + #define tk_div_ty_118378 (sdiv_up64(mainDetailedziTk_118367, mainDetailedziTy_118369)) + #define TxRx_118379 (mainDetailedziTx_118368 * mainDetailedziRx_118370) + #define TyRy_118380 (mainDetailedziTy_118369 * mainDetailedziRy_118371) + #define 
a_loc_szz_118382 (mainDetailedziTk_118367 * (mainDetailedziTy_118369 * mainDetailedziRy_118371)) + #define b_loc_szz_118384 (mainDetailedziRx_118370 * (mainDetailedziTx_118368 * mainDetailedziTk_118367)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124411_backing_1 = (__local volatile + char *) mem_124411_backing_aligned_0; + __local volatile char *restrict mem_124409_backing_0 = (__local volatile + char *) mem_124409_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128911; + int32_t local_tid_128912; + int64_t group_sizze_128915; + int32_t wave_sizze_128914; + int32_t group_tid_128913; + + global_tid_128911 = get_global_id(0); + local_tid_128912 = get_local_id(0); + group_sizze_128915 = get_local_size(0); + wave_sizze_128914 = LOCKSTEP_WIDTH; + group_tid_128913 = get_group_id(0); + + int32_t gid_flat_118391; + + gid_flat_118391 = group_tid_128913; + + int32_t ltid_pre_128916; + + ltid_pre_128916 = squot32(local_tid_128912, sext_i64_i32(Tx_118374)); + + int32_t ltid_pre_128917; + + ltid_pre_128917 = local_tid_128912 - squot32(local_tid_128912, + sext_i64_i32(Tx_118374)) * + sext_i64_i32(Tx_118374); + + int64_t gid_y_118390; + + gid_y_118390 = squot64(sext_i32_i64(group_tid_128913), gridDim_x_118385); + + int64_t gid_x_118389; + + gid_x_118389 = sext_i32_i64(group_tid_128913) - + squot64(sext_i32_i64(group_tid_128913), gridDim_x_118385) * + gridDim_x_118385; + + int64_t iii_118392; + + iii_118392 = TyRy_118380 * gid_y_118390; + + int64_t jjj_118393 = TxRx_118379 * gid_x_118389; + double mem_124407[Ry_118373 * Rx_118375]; + int64_t ltid_y_118396 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_x_118394 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118395 = local_tid_128912; + double mem_124398[Ry_118373 * Rx_118375]; + + for (int64_t i_118407 = 0; i_118407 < Ry_118373; i_118407++) { + for (int64_t i_118410 = 0; i_118410 < Rx_118375; i_118410++) { + 
mem_124398[i_118407 * Rx_118375 + i_118410] = 0.0; + } + } + for (int64_t i_128920 = 0; i_128920 < Ry_118373; i_128920++) { + for (int64_t i_128921 = 0; i_128921 < Rx_118375; i_128921++) { + mem_124407[i_128920 * Rx_118375 + i_128921] = mem_124398[i_128920 * + Rx_118375 + + i_128921]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124409; + + mem_124409 = (__local char *) mem_124409_backing_0; + + __local char *mem_124411; + + mem_124411 = (__local char *) mem_124411_backing_1; + + double mem_124482[Ry_118373]; + double mem_124486[Rx_118375]; + double loop_mem_124498[Ry_118373 * Rx_118375]; + double mem_param_124412[Ry_118373 * Rx_118375]; + + for (int32_t i_2 = 0; i_2 < Ry_118373 * Rx_118375; i_2++) + mem_param_124412[i_2] = mem_124407[i_2]; + for (int64_t i_118417 = 0; i_118417 < full_tiles_118416; i_118417++) { + int64_t kk_118421 = Tk_118376 * i_118417; + + for (int64_t i_118422 = 0; i_118422 < Ry_118373; i_118422++) { + int64_t binop_y_118445 = Ty_118372 * i_118422; + + for (int64_t i_118424 = 0; i_118424 < tk_div_tx_118377; + i_118424++) { + int64_t binop_y_118443 = Tx_118374 * i_118424; + int64_t ltid_x_118426 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_y_118427 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118428 = local_tid_128912; + int64_t k_118444 = ltid_y_118427 + binop_y_118443; + int64_t i_118446 = ltid_x_118426 + binop_y_118445; + int64_t gtid_118447 = iii_118392 + i_118446; + int64_t A_col_idx_118448 = kk_118421 + k_118444; + bool cond_118449 = slt64(gtid_118447, m_70861); + double A_elem_118450; + + if (cond_118449) { + double A_elem_118452 = ((__global + double *) mem_124142)[gtid_118447 * + N_70860 + + A_col_idx_118448]; + + A_elem_118450 = A_elem_118452; + } else { + A_elem_118450 = 0.0; + } + + bool cond_118454 = slt64(k_118444, Tk_118376); + int64_t a_loc_ind_118455; + + if (cond_118454) { + int64_t binop_y_118456 = Tk_118376 * i_118446; + int64_t loc_fi_118457 = k_118444 + binop_y_118456; + + a_loc_ind_118455 = 
loc_fi_118457; + } else { + a_loc_ind_118455 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_118455) && + slt64(a_loc_ind_118455, a_loc_szz_118382)) { + ((__local double *) mem_124409)[a_loc_ind_118455] = + A_elem_118450; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_118462 = 0; i_118462 < tk_div_ty_118378; i_118462++) { + int64_t binop_y_118483 = Ty_118372 * i_118462; + + for (int64_t i_118464 = 0; i_118464 < Rx_118375; i_118464++) { + int64_t binop_y_118485 = Tx_118374 * i_118464; + int64_t ltid_x_118466 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_y_118467 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118468 = local_tid_128912; + int64_t k_118484 = ltid_x_118466 + binop_y_118483; + int64_t j_118486 = ltid_y_118467 + binop_y_118485; + int64_t gtid_118487 = jjj_118393 + j_118486; + int64_t B_row_idx_118488 = kk_118421 + k_118484; + bool cond_118489 = slt64(gtid_118487, k2p2zq_70876); + double B_elem_118490; + + if (cond_118489) { + double B_elem_118492 = ((__global + double *) mem_120120)[B_row_idx_118488 * + k2p2zq_70876 + + gtid_118487]; + + B_elem_118490 = B_elem_118492; + } else { + B_elem_118490 = 0.0; + } + + bool cond_118494 = slt64(k_118484, Tk_118376); + int64_t b_loc_ind_118495; + + if (cond_118494) { + int64_t binop_y_118496 = TxRx_118379 * k_118484; + int64_t loc_fi_118497 = j_118486 + binop_y_118496; + + b_loc_ind_118495 = loc_fi_118497; + } else { + b_loc_ind_118495 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_118495) && + slt64(b_loc_ind_118495, b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118495] = + B_elem_118490; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_124497[Ry_118373 * Rx_118375]; + double mem_param_124469[Ry_118373 * Rx_118375]; + + for (int32_t i_3 = 0; i_3 < Ry_118373 * Rx_118375; i_3++) + mem_param_124469[i_3] = mem_param_124412[i_3]; + for (int64_t i_118502 = 0; i_118502 < Tk_118376; i_118502++) { + int64_t binop_y_118541 = TxRx_118379 * i_118502; + 
int64_t ltid_y_118506 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_x_118504 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118505 = local_tid_128912; + double mem_124472[Ry_118373]; + double mem_124474[Rx_118375]; + int64_t binop_x_118532 = Ry_118373 * ltid_y_118506; + + for (int64_t i_118530 = 0; i_118530 < Ry_118373; i_118530++) { + int64_t binop_x_118533 = i_118530 + binop_x_118532; + int64_t binop_y_118534 = Tk_118376 * binop_x_118533; + int64_t a_loc_ind_118535 = i_118502 + binop_y_118534; + + for (int64_t i_128933 = 0; i_128933 < (int64_t) 1; i_128933++) { + mem_124472[i_118530 + i_128933] = ((__local + double *) mem_124409)[a_loc_ind_118535 + + i_128933]; + } + } + + int64_t binop_y_118543 = Rx_118375 * ltid_x_118504; + + for (int64_t i_118539 = 0; i_118539 < Rx_118375; i_118539++) { + int64_t binop_x_118542 = i_118539 + binop_y_118541; + int64_t b_loc_ind_118544 = binop_x_118542 + binop_y_118543; + + for (int64_t i_128935 = 0; i_128935 < (int64_t) 1; i_128935++) { + mem_124474[i_118539 + i_128935] = ((__local + double *) mem_124411)[b_loc_ind_118544 + + i_128935]; + } + } + for (int64_t i_128936 = 0; i_128936 < Ry_118373; i_128936++) { + mem_124482[i_128936] = mem_124472[i_128936]; + } + for (int64_t i_128937 = 0; i_128937 < Rx_118375; i_128937++) { + mem_124486[i_128937] = mem_124474[i_128937]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124496[Ry_118373 * Rx_118375]; + int64_t ltid_y_118551 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_x_118549 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118550 = local_tid_128912; + int64_t binop_y_118594 = Ry_118373 * ltid_y_118551; + int64_t binop_y_118598 = Rx_118375 * ltid_x_118549; + + for (int64_t i_118588 = 0; i_118588 < Ry_118373; i_118588++) { + int64_t binop_x_118593 = iii_118392 + i_118588; + int64_t cmpop_x_118595 = binop_x_118593 + binop_y_118594; + bool binop_x_118596 = slt64(cmpop_x_118595, m_70861); + + for (int64_t i_118591 = 0; i_118591 < Rx_118375; i_118591++) { + int64_t 
binop_x_118597 = jjj_118393 + i_118591; + int64_t cmpop_x_118599 = binop_x_118597 + binop_y_118598; + bool binop_y_118600 = slt64(cmpop_x_118599, k2p2zq_70876); + bool cond_118601 = binop_x_118596 && binop_y_118600; + + if (cond_118601) { + double a_118603 = mem_124482[i_118588]; + double c_118605 = mem_param_124469[i_118588 * + Rx_118375 + + i_118591]; + bool isnan_res_118608; + + isnan_res_118608 = futrts_isnan64(a_118603); + + double defunc_1_f_res_118609; + + if (isnan_res_118608) { + defunc_1_f_res_118609 = 0.0; + } else { + double b_118604 = mem_124486[i_118591]; + double defunc_1_f_res_f_res_118610 = a_118603 * + b_118604; + + defunc_1_f_res_118609 = defunc_1_f_res_f_res_118610; + } + + double defunc_1_op_res_118614 = c_118605 + + defunc_1_f_res_118609; + + mem_param_124469[i_118588 * Rx_118375 + i_118591] = + defunc_1_op_res_118614; + } + } + } + for (int64_t i_128940 = 0; i_128940 < Ry_118373; i_128940++) { + for (int64_t i_128941 = 0; i_128941 < Rx_118375; i_128941++) { + mem_124496[i_128940 * Rx_118375 + i_128941] = + mem_param_124469[i_128940 * Rx_118375 + i_128941]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128930[Ry_118373 * Rx_118375]; + + for (int32_t i_4 = 0; i_4 < Ry_118373 * Rx_118375; i_4++) + mem_param_tmp_128930[i_4] = mem_124496[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_118373 * Rx_118375; i_5++) + mem_param_124469[i_5] = mem_param_tmp_128930[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_118373 * Rx_118375; i_6++) + loop_mem_124497[i_6] = mem_param_124469[i_6]; + + double mem_param_tmp_128922[Ry_118373 * Rx_118375]; + + for (int32_t i_7 = 0; i_7 < Ry_118373 * Rx_118375; i_7++) + mem_param_tmp_128922[i_7] = loop_mem_124497[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_118373 * Rx_118375; i_8++) + mem_param_124412[i_8] = mem_param_tmp_128922[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_118373 * Rx_118375; i_9++) + loop_mem_124498[i_9] = mem_param_124412[i_9]; + for (int64_t i_118624 = 0; i_118624 < Ry_118373; i_118624++) { + int64_t 
binop_y_118649 = Ty_118372 * i_118624; + + for (int64_t i_118626 = 0; i_118626 < tk_div_tx_118377; i_118626++) { + int64_t binop_y_118647 = Tx_118374 * i_118626; + int64_t ltid_x_118628 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_y_118629 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118630 = local_tid_128912; + int64_t k_118648 = ltid_y_118629 + binop_y_118647; + int64_t i_118650 = ltid_x_118628 + binop_y_118649; + int64_t gtid_118651 = iii_118392 + i_118650; + int64_t A_col_idx_118652 = kk_118623 + k_118648; + bool binop_x_118653 = slt64(gtid_118651, m_70861); + bool binop_y_118654 = slt64(A_col_idx_118652, n_70864); + bool cond_118655 = binop_x_118653 && binop_y_118654; + double A_elem_118656; + + if (cond_118655) { + double A_elem_118658 = ((__global + double *) mem_124142)[gtid_118651 * + N_70860 + + A_col_idx_118652]; + + A_elem_118656 = A_elem_118658; + } else { + A_elem_118656 = 0.0; + } + + bool cond_118660 = slt64(k_118648, Tk_118376); + int64_t a_loc_ind_118661; + + if (cond_118660) { + int64_t binop_y_118662 = Tk_118376 * i_118650; + int64_t loc_fi_118663 = k_118648 + binop_y_118662; + + a_loc_ind_118661 = loc_fi_118663; + } else { + a_loc_ind_118661 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_118661) && slt64(a_loc_ind_118661, + a_loc_szz_118382)) { + ((__local double *) mem_124409)[a_loc_ind_118661] = + A_elem_118656; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_118668 = 0; i_118668 < tk_div_ty_118378; i_118668++) { + int64_t binop_y_118691 = Ty_118372 * i_118668; + + for (int64_t i_118670 = 0; i_118670 < Rx_118375; i_118670++) { + int64_t binop_y_118693 = Tx_118374 * i_118670; + int64_t ltid_x_118672 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_y_118673 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118674 = local_tid_128912; + int64_t k_118692 = ltid_x_118672 + binop_y_118691; + int64_t j_118694 = ltid_y_118673 + binop_y_118693; + int64_t gtid_118695 = jjj_118393 + j_118694; + int64_t B_row_idx_118696 = 
kk_118623 + k_118692; + bool binop_x_118697 = slt64(gtid_118695, k2p2zq_70876); + bool binop_y_118698 = slt64(B_row_idx_118696, n_70864); + bool cond_118699 = binop_x_118697 && binop_y_118698; + double B_elem_118700; + + if (cond_118699) { + double B_elem_118702 = ((__global + double *) mem_120120)[B_row_idx_118696 * + k2p2zq_70876 + + gtid_118695]; + + B_elem_118700 = B_elem_118702; + } else { + B_elem_118700 = 0.0; + } + + bool cond_118704 = slt64(k_118692, Tk_118376); + int64_t b_loc_ind_118705; + + if (cond_118704) { + int64_t binop_y_118706 = TxRx_118379 * k_118692; + int64_t loc_fi_118707 = j_118694 + binop_y_118706; + + b_loc_ind_118705 = loc_fi_118707; + } else { + b_loc_ind_118705 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_118705) && slt64(b_loc_ind_118705, + b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118705] = + B_elem_118700; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_124564[Ry_118373]; + double mem_124568[Rx_118375]; + double mem_124578[Ry_118373 * Rx_118375]; + double loop_mem_124580[Ry_118373 * Rx_118375]; + double mem_param_124551[Ry_118373 * Rx_118375]; + + for (int32_t i_10 = 0; i_10 < Ry_118373 * Rx_118375; i_10++) + mem_param_124551[i_10] = loop_mem_124498[i_10]; + for (int64_t i_118712 = 0; i_118712 < Tk_118376; i_118712++) { + int64_t cmpop_x_118714 = kk_118623 + i_118712; + bool cond_118715 = slt64(cmpop_x_118714, n_70864); + double mem_125382[Ry_118373 * Rx_118375]; + + if (cond_118715) { + int64_t binop_y_118753 = TxRx_118379 * i_118712; + int64_t bytes_124553 = (int64_t) 8 * Ry_118373; + int64_t bytes_124555 = (int64_t) 8 * Rx_118375; + int64_t ltid_y_118718 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_x_118716 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118717 = local_tid_128912; + double mem_124554[Ry_118373]; + double mem_124556[Rx_118375]; + int64_t binop_x_118744 = Ry_118373 * ltid_y_118718; + + for (int64_t i_118742 = 0; i_118742 < Ry_118373; i_118742++) { + int64_t 
binop_x_118745 = i_118742 + binop_x_118744; + int64_t binop_y_118746 = Tk_118376 * binop_x_118745; + int64_t a_loc_ind_118747 = i_118712 + binop_y_118746; + + for (int64_t i_128949 = 0; i_128949 < (int64_t) 1; i_128949++) { + mem_124554[i_118742 + i_128949] = ((__local + double *) mem_124409)[a_loc_ind_118747 + + i_128949]; + } + } + + int64_t binop_y_118755 = Rx_118375 * ltid_x_118716; + + for (int64_t i_118751 = 0; i_118751 < Rx_118375; i_118751++) { + int64_t binop_x_118754 = i_118751 + binop_y_118753; + int64_t b_loc_ind_118756 = binop_x_118754 + binop_y_118755; + + for (int64_t i_128951 = 0; i_128951 < (int64_t) 1; i_128951++) { + mem_124556[i_118751 + i_128951] = ((__local + double *) mem_124411)[b_loc_ind_118756 + + i_128951]; + } + } + for (int64_t i_128952 = 0; i_128952 < Ry_118373; i_128952++) { + mem_124564[i_128952] = mem_124554[i_128952]; + } + for (int64_t i_128953 = 0; i_128953 < Rx_118375; i_128953++) { + mem_124568[i_128953] = mem_124556[i_128953]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_118763 = sext_i32_i64(ltid_pre_128916); + int64_t ltid_x_118761 = sext_i32_i64(ltid_pre_128917); + int32_t ltid_flat_118762 = local_tid_128912; + int64_t binop_y_118806 = Ry_118373 * ltid_y_118763; + int64_t binop_y_118810 = Rx_118375 * ltid_x_118761; + + for (int64_t i_118800 = 0; i_118800 < Ry_118373; i_118800++) { + int64_t binop_x_118805 = iii_118392 + i_118800; + int64_t cmpop_x_118807 = binop_x_118805 + binop_y_118806; + bool binop_x_118808 = slt64(cmpop_x_118807, m_70861); + + for (int64_t i_118803 = 0; i_118803 < Rx_118375; i_118803++) { + int64_t binop_x_118809 = jjj_118393 + i_118803; + int64_t cmpop_x_118811 = binop_x_118809 + binop_y_118810; + bool binop_y_118812 = slt64(cmpop_x_118811, k2p2zq_70876); + bool cond_118813 = binop_x_118808 && binop_y_118812; + + if (cond_118813) { + double a_118815 = mem_124564[i_118800]; + double c_118817 = mem_param_124551[i_118800 * + Rx_118375 + + i_118803]; + bool isnan_res_118820; + + isnan_res_118820 
= futrts_isnan64(a_118815); + + double defunc_1_f_res_118821; + + if (isnan_res_118820) { + defunc_1_f_res_118821 = 0.0; + } else { + double b_118816 = mem_124568[i_118803]; + double defunc_1_f_res_f_res_118822 = a_118815 * + b_118816; + + defunc_1_f_res_118821 = defunc_1_f_res_f_res_118822; + } + + double defunc_1_op_res_118826 = c_118817 + + defunc_1_f_res_118821; + + mem_param_124551[i_118800 * Rx_118375 + i_118803] = + defunc_1_op_res_118826; + } + } + } + for (int64_t i_128956 = 0; i_128956 < Ry_118373; i_128956++) { + for (int64_t i_128957 = 0; i_128957 < Rx_118375; i_128957++) { + mem_124578[i_128956 * Rx_118375 + i_128957] = + mem_param_124551[i_128956 * Rx_118375 + i_128957]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128958 = 0; i_128958 < Ry_118373; i_128958++) { + for (int64_t i_128959 = 0; i_128959 < Rx_118375; i_128959++) { + mem_125382[i_128958 * Rx_118375 + i_128959] = + mem_124578[i_128958 * Rx_118375 + i_128959]; + } + } + } else { + for (int64_t i_128960 = 0; i_128960 < Ry_118373; i_128960++) { + for (int64_t i_128961 = 0; i_128961 < Rx_118375; i_128961++) { + mem_125382[i_128960 * Rx_118375 + i_128961] = + mem_param_124551[i_128960 * Rx_118375 + i_128961]; + } + } + } + + double mem_param_tmp_128946[Ry_118373 * Rx_118375]; + + for (int32_t i_11 = 0; i_11 < Ry_118373 * Rx_118375; i_11++) + mem_param_tmp_128946[i_11] = mem_125382[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_118373 * Rx_118375; i_12++) + mem_param_124551[i_12] = mem_param_tmp_128946[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_118373 * Rx_118375; i_13++) + loop_mem_124580[i_13] = mem_param_124551[i_13]; + + int64_t reg_tile_i_128962 = squot64(sext_i32_i64(local_tid_128912), + Tx_118374); + int64_t reg_tile_i_128963 = sext_i32_i64(local_tid_128912) - + squot64(sext_i32_i64(local_tid_128912), Tx_118374) * Tx_118374; + int64_t tile_dim_start_128964 = Ry_118373 * (Ty_118372 * gid_y_118390 + + reg_tile_i_128962); + int64_t tile_dim_start_128965 = Rx_118375 * (Tx_118374 * 
gid_x_118389 + + reg_tile_i_128963); + + for (int64_t nest_i_128966 = 0; nest_i_128966 < Ry_118373; + nest_i_128966++) { + for (int64_t nest_i_128967 = 0; nest_i_128967 < Rx_118375; + nest_i_128967++) { + if (slt64(tile_dim_start_128964 + nest_i_128966, m_70861) && + slt64(tile_dim_start_128965 + nest_i_128967, k2p2zq_70876)) { + ((__global double *) mem_124583)[(tile_dim_start_128964 + + nest_i_128966) * + k2p2zq_70876 + + (tile_dim_start_128965 + + nest_i_128967)] = + loop_mem_124580[nest_i_128966 * Rx_118375 + nest_i_128967]; + } + } + } + + error_9: + return; + #undef Ty_118372 + #undef Ry_118373 + #undef Tx_118374 + #undef Rx_118375 + #undef Tk_118376 + #undef tk_div_tx_118377 + #undef tk_div_ty_118378 + #undef TxRx_118379 + #undef TyRy_118380 + #undef a_loc_szz_118382 + #undef b_loc_szz_118384 +} +__kernel void mainDetailedzisegmap_intragroup_118840(__global + int *global_failure, + __local volatile + int64_t *mem_124641_backing_aligned_0, + __local volatile + int64_t *mem_124632_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_y_118838, + int64_t num_whole_tiles_118856, + int64_t residual_input_118983, + unsigned char cond_118984, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124622, + __global + unsigned char *mem_124649) +{ + #define tile_sizze_118835 (mainDetailedzitile_sizze_118834) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124641_backing_5 = (__local volatile + char *) mem_124641_backing_aligned_0; + __local volatile char *restrict mem_124632_backing_0 = (__local volatile + char *) mem_124632_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129043; + int32_t local_tid_129044; + int64_t group_sizze_129047; + int32_t wave_sizze_129046; + int32_t group_tid_129045; + + global_tid_129043 = get_global_id(0); + local_tid_129044 = get_local_id(0); + group_sizze_129047 = 
get_local_size(0); + wave_sizze_129046 = LOCKSTEP_WIDTH; + group_tid_129045 = get_group_id(0); + + int32_t gid_flat_118840; + + gid_flat_118840 = group_tid_129045; + + int32_t ltid_pre_129048; + + ltid_pre_129048 = squot32(local_tid_129044, + sext_i64_i32(tile_sizze_118835)); + + int32_t ltid_pre_129049; + + ltid_pre_129049 = local_tid_129044 - squot32(local_tid_129044, + sext_i64_i32(tile_sizze_118835)) * + sext_i64_i32(tile_sizze_118835); + + int64_t gid_x_118832; + + gid_x_118832 = squot64(sext_i32_i64(group_tid_129045), num_groups_y_118838); + + int64_t gid_y_118833; + + gid_y_118833 = sext_i32_i64(group_tid_129045) - + squot64(sext_i32_i64(group_tid_129045), num_groups_y_118838) * + num_groups_y_118838; + + double mem_124627[1]; + int64_t ltid_y_118859 = sext_i32_i64(ltid_pre_129048); + int64_t ltid_x_118857 = sext_i32_i64(ltid_pre_129049); + int32_t ltid_flat_118858 = local_tid_129044; + + if (slt64(ltid_y_118859, tile_sizze_118835) && slt64(ltid_x_118857, + tile_sizze_118835)) { + mem_124627[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_118942 = gid_x_118832 * tile_sizze_118835; + int64_t binop_x_118957 = gid_y_118833 * tile_sizze_118835; + __local char *mem_124632; + + mem_124632 = (__local char *) mem_124632_backing_0; + + double accs_mem_124637[1]; + double mem_param_124628[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_124628[i_1] = mem_124627[i_1]; + for (int64_t tile_id_118868 = 0; tile_id_118868 < num_whole_tiles_118856; + tile_id_118868++) { + int64_t binop_x_118940 = tile_sizze_118835 * tile_id_118868; + int64_t ltid_y_118871 = sext_i32_i64(ltid_pre_129048); + int64_t ltid_x_118869 = sext_i32_i64(ltid_pre_129049); + int32_t ltid_flat_118870 = local_tid_129044; + int64_t j_118941 = ltid_x_118869 + binop_x_118940; + int64_t gtid_118943 = ltid_y_118871 + binop_x_118942; + bool binop_x_118948 = slt64(j_118941, k2p2zq_70876); + bool binop_y_118949 = slt64(gtid_118943, m_70861); + bool cond_118950 = binop_x_118948 
&& binop_y_118949; + double pre_118951; + + if (cond_118950) { + double x_118952 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_118943 * + k2p2zq_70876 + + j_118941]; + + pre_118951 = x_118952; + } else { + pre_118951 = 0.0; + } + ((__local double *) mem_124632)[ltid_y_118871 * tile_sizze_118835 + + ltid_x_118869] = pre_118951; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124636[1]; + int64_t ltid_y_118903 = sext_i32_i64(ltid_pre_129048); + int64_t ltid_x_118901 = sext_i32_i64(ltid_pre_129049); + int32_t ltid_flat_118902 = local_tid_129044; + int64_t gtid_118956 = ltid_y_118903 + binop_x_118942; + int64_t gtid_118958 = ltid_x_118901 + binop_x_118957; + double acc_118961 = mem_param_124628[(int64_t) 0]; + bool binop_x_118965 = slt64(gtid_118956, m_70861); + bool binop_y_118966 = slt64(gtid_118958, k2p2zq_70876); + bool cond_118967 = binop_x_118965 && binop_y_118966; + double acc_118968; + + if (cond_118967) { + double x_118969; + double redout_119940 = acc_118961; + + for (int64_t i_119941 = 0; i_119941 < tile_sizze_118835; + i_119941++) { + double x_118973 = ((__local + double *) mem_124632)[ltid_y_118903 * + tile_sizze_118835 + + i_119941]; + int64_t slice_120051 = binop_x_118940 + i_119941; + double x_118974 = ((__global + double *) mem_124622)[slice_120051 * + (k2p2zq_70876 * + m_70861) + + gtid_118956 * + k2p2zq_70876 + + gtid_118958]; + double defunc_1_f_res_118975 = x_118973 * x_118974; + double defunc_1_op_res_118972 = defunc_1_f_res_118975 + + redout_119940; + double redout_tmp_129052 = defunc_1_op_res_118972; + + redout_119940 = redout_tmp_129052; + } + x_118969 = redout_119940; + acc_118968 = x_118969; + } else { + acc_118968 = acc_118961; + } + mem_124636[(int64_t) 0] = acc_118968; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_129050[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_129050[i_2] = mem_124636[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_124628[i_3] = mem_param_tmp_129050[i_3]; + } + for 
(int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_124637[i_4] = mem_param_124628[i_4]; + + __local char *mem_124641; + + mem_124641 = (__local char *) mem_124641_backing_5; + + double mem_124645[1]; + double mem_125396[1]; + + if (cond_118984) { + mem_125396[(int64_t) 0] = accs_mem_124637[(int64_t) 0]; + } else { + int64_t binop_x_119057 = tile_sizze_118835 * num_whole_tiles_118856; + int64_t ltid_y_118987 = sext_i32_i64(ltid_pre_129048); + int64_t ltid_x_118985 = sext_i32_i64(ltid_pre_129049); + int32_t ltid_flat_118986 = local_tid_129044; + int64_t j_119058 = ltid_x_118985 + binop_x_119057; + int64_t gtid_119060 = binop_x_118942 + ltid_y_118987; + bool binop_x_119065 = slt64(j_119058, k2p2zq_70876); + bool binop_y_119066 = slt64(gtid_119060, m_70861); + bool cond_119067 = binop_x_119065 && binop_y_119066; + double pre_119068; + + if (cond_119067) { + double x_119069 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_119060 * + k2p2zq_70876 + + j_119058]; + + pre_119068 = x_119069; + } else { + pre_119068 = 0.0; + } + ((__local double *) mem_124641)[ltid_y_118987 * tile_sizze_118835 + + ltid_x_118985] = pre_119068; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_119020 = sext_i32_i64(ltid_pre_129048); + int64_t ltid_x_119018 = sext_i32_i64(ltid_pre_129049); + int32_t ltid_flat_119019 = local_tid_129044; + int64_t gtid_119074 = binop_x_118942 + ltid_y_119020; + int64_t gtid_119076 = binop_x_118957 + ltid_x_119018; + double acc_119079 = accs_mem_124637[(int64_t) 0]; + bool binop_x_119083 = slt64(gtid_119074, m_70861); + bool binop_y_119084 = slt64(gtid_119076, k2p2zq_70876); + bool cond_119085 = binop_x_119083 && binop_y_119084; + double acc_119086; + + if (cond_119085) { + double x_119087; + double redout_119942 = acc_119079; + + for (int64_t i_119943 = 0; i_119943 < residual_input_118983; + i_119943++) { + double x_119091 = ((__local + double *) mem_124641)[ltid_y_119020 * + tile_sizze_118835 + + i_119943]; + int64_t slice_120052 = binop_x_119057 + i_119943; 
+ double x_119092 = ((__global + double *) mem_124622)[slice_120052 * + (k2p2zq_70876 * + m_70861) + + gtid_119074 * + k2p2zq_70876 + + gtid_119076]; + double defunc_1_f_res_119093 = x_119091 * x_119092; + double defunc_1_op_res_119090 = defunc_1_f_res_119093 + + redout_119942; + double redout_tmp_129053 = defunc_1_op_res_119090; + + redout_119942 = redout_tmp_129053; + } + x_119087 = redout_119942; + acc_119086 = x_119087; + } else { + acc_119086 = acc_119079; + } + mem_124645[(int64_t) 0] = acc_119086; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125396[(int64_t) 0] = mem_124645[(int64_t) 0]; + } + + int64_t thread_out_index_129054 = gid_x_118832 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129048); + int64_t thread_out_index_129055 = gid_y_118833 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129049); + + if (slt64(thread_out_index_129054, m_70861) && + slt64(thread_out_index_129055, k2p2zq_70876)) { + ((__global double *) mem_124649)[thread_out_index_129054 * + k2p2zq_70876 + + thread_out_index_129055] = + mem_125396[(int64_t) 0]; + } + + error_5: + return; + #undef tile_sizze_118835 +} +__kernel void mainDetailedzisegmap_intragroup_119132(__global + int *global_failure, + __local volatile + int64_t *mem_124701_backing_aligned_0, + __local volatile + int64_t *mem_124699_backing_aligned_1, + int64_t N_70860, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t gridDim_x_119126, + int64_t full_tiles_119157, + int64_t kk_119360, __global + unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124683, + __global + unsigned char *mem_124873) +{ + #define Ty_119113 (mainDetailedziTy_119110) + #define Ry_119114 (mainDetailedziRy_119112) + #define Tx_119115 (mainDetailedziTx_119109) + #define Rx_119116 (mainDetailedziRx_119111) + #define Tk_119117 (mainDetailedziTk_119108) + #define tk_div_tx_119118 (sdiv_up64(mainDetailedziTk_119108, mainDetailedziTx_119109)) + #define tk_div_ty_119119 (sdiv_up64(mainDetailedziTk_119108, mainDetailedziTy_119110)) + 
#define TxRx_119120 (mainDetailedziTx_119109 * mainDetailedziRx_119111) + #define TyRy_119121 (mainDetailedziTy_119110 * mainDetailedziRy_119112) + #define a_loc_szz_119123 (mainDetailedziTk_119108 * (mainDetailedziTy_119110 * mainDetailedziRy_119112)) + #define b_loc_szz_119125 (mainDetailedziRx_119111 * (mainDetailedziTx_119109 * mainDetailedziTk_119108)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124701_backing_1 = (__local volatile + char *) mem_124701_backing_aligned_0; + __local volatile char *restrict mem_124699_backing_0 = (__local volatile + char *) mem_124699_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129131; + int32_t local_tid_129132; + int64_t group_sizze_129135; + int32_t wave_sizze_129134; + int32_t group_tid_129133; + + global_tid_129131 = get_global_id(0); + local_tid_129132 = get_local_id(0); + group_sizze_129135 = get_local_size(0); + wave_sizze_129134 = LOCKSTEP_WIDTH; + group_tid_129133 = get_group_id(0); + + int32_t gid_flat_119132; + + gid_flat_119132 = group_tid_129133; + + int32_t ltid_pre_129136; + + ltid_pre_129136 = squot32(local_tid_129132, sext_i64_i32(Tx_119115)); + + int32_t ltid_pre_129137; + + ltid_pre_129137 = local_tid_129132 - squot32(local_tid_129132, + sext_i64_i32(Tx_119115)) * + sext_i64_i32(Tx_119115); + + int64_t gid_y_119131; + + gid_y_119131 = squot64(sext_i32_i64(group_tid_129133), gridDim_x_119126); + + int64_t gid_x_119130; + + gid_x_119130 = sext_i32_i64(group_tid_129133) - + squot64(sext_i32_i64(group_tid_129133), gridDim_x_119126) * + gridDim_x_119126; + + int64_t iii_119133; + + iii_119133 = TyRy_119121 * gid_y_119131; + + int64_t jjj_119134 = TxRx_119120 * gid_x_119130; + double mem_124697[Ry_119114 * Rx_119116]; + int64_t ltid_y_119137 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_x_119135 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119136 = local_tid_129132; + double 
mem_124688[Ry_119114 * Rx_119116]; + + for (int64_t i_119148 = 0; i_119148 < Ry_119114; i_119148++) { + for (int64_t i_119151 = 0; i_119151 < Rx_119116; i_119151++) { + mem_124688[i_119148 * Rx_119116 + i_119151] = 0.0; + } + } + for (int64_t i_129140 = 0; i_129140 < Ry_119114; i_129140++) { + for (int64_t i_129141 = 0; i_129141 < Rx_119116; i_129141++) { + mem_124697[i_129140 * Rx_119116 + i_129141] = mem_124688[i_129140 * + Rx_119116 + + i_129141]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124699; + + mem_124699 = (__local char *) mem_124699_backing_0; + + __local char *mem_124701; + + mem_124701 = (__local char *) mem_124701_backing_1; + + double mem_124772[Ry_119114]; + double mem_124776[Rx_119116]; + double loop_mem_124788[Ry_119114 * Rx_119116]; + double mem_param_124702[Ry_119114 * Rx_119116]; + + for (int32_t i_2 = 0; i_2 < Ry_119114 * Rx_119116; i_2++) + mem_param_124702[i_2] = mem_124697[i_2]; + for (int64_t i_119158 = 0; i_119158 < full_tiles_119157; i_119158++) { + int64_t kk_119162 = Tk_119117 * i_119158; + + for (int64_t i_119163 = 0; i_119163 < Ry_119114; i_119163++) { + int64_t binop_y_119186 = Ty_119113 * i_119163; + + for (int64_t i_119165 = 0; i_119165 < tk_div_tx_119118; + i_119165++) { + int64_t binop_y_119184 = Tx_119115 * i_119165; + int64_t ltid_x_119167 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_y_119168 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119169 = local_tid_129132; + int64_t k_119185 = ltid_y_119168 + binop_y_119184; + int64_t i_119187 = ltid_x_119167 + binop_y_119186; + int64_t gtid_119188 = iii_119133 + i_119187; + int64_t A_col_idx_119189 = kk_119162 + k_119185; + bool cond_119190 = slt64(gtid_119188, m_70861); + double A_elem_119191; + + if (cond_119190) { + double A_elem_119193 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119188 * + k2p2zq_70876 + + A_col_idx_119189]; + + A_elem_119191 = A_elem_119193; + } else { + A_elem_119191 = 0.0; + } + + bool cond_119195 = slt64(k_119185, 
Tk_119117); + int64_t a_loc_ind_119196; + + if (cond_119195) { + int64_t binop_y_119197 = Tk_119117 * i_119187; + int64_t loc_fi_119198 = k_119185 + binop_y_119197; + + a_loc_ind_119196 = loc_fi_119198; + } else { + a_loc_ind_119196 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_119196) && + slt64(a_loc_ind_119196, a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119196] = + A_elem_119191; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_119203 = 0; i_119203 < tk_div_ty_119119; i_119203++) { + int64_t binop_y_119224 = Ty_119113 * i_119203; + + for (int64_t i_119205 = 0; i_119205 < Rx_119116; i_119205++) { + int64_t binop_y_119226 = Tx_119115 * i_119205; + int64_t ltid_x_119207 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_y_119208 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119209 = local_tid_129132; + int64_t k_119225 = ltid_x_119207 + binop_y_119224; + int64_t j_119227 = ltid_y_119208 + binop_y_119226; + int64_t gtid_119228 = jjj_119134 + j_119227; + int64_t B_row_idx_119229 = kk_119162 + k_119225; + bool cond_119230 = slt64(gtid_119228, N_70860); + double B_elem_119231; + + if (cond_119230) { + double B_elem_119233 = ((__global + double *) mem_124683)[B_row_idx_119229 * + N_70860 + + gtid_119228]; + + B_elem_119231 = B_elem_119233; + } else { + B_elem_119231 = 0.0; + } + + bool cond_119235 = slt64(k_119225, Tk_119117); + int64_t b_loc_ind_119236; + + if (cond_119235) { + int64_t binop_y_119237 = TxRx_119120 * k_119225; + int64_t loc_fi_119238 = j_119227 + binop_y_119237; + + b_loc_ind_119236 = loc_fi_119238; + } else { + b_loc_ind_119236 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_119236) && + slt64(b_loc_ind_119236, b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119236] = + B_elem_119231; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_124787[Ry_119114 * Rx_119116]; + double mem_param_124759[Ry_119114 * Rx_119116]; + + for (int32_t i_3 = 0; i_3 < Ry_119114 * Rx_119116; 
i_3++) + mem_param_124759[i_3] = mem_param_124702[i_3]; + for (int64_t i_119243 = 0; i_119243 < Tk_119117; i_119243++) { + int64_t binop_y_119282 = TxRx_119120 * i_119243; + int64_t ltid_y_119247 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_x_119245 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119246 = local_tid_129132; + double mem_124762[Ry_119114]; + double mem_124764[Rx_119116]; + int64_t binop_x_119273 = Ry_119114 * ltid_y_119247; + + for (int64_t i_119271 = 0; i_119271 < Ry_119114; i_119271++) { + int64_t binop_x_119274 = i_119271 + binop_x_119273; + int64_t binop_y_119275 = Tk_119117 * binop_x_119274; + int64_t a_loc_ind_119276 = i_119243 + binop_y_119275; + + for (int64_t i_129153 = 0; i_129153 < (int64_t) 1; i_129153++) { + mem_124762[i_119271 + i_129153] = ((__local + double *) mem_124699)[a_loc_ind_119276 + + i_129153]; + } + } + + int64_t binop_y_119284 = Rx_119116 * ltid_x_119245; + + for (int64_t i_119280 = 0; i_119280 < Rx_119116; i_119280++) { + int64_t binop_x_119283 = i_119280 + binop_y_119282; + int64_t b_loc_ind_119285 = binop_x_119283 + binop_y_119284; + + for (int64_t i_129155 = 0; i_129155 < (int64_t) 1; i_129155++) { + mem_124764[i_119280 + i_129155] = ((__local + double *) mem_124701)[b_loc_ind_119285 + + i_129155]; + } + } + for (int64_t i_129156 = 0; i_129156 < Ry_119114; i_129156++) { + mem_124772[i_129156] = mem_124762[i_129156]; + } + for (int64_t i_129157 = 0; i_129157 < Rx_119116; i_129157++) { + mem_124776[i_129157] = mem_124764[i_129157]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124786[Ry_119114 * Rx_119116]; + int64_t ltid_y_119292 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_x_119290 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119291 = local_tid_129132; + int64_t binop_y_119333 = Ry_119114 * ltid_y_119292; + int64_t binop_y_119337 = Rx_119116 * ltid_x_119290; + + for (int64_t i_119327 = 0; i_119327 < Ry_119114; i_119327++) { + int64_t binop_x_119332 = iii_119133 + i_119327; + int64_t 
cmpop_x_119334 = binop_x_119332 + binop_y_119333; + bool binop_x_119335 = slt64(cmpop_x_119334, m_70861); + + for (int64_t i_119330 = 0; i_119330 < Rx_119116; i_119330++) { + int64_t binop_x_119336 = jjj_119134 + i_119330; + int64_t cmpop_x_119338 = binop_x_119336 + binop_y_119337; + bool binop_y_119339 = slt64(cmpop_x_119338, N_70860); + bool cond_119340 = binop_x_119335 && binop_y_119339; + + if (cond_119340) { + double a_119342 = mem_124772[i_119327]; + double b_119343 = mem_124776[i_119330]; + double c_119344 = mem_param_124759[i_119327 * + Rx_119116 + + i_119330]; + double defunc_1_f_res_119347 = a_119342 * b_119343; + double defunc_1_op_res_119351 = c_119344 + + defunc_1_f_res_119347; + + mem_param_124759[i_119327 * Rx_119116 + i_119330] = + defunc_1_op_res_119351; + } + } + } + for (int64_t i_129160 = 0; i_129160 < Ry_119114; i_129160++) { + for (int64_t i_129161 = 0; i_129161 < Rx_119116; i_129161++) { + mem_124786[i_129160 * Rx_119116 + i_129161] = + mem_param_124759[i_129160 * Rx_119116 + i_129161]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_129150[Ry_119114 * Rx_119116]; + + for (int32_t i_4 = 0; i_4 < Ry_119114 * Rx_119116; i_4++) + mem_param_tmp_129150[i_4] = mem_124786[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_119114 * Rx_119116; i_5++) + mem_param_124759[i_5] = mem_param_tmp_129150[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_119114 * Rx_119116; i_6++) + loop_mem_124787[i_6] = mem_param_124759[i_6]; + + double mem_param_tmp_129142[Ry_119114 * Rx_119116]; + + for (int32_t i_7 = 0; i_7 < Ry_119114 * Rx_119116; i_7++) + mem_param_tmp_129142[i_7] = loop_mem_124787[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_119114 * Rx_119116; i_8++) + mem_param_124702[i_8] = mem_param_tmp_129142[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_119114 * Rx_119116; i_9++) + loop_mem_124788[i_9] = mem_param_124702[i_9]; + for (int64_t i_119361 = 0; i_119361 < Ry_119114; i_119361++) { + int64_t binop_y_119386 = Ty_119113 * i_119361; + + for (int64_t i_119363 = 0; 
i_119363 < tk_div_tx_119118; i_119363++) { + int64_t binop_y_119384 = Tx_119115 * i_119363; + int64_t ltid_x_119365 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_y_119366 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119367 = local_tid_129132; + int64_t k_119385 = ltid_y_119366 + binop_y_119384; + int64_t i_119387 = ltid_x_119365 + binop_y_119386; + int64_t gtid_119388 = iii_119133 + i_119387; + int64_t A_col_idx_119389 = kk_119360 + k_119385; + bool binop_x_119390 = slt64(gtid_119388, m_70861); + bool binop_y_119391 = slt64(A_col_idx_119389, k2p2zq_70876); + bool cond_119392 = binop_x_119390 && binop_y_119391; + double A_elem_119393; + + if (cond_119392) { + double A_elem_119395 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119388 * + k2p2zq_70876 + + A_col_idx_119389]; + + A_elem_119393 = A_elem_119395; + } else { + A_elem_119393 = 0.0; + } + + bool cond_119397 = slt64(k_119385, Tk_119117); + int64_t a_loc_ind_119398; + + if (cond_119397) { + int64_t binop_y_119399 = Tk_119117 * i_119387; + int64_t loc_fi_119400 = k_119385 + binop_y_119399; + + a_loc_ind_119398 = loc_fi_119400; + } else { + a_loc_ind_119398 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_119398) && slt64(a_loc_ind_119398, + a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119398] = + A_elem_119393; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_119405 = 0; i_119405 < tk_div_ty_119119; i_119405++) { + int64_t binop_y_119428 = Ty_119113 * i_119405; + + for (int64_t i_119407 = 0; i_119407 < Rx_119116; i_119407++) { + int64_t binop_y_119430 = Tx_119115 * i_119407; + int64_t ltid_x_119409 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_y_119410 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119411 = local_tid_129132; + int64_t k_119429 = ltid_x_119409 + binop_y_119428; + int64_t j_119431 = ltid_y_119410 + binop_y_119430; + int64_t gtid_119432 = jjj_119134 + j_119431; + int64_t B_row_idx_119433 = kk_119360 + k_119429; + bool 
binop_x_119434 = slt64(gtid_119432, N_70860); + bool binop_y_119435 = slt64(B_row_idx_119433, k2p2zq_70876); + bool cond_119436 = binop_x_119434 && binop_y_119435; + double B_elem_119437; + + if (cond_119436) { + double B_elem_119439 = ((__global + double *) mem_124683)[B_row_idx_119433 * + N_70860 + + gtid_119432]; + + B_elem_119437 = B_elem_119439; + } else { + B_elem_119437 = 0.0; + } + + bool cond_119441 = slt64(k_119429, Tk_119117); + int64_t b_loc_ind_119442; + + if (cond_119441) { + int64_t binop_y_119443 = TxRx_119120 * k_119429; + int64_t loc_fi_119444 = j_119431 + binop_y_119443; + + b_loc_ind_119442 = loc_fi_119444; + } else { + b_loc_ind_119442 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_119442) && slt64(b_loc_ind_119442, + b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119442] = + B_elem_119437; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_124854[Ry_119114]; + double mem_124858[Rx_119116]; + double mem_124868[Ry_119114 * Rx_119116]; + double loop_mem_124870[Ry_119114 * Rx_119116]; + double mem_param_124841[Ry_119114 * Rx_119116]; + + for (int32_t i_10 = 0; i_10 < Ry_119114 * Rx_119116; i_10++) + mem_param_124841[i_10] = loop_mem_124788[i_10]; + for (int64_t i_119449 = 0; i_119449 < Tk_119117; i_119449++) { + int64_t cmpop_x_119451 = kk_119360 + i_119449; + bool cond_119452 = slt64(cmpop_x_119451, k2p2zq_70876); + double mem_125412[Ry_119114 * Rx_119116]; + + if (cond_119452) { + int64_t binop_y_119490 = TxRx_119120 * i_119449; + int64_t bytes_124843 = (int64_t) 8 * Ry_119114; + int64_t bytes_124845 = (int64_t) 8 * Rx_119116; + int64_t ltid_y_119455 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_x_119453 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119454 = local_tid_129132; + double mem_124844[Ry_119114]; + double mem_124846[Rx_119116]; + int64_t binop_x_119481 = Ry_119114 * ltid_y_119455; + + for (int64_t i_119479 = 0; i_119479 < Ry_119114; i_119479++) { + int64_t binop_x_119482 = i_119479 + 
binop_x_119481; + int64_t binop_y_119483 = Tk_119117 * binop_x_119482; + int64_t a_loc_ind_119484 = i_119449 + binop_y_119483; + + for (int64_t i_129169 = 0; i_129169 < (int64_t) 1; i_129169++) { + mem_124844[i_119479 + i_129169] = ((__local + double *) mem_124699)[a_loc_ind_119484 + + i_129169]; + } + } + + int64_t binop_y_119492 = Rx_119116 * ltid_x_119453; + + for (int64_t i_119488 = 0; i_119488 < Rx_119116; i_119488++) { + int64_t binop_x_119491 = i_119488 + binop_y_119490; + int64_t b_loc_ind_119493 = binop_x_119491 + binop_y_119492; + + for (int64_t i_129171 = 0; i_129171 < (int64_t) 1; i_129171++) { + mem_124846[i_119488 + i_129171] = ((__local + double *) mem_124701)[b_loc_ind_119493 + + i_129171]; + } + } + for (int64_t i_129172 = 0; i_129172 < Ry_119114; i_129172++) { + mem_124854[i_129172] = mem_124844[i_129172]; + } + for (int64_t i_129173 = 0; i_129173 < Rx_119116; i_129173++) { + mem_124858[i_129173] = mem_124846[i_129173]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_119500 = sext_i32_i64(ltid_pre_129136); + int64_t ltid_x_119498 = sext_i32_i64(ltid_pre_129137); + int32_t ltid_flat_119499 = local_tid_129132; + int64_t binop_y_119541 = Ry_119114 * ltid_y_119500; + int64_t binop_y_119545 = Rx_119116 * ltid_x_119498; + + for (int64_t i_119535 = 0; i_119535 < Ry_119114; i_119535++) { + int64_t binop_x_119540 = iii_119133 + i_119535; + int64_t cmpop_x_119542 = binop_x_119540 + binop_y_119541; + bool binop_x_119543 = slt64(cmpop_x_119542, m_70861); + + for (int64_t i_119538 = 0; i_119538 < Rx_119116; i_119538++) { + int64_t binop_x_119544 = jjj_119134 + i_119538; + int64_t cmpop_x_119546 = binop_x_119544 + binop_y_119545; + bool binop_y_119547 = slt64(cmpop_x_119546, N_70860); + bool cond_119548 = binop_x_119543 && binop_y_119547; + + if (cond_119548) { + double a_119550 = mem_124854[i_119535]; + double b_119551 = mem_124858[i_119538]; + double c_119552 = mem_param_124841[i_119535 * + Rx_119116 + + i_119538]; + double defunc_1_f_res_119555 = 
a_119550 * b_119551; + double defunc_1_op_res_119559 = c_119552 + + defunc_1_f_res_119555; + + mem_param_124841[i_119535 * Rx_119116 + i_119538] = + defunc_1_op_res_119559; + } + } + } + for (int64_t i_129176 = 0; i_129176 < Ry_119114; i_129176++) { + for (int64_t i_129177 = 0; i_129177 < Rx_119116; i_129177++) { + mem_124868[i_129176 * Rx_119116 + i_129177] = + mem_param_124841[i_129176 * Rx_119116 + i_129177]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_129178 = 0; i_129178 < Ry_119114; i_129178++) { + for (int64_t i_129179 = 0; i_129179 < Rx_119116; i_129179++) { + mem_125412[i_129178 * Rx_119116 + i_129179] = + mem_124868[i_129178 * Rx_119116 + i_129179]; + } + } + } else { + for (int64_t i_129180 = 0; i_129180 < Ry_119114; i_129180++) { + for (int64_t i_129181 = 0; i_129181 < Rx_119116; i_129181++) { + mem_125412[i_129180 * Rx_119116 + i_129181] = + mem_param_124841[i_129180 * Rx_119116 + i_129181]; + } + } + } + + double mem_param_tmp_129166[Ry_119114 * Rx_119116]; + + for (int32_t i_11 = 0; i_11 < Ry_119114 * Rx_119116; i_11++) + mem_param_tmp_129166[i_11] = mem_125412[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_119114 * Rx_119116; i_12++) + mem_param_124841[i_12] = mem_param_tmp_129166[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_119114 * Rx_119116; i_13++) + loop_mem_124870[i_13] = mem_param_124841[i_13]; + + int64_t reg_tile_i_129182 = squot64(sext_i32_i64(local_tid_129132), + Tx_119115); + int64_t reg_tile_i_129183 = sext_i32_i64(local_tid_129132) - + squot64(sext_i32_i64(local_tid_129132), Tx_119115) * Tx_119115; + int64_t tile_dim_start_129184 = Ry_119114 * (Ty_119113 * gid_y_119131 + + reg_tile_i_129182); + int64_t tile_dim_start_129185 = Rx_119116 * (Tx_119115 * gid_x_119130 + + reg_tile_i_129183); + + for (int64_t nest_i_129186 = 0; nest_i_129186 < Ry_119114; + nest_i_129186++) { + for (int64_t nest_i_129187 = 0; nest_i_129187 < Rx_119116; + nest_i_129187++) { + if (slt64(tile_dim_start_129184 + nest_i_129186, m_70861) && + 
slt64(tile_dim_start_129185 + nest_i_129187, N_70860)) { + ((__global double *) mem_124873)[(tile_dim_start_129184 + + nest_i_129186) * N_70860 + + (tile_dim_start_129185 + + nest_i_129187)] = + loop_mem_124870[nest_i_129186 * Rx_119116 + nest_i_129187]; + } + } + } + + error_9: + return; + #undef Ty_119113 + #undef Ry_119114 + #undef Tx_119115 + #undef Rx_119116 + #undef Tk_119117 + #undef tk_div_tx_119118 + #undef tk_div_ty_119119 + #undef TxRx_119120 + #undef TyRy_119121 + #undef a_loc_szz_119123 + #undef b_loc_szz_119125 +} +__kernel void mainDetailedzisegmap_intragroup_77536(__global + int *global_failure, + __local volatile + int64_t *mem_120188_backing_aligned_0, + __local volatile + int64_t *mem_120186_backing_aligned_1, + __local volatile + int64_t *mem_120184_backing_aligned_2, + int64_t N_70860, + int64_t n_70864, + int64_t m_70956, __global + unsigned char *images_mem_120108, + __global + unsigned char *mem_120191, + __global + unsigned char *mem_120194, + __global + unsigned char *mem_120197) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_120188_backing_2 = (__local volatile + char *) mem_120188_backing_aligned_0; + __local volatile char *restrict mem_120186_backing_1 = (__local volatile + char *) mem_120186_backing_aligned_1; + __local volatile char *restrict mem_120184_backing_0 = (__local volatile + char *) mem_120184_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126405; + int32_t local_tid_126406; + int64_t group_sizze_126409; + int32_t wave_sizze_126408; + int32_t group_tid_126407; + + global_tid_126405 = get_global_id(0); + local_tid_126406 = get_local_id(0); + group_sizze_126409 = get_local_size(0); + wave_sizze_126408 = LOCKSTEP_WIDTH; + group_tid_126407 = get_group_id(0); + + int32_t phys_tid_77536; + + phys_tid_77536 = group_tid_126407; + + int32_t ltid_pre_126410; + + ltid_pre_126410 = local_tid_126406; + + int64_t gtid_77528; + 
+ gtid_77528 = sext_i32_i64(group_tid_126407); + + __local char *mem_120184; + + mem_120184 = (__local char *) mem_120184_backing_0; + + int64_t gtid_77531 = sext_i32_i64(ltid_pre_126410); + int32_t phys_tid_77532 = local_tid_126406; + int64_t binop_y_115020 = (int64_t) -1 * gtid_77531; + int64_t slice_115021 = m_70956 + binop_y_115020; + double x_77746 = ((__global double *) images_mem_120108)[gtid_77528 * + N_70860 + + slice_115021]; + bool defunc_0_f_res_77747; + + defunc_0_f_res_77747 = futrts_isnan64(x_77746); + + bool defunc_0_g_res_77748 = !defunc_0_f_res_77747; + int64_t defunc_0_f_res_77749 = btoi_bool_i64(defunc_0_g_res_77748); + + ((__local int64_t *) mem_120184)[gtid_77531] = defunc_0_f_res_77749; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_126411; + + dims_flat_126411 = n_70864; + + int64_t x_77743; + int64_t x_77744; + int64_t x_126413; + int64_t x_126414; + bool ltid_in_bounds_126416; + + ltid_in_bounds_126416 = slt64(sext_i32_i64(local_tid_126406), n_70864); + + int32_t skip_threads_126417; + + // read input for in-block scan + { + if (ltid_in_bounds_126416) { + x_77744 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)]; + if ((local_tid_126406 - squot32(local_tid_126406, 32) * 32) == 0) { + x_77743 = x_77744; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126417 = 1; + while (slt32(skip_threads_126417, 32)) { + if (sle32(skip_threads_126417, local_tid_126406 - + squot32(local_tid_126406, 32) * 32) && + ltid_in_bounds_126416) { + // read operands + { + x_77743 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406) - + sext_i32_i64(skip_threads_126417)]; + } + // perform operation + { + bool inactive_126418 = + slt64(srem64(sext_i32_i64(local_tid_126406), n_70864), + sext_i32_i64(local_tid_126406) - + sext_i32_i64(local_tid_126406 - + skip_threads_126417)); + + if (inactive_126418) { + x_77743 = x_77744; + } + if (!inactive_126418) { + int64_t 
defunc_1_op_res_77745 = add64(x_77743, x_77744); + + x_77743 = defunc_1_op_res_77745; + } + } + } + if (sle32(wave_sizze_126408, skip_threads_126417)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126417, local_tid_126406 - + squot32(local_tid_126406, 32) * 32) && + ltid_in_bounds_126416) { + // write result + { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)] = + x_77743; + x_77744 = x_77743; + } + } + if (sle32(wave_sizze_126408, skip_threads_126417)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126417 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126406 - squot32(local_tid_126406, 32) * 32) == 31 && + ltid_in_bounds_126416) { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(squot32(local_tid_126406, + 32))] = x_77743; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126419; + + // read input for in-block scan + { + if (squot32(local_tid_126406, 32) == 0 && ltid_in_bounds_126416) { + x_126414 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)]; + if ((local_tid_126406 - squot32(local_tid_126406, 32) * 32) == + 0) { + x_126413 = x_126414; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126419 = 1; + while (slt32(skip_threads_126419, 32)) { + if (sle32(skip_threads_126419, local_tid_126406 - + squot32(local_tid_126406, 32) * 32) && + (squot32(local_tid_126406, 32) == 0 && + ltid_in_bounds_126416)) { + // read operands + { + x_126413 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406) - + sext_i32_i64(skip_threads_126419)]; + } + // perform operation + { + bool inactive_126420 = + slt64(srem64(sext_i32_i64(local_tid_126406 * 32 + + 32 - 1), n_70864), + sext_i32_i64(local_tid_126406 * 32 + 32 - + 1) - sext_i32_i64((local_tid_126406 - + 
skip_threads_126419) * + 32 + 32 - 1)); + + if (inactive_126420) { + x_126413 = x_126414; + } + if (!inactive_126420) { + int64_t defunc_1_op_res_126415 = add64(x_126413, + x_126414); + + x_126413 = defunc_1_op_res_126415; + } + } + } + if (sle32(wave_sizze_126408, skip_threads_126419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126419, local_tid_126406 - + squot32(local_tid_126406, 32) * 32) && + (squot32(local_tid_126406, 32) == 0 && + ltid_in_bounds_126416)) { + // write result + { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)] = + x_126413; + x_126414 = x_126413; + } + } + if (sle32(wave_sizze_126408, skip_threads_126419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126419 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126406, 32) == 0 || !ltid_in_bounds_126416)) { + // read operands + { + x_77744 = x_77743; + x_77743 = ((__local + int64_t *) mem_120184)[sext_i32_i64(squot32(local_tid_126406, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126421 = + slt64(srem64(sext_i32_i64(local_tid_126406), n_70864), + sext_i32_i64(local_tid_126406) - + sext_i32_i64(squot32(local_tid_126406, 32) * 32 - + 1)); + + if (inactive_126421) { + x_77743 = x_77744; + } + if (!inactive_126421) { + int64_t defunc_1_op_res_77745 = add64(x_77743, x_77744); + + x_77743 = defunc_1_op_res_77745; + } + } + // write final result + { + ((__local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)] = + x_77743; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126406, 32) == 0) { + ((__local int64_t *) mem_120184)[sext_i32_i64(local_tid_126406)] = + x_77744; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t last_res_77750 = ((__local int64_t *) mem_120184)[m_70956]; + __local char *mem_120186; + + mem_120186 = (__local char *) mem_120186_backing_1; + ((__local 
double *) mem_120186)[sext_i32_i64(local_tid_126406)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_120188; + + mem_120188 = (__local char *) mem_120188_backing_2; + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126406)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_77533 = sext_i32_i64(ltid_pre_126410); + int32_t phys_tid_77534 = local_tid_126406; + int64_t binop_y_115024 = (int64_t) -1 * write_i_77533; + int64_t slice_115025 = m_70956 + binop_y_115024; + double x_77755 = ((__global double *) images_mem_120108)[gtid_77528 * + N_70860 + + slice_115025]; + bool defunc_0_f_res_77758; + + defunc_0_f_res_77758 = futrts_isnan64(x_77755); + + bool defunc_0_g_res_77759 = !defunc_0_f_res_77758; + int64_t defunc_1_f_res_77760; + + if (defunc_0_g_res_77759) { + int64_t x_77756 = ((__local int64_t *) mem_120184)[write_i_77533]; + int64_t defunc_1_f_res_t_res_77761 = sub64(x_77756, (int64_t) 1); + + defunc_1_f_res_77760 = defunc_1_f_res_t_res_77761; + } else { + defunc_1_f_res_77760 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_77760) && slt64(defunc_1_f_res_77760, + n_70864)) { + ((__local int64_t *) mem_120188)[defunc_1_f_res_77760] = write_i_77533; + } + if (sle64((int64_t) 0, defunc_1_f_res_77760) && slt64(defunc_1_f_res_77760, + n_70864)) { + ((__local double *) mem_120186)[defunc_1_f_res_77760] = x_77755; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_126406 == 0) { + ((__global int64_t *) mem_120191)[gtid_77528] = last_res_77750; + } + ((__global double *) mem_120194)[gtid_77528 * n_70864 + + sext_i32_i64(local_tid_126406)] = ((__local + double *) mem_120186)[sext_i32_i64(local_tid_126406)]; + barrier(CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_120197)[gtid_77528 * n_70864 + + sext_i32_i64(local_tid_126406)] = + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126406)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainDetailedzisegmap_intragroup_85470(__global + 
int *global_failure, + __local volatile + int64_t *mem_123888_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128063_backing_aligned_1, + __local volatile + int64_t *mem_123885_backing_aligned_2, + __local volatile + int64_t *mem_123882_backing_aligned_3, + __local volatile + int64_t *red_arr_mem_128059_backing_aligned_4, + __local volatile + int64_t *red_arr_mem_128054_backing_aligned_5, + __local volatile + int64_t *mem_123878_backing_aligned_6, + int64_t m_70861, + int64_t n_70864, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t computed_group_sizze_85437, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_mem_120231, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char *mem_param_123786, + __global + unsigned char *mem_123892, + __global + unsigned char *mem_123895, + __global + unsigned char *mem_123897) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_123888_backing_6 = (__local volatile + char *) mem_123888_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128063_backing_5 = + (__local volatile + char *) red_arr_mem_128063_backing_aligned_1; + __local volatile char *restrict mem_123885_backing_4 = (__local volatile + char *) mem_123885_backing_aligned_2; + __local volatile char *restrict mem_123882_backing_3 = (__local volatile + char *) mem_123882_backing_aligned_3; + __local volatile char *restrict red_arr_mem_128059_backing_2 = + (__local volatile + char *) red_arr_mem_128059_backing_aligned_4; + __local volatile char *restrict red_arr_mem_128054_backing_1 = + (__local volatile + char *) red_arr_mem_128054_backing_aligned_5; + __local volatile char *restrict mem_123878_backing_0 = (__local volatile + char *) mem_123878_backing_aligned_6; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128046; + int32_t 
local_tid_128047; + int64_t group_sizze_128050; + int32_t wave_sizze_128049; + int32_t group_tid_128048; + + global_tid_128046 = get_global_id(0); + local_tid_128047 = get_local_id(0); + group_sizze_128050 = get_local_size(0); + wave_sizze_128049 = LOCKSTEP_WIDTH; + group_tid_128048 = get_group_id(0); + + int32_t phys_tid_85470; + + phys_tid_85470 = group_tid_128048; + + int32_t ltid_pre_128051; + + ltid_pre_128051 = local_tid_128047; + + int32_t ltid_pre_128052; + + ltid_pre_128052 = squot32(local_tid_128047, sext_i64_i32(k2p2zq_70876)); + + int32_t ltid_pre_128053; + + ltid_pre_128053 = local_tid_128047 - squot32(local_tid_128047, + sext_i64_i32(k2p2zq_70876)) * + sext_i64_i32(k2p2zq_70876); + + int64_t gtid_85435; + + gtid_85435 = sext_i32_i64(group_tid_128048); + + double defunc_11_internal_map_res_transformed_row_85600; + + defunc_11_internal_map_res_transformed_row_85600 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_85435 * + n_70864 + + index_primexp_72162]; + + __local char *mem_123878; + + mem_123878 = (__local char *) mem_123878_backing_0; + + double defunc_0_f_res_85601; + int64_t gtid_85438 = sext_i32_i64(ltid_pre_128051); + int32_t phys_tid_85439 = local_tid_128047; + __local char *red_arr_mem_128054; + + red_arr_mem_128054 = (__local char *) red_arr_mem_128054_backing_1; + if (slt64(gtid_85438, k2p2zq_70876)) { + double x_85607 = ((__global double *) mem_120246)[gtid_85438 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85435 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double defunc_0_f_res_85608; + double redout_119887 = 0.0; + + for (int64_t i_119888 = 0; i_119888 < k2p2zq_70876; i_119888++) { + double x_85612 = ((__global double *) mem_120246)[i_119888 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85435 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85613 = ((__global double *) mem_param_123778)[gtid_85435 * + binop_x_120251 + + gtid_85438 * + k2p2zq_70876 + + i_119888]; + double 
defunc_1_f_res_85614 = x_85612 * x_85613; + double defunc_1_op_res_85611 = defunc_1_f_res_85614 + redout_119887; + double redout_tmp_128056 = defunc_1_op_res_85611; + + redout_119887 = redout_tmp_128056; + } + defunc_0_f_res_85608 = redout_119887; + + double defunc_1_f_res_85615 = x_85607 * defunc_0_f_res_85608; + + ((__local double *) red_arr_mem_128054)[gtid_85438] = + defunc_1_f_res_85615; + ((__local double *) mem_123878)[gtid_85438] = defunc_0_f_res_85608; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128057; + int32_t skip_waves_128058; + + skip_waves_128058 = 1; + + double x_85603; + double x_85604; + + offset_128057 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128047, sext_i64_i32(k2p2zq_70876))) { + x_85603 = ((__local + double *) red_arr_mem_128054)[sext_i32_i64(local_tid_128047 + + offset_128057)]; + } + } + offset_128057 = 1; + while (slt32(offset_128057, wave_sizze_128049)) { + if (slt32(local_tid_128047 + offset_128057, + sext_i64_i32(k2p2zq_70876)) && ((local_tid_128047 - + squot32(local_tid_128047, + wave_sizze_128049) * + wave_sizze_128049) & (2 * + offset_128057 - + 1)) == + 0) { + // read array element + { + x_85604 = ((volatile __local + double *) red_arr_mem_128054)[sext_i32_i64(local_tid_128047 + + offset_128057)]; + } + // apply reduction operation + { + double defunc_1_op_res_85605 = x_85603 + x_85604; + + x_85603 = defunc_1_op_res_85605; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128054)[sext_i32_i64(local_tid_128047)] = + x_85603; + } + } + offset_128057 *= 2; + } + while (slt32(skip_waves_128058, + squot32(sext_i64_i32(computed_group_sizze_85437) + + wave_sizze_128049 - 1, wave_sizze_128049))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128057 = skip_waves_128058 * wave_sizze_128049; + if (slt32(local_tid_128047 + offset_128057, + sext_i64_i32(k2p2zq_70876)) && ((local_tid_128047 - + squot32(local_tid_128047, + wave_sizze_128049) * + wave_sizze_128049) 
== 0 && + (squot32(local_tid_128047, + wave_sizze_128049) & + (2 * skip_waves_128058 - + 1)) == 0)) { + // read array element + { + x_85604 = ((__local + double *) red_arr_mem_128054)[sext_i32_i64(local_tid_128047 + + offset_128057)]; + } + // apply reduction operation + { + double defunc_1_op_res_85605 = x_85603 + x_85604; + + x_85603 = defunc_1_op_res_85605; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128054)[sext_i32_i64(local_tid_128047)] = + x_85603; + } + } + skip_waves_128058 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_85601 = ((__local double *) red_arr_mem_128054)[(int64_t) 0]; + + double fr_85616 = 1.0 + defunc_0_f_res_85601; + double defunc_0_f_res_85617; + int64_t gtid_85440 = sext_i32_i64(ltid_pre_128051); + int32_t phys_tid_85441 = local_tid_128047; + __local char *red_arr_mem_128059; + + red_arr_mem_128059 = (__local char *) red_arr_mem_128059_backing_2; + if (slt64(gtid_85440, k2p2zq_70876)) { + double x_85621 = ((__global double *) mem_120246)[gtid_85440 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85435 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85622 = ((__global double *) mem_param_123786)[gtid_85435 * + k2p2zq_70876 + + gtid_85440]; + double defunc_1_f_res_85623 = x_85621 * x_85622; + + ((__local double *) red_arr_mem_128059)[gtid_85440] = + defunc_1_f_res_85623; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128061; + int32_t skip_waves_128062; + + skip_waves_128062 = 1; + + double x_85618; + double x_85619; + + offset_128061 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128047, sext_i64_i32(k2p2zq_70876))) { + x_85618 = ((__local + double *) red_arr_mem_128059)[sext_i32_i64(local_tid_128047 + + offset_128061)]; + } + } + offset_128061 = 1; + while (slt32(offset_128061, wave_sizze_128049)) { + if (slt32(local_tid_128047 + offset_128061, + sext_i64_i32(k2p2zq_70876)) && ((local_tid_128047 - + squot32(local_tid_128047, + 
wave_sizze_128049) * + wave_sizze_128049) & (2 * + offset_128061 - + 1)) == + 0) { + // read array element + { + x_85619 = ((volatile __local + double *) red_arr_mem_128059)[sext_i32_i64(local_tid_128047 + + offset_128061)]; + } + // apply reduction operation + { + double defunc_1_op_res_85620 = x_85618 + x_85619; + + x_85618 = defunc_1_op_res_85620; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128059)[sext_i32_i64(local_tid_128047)] = + x_85618; + } + } + offset_128061 *= 2; + } + while (slt32(skip_waves_128062, + squot32(sext_i64_i32(computed_group_sizze_85437) + + wave_sizze_128049 - 1, wave_sizze_128049))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128061 = skip_waves_128062 * wave_sizze_128049; + if (slt32(local_tid_128047 + offset_128061, + sext_i64_i32(k2p2zq_70876)) && ((local_tid_128047 - + squot32(local_tid_128047, + wave_sizze_128049) * + wave_sizze_128049) == 0 && + (squot32(local_tid_128047, + wave_sizze_128049) & + (2 * skip_waves_128062 - + 1)) == 0)) { + // read array element + { + x_85619 = ((__local + double *) red_arr_mem_128059)[sext_i32_i64(local_tid_128047 + + offset_128061)]; + } + // apply reduction operation + { + double defunc_1_op_res_85620 = x_85618 + x_85619; + + x_85618 = defunc_1_op_res_85620; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128059)[sext_i32_i64(local_tid_128047)] = + x_85618; + } + } + skip_waves_128062 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_85617 = ((__local double *) red_arr_mem_128059)[(int64_t) 0]; + + double resid_85624 = defunc_11_internal_map_res_transformed_row_85600 - + defunc_0_f_res_85617; + double sqrt_res_85625; + + sqrt_res_85625 = futrts_sqrt64(fr_85616); + + double recresid_r_85626 = resid_85624 / sqrt_res_85625; + __local char *mem_123882; + + mem_123882 = (__local char *) mem_123882_backing_3; + + __local char *mem_123885; + + mem_123885 = (__local char *) mem_123885_backing_4; + + int64_t gtid_85449 = 
sext_i32_i64(ltid_pre_128052); + int64_t gtid_85450 = sext_i32_i64(ltid_pre_128053); + int32_t phys_tid_85451 = local_tid_128047; + __local char *red_arr_mem_128063; + + red_arr_mem_128063 = (__local char *) red_arr_mem_128063_backing_5; + if (slt64(gtid_85449, k2p2zq_70876) && slt64(gtid_85450, k2p2zq_70876)) { + double x_85632 = ((__local double *) mem_123878)[gtid_85449]; + double x_85634 = ((__local double *) mem_123878)[gtid_85450]; + double x_85635 = ((__global double *) mem_param_123778)[gtid_85435 * + binop_x_120251 + + gtid_85449 * + k2p2zq_70876 + + gtid_85450]; + double x_85636 = ((__global double *) mem_120246)[gtid_85450 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85435 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85637 = x_85632 * x_85634; + double y_85638 = x_85637 / fr_85616; + double defunc_1_f_res_85639 = x_85635 - y_85638; + double defunc_1_f_res_85640 = x_85636 * defunc_1_f_res_85639; + + ((__local double *) red_arr_mem_128063)[gtid_85449 * k2p2zq_70876 + + gtid_85450] = + defunc_1_f_res_85640; + ((__local double *) mem_123885)[gtid_85449 * k2p2zq_70876 + + gtid_85450] = defunc_1_f_res_85639; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_128065; + + dims_flat_128065 = k2p2zq_70876 * k2p2zq_70876; + + double x_85629; + double x_85630; + double x_128067; + double x_128068; + bool ltid_in_bounds_128070; + + ltid_in_bounds_128070 = slt64(sext_i32_i64(local_tid_128047), k2p2zq_70876 * + k2p2zq_70876); + + int32_t skip_threads_128071; + + // read input for in-block scan + { + if (ltid_in_bounds_128070) { + x_85630 = ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)]; + if ((local_tid_128047 - squot32(local_tid_128047, 32) * 32) == 0) { + x_85629 = x_85630; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128071 = 1; + while (slt32(skip_threads_128071, 32)) { + if (sle32(skip_threads_128071, local_tid_128047 - + squot32(local_tid_128047, 32) * 32) 
&& + ltid_in_bounds_128070) { + // read operands + { + x_85629 = ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047) - + sext_i32_i64(skip_threads_128071)]; + } + // perform operation + { + bool inactive_128072 = + slt64(srem64(sext_i32_i64(local_tid_128047), + k2p2zq_70876), + sext_i32_i64(local_tid_128047) - + sext_i32_i64(local_tid_128047 - + skip_threads_128071)); + + if (inactive_128072) { + x_85629 = x_85630; + } + if (!inactive_128072) { + double defunc_1_op_res_85631 = x_85629 + x_85630; + + x_85629 = defunc_1_op_res_85631; + } + } + } + if (sle32(wave_sizze_128049, skip_threads_128071)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128071, local_tid_128047 - + squot32(local_tid_128047, 32) * 32) && + ltid_in_bounds_128070) { + // write result + { + ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)] = + x_85629; + x_85630 = x_85629; + } + } + if (sle32(wave_sizze_128049, skip_threads_128071)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128071 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128047 - squot32(local_tid_128047, 32) * 32) == 31 && + ltid_in_bounds_128070) { + ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(squot32(local_tid_128047, + 32))] = + x_85629; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128073; + + // read input for in-block scan + { + if (squot32(local_tid_128047, 32) == 0 && ltid_in_bounds_128070) { + x_128068 = ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)]; + if ((local_tid_128047 - squot32(local_tid_128047, 32) * 32) == + 0) { + x_128067 = x_128068; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128073 = 1; + while (slt32(skip_threads_128073, 32)) { + if 
(sle32(skip_threads_128073, local_tid_128047 - + squot32(local_tid_128047, 32) * 32) && + (squot32(local_tid_128047, 32) == 0 && + ltid_in_bounds_128070)) { + // read operands + { + x_128067 = ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047) - + sext_i32_i64(skip_threads_128073)]; + } + // perform operation + { + bool inactive_128074 = + slt64(srem64(sext_i32_i64(local_tid_128047 * 32 + + 32 - 1), k2p2zq_70876), + sext_i32_i64(local_tid_128047 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128047 - + skip_threads_128073) * + 32 + 32 - 1)); + + if (inactive_128074) { + x_128067 = x_128068; + } + if (!inactive_128074) { + double defunc_1_op_res_128069 = x_128067 + x_128068; + + x_128067 = defunc_1_op_res_128069; + } + } + } + if (sle32(wave_sizze_128049, skip_threads_128073)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128073, local_tid_128047 - + squot32(local_tid_128047, 32) * 32) && + (squot32(local_tid_128047, 32) == 0 && + ltid_in_bounds_128070)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)] = + x_128067; + x_128068 = x_128067; + } + } + if (sle32(wave_sizze_128049, skip_threads_128073)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128073 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128047, 32) == 0 || !ltid_in_bounds_128070)) { + // read operands + { + x_85630 = x_85629; + x_85629 = ((__local + double *) red_arr_mem_128063)[sext_i32_i64(squot32(local_tid_128047, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128075 = + slt64(srem64(sext_i32_i64(local_tid_128047), k2p2zq_70876), + sext_i32_i64(local_tid_128047) - + sext_i32_i64(squot32(local_tid_128047, 32) * 32 - + 1)); + + if (inactive_128075) { + x_85629 = x_85630; + } + if (!inactive_128075) { + double defunc_1_op_res_85631 = x_85629 + x_85630; + + x_85629 = defunc_1_op_res_85631; + } + 
} + // write final result + { + ((__local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)] = + x_85629; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128047, 32) == 0) { + ((__local + double *) red_arr_mem_128063)[sext_i32_i64(local_tid_128047)] = + x_85630; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128076 = 0; i_128076 < sdiv_up64(k2p2zq_70876 - + sext_i32_i64(local_tid_128047), + computed_group_sizze_85437); + i_128076++) { + ((__local double *) mem_123882)[i_128076 * computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047)] = + ((__local double *) red_arr_mem_128063)[(i_128076 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047)) * + k2p2zq_70876 + + (k2p2zq_70876 - + (int64_t) 1)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_123888; + + mem_123888 = (__local char *) mem_123888_backing_6; + + int64_t gtid_85443 = sext_i32_i64(ltid_pre_128051); + int32_t phys_tid_85444 = local_tid_128047; + + if (slt64(gtid_85443, k2p2zq_70876)) { + double x_85642 = ((__global double *) mem_param_123786)[gtid_85435 * + k2p2zq_70876 + + gtid_85443]; + double defunc_0_f_res_85643 = ((__local + double *) mem_123882)[gtid_85443]; + double defunc_0_g_res_85644 = resid_85624 * defunc_0_f_res_85643; + double defunc_1_f_res_85645 = x_85642 + defunc_0_g_res_85644; + + ((__local double *) mem_123888)[gtid_85443] = defunc_1_f_res_85645; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128077 = 0; i_128077 < sdiv_up64(k2p2zq_70876 * + k2p2zq_70876 - + sext_i32_i64(local_tid_128047), + computed_group_sizze_85437); + i_128077++) { + ((__global double *) mem_123892)[gtid_85435 * (k2p2zq_70876 * + k2p2zq_70876) + + squot64(i_128077 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047), + k2p2zq_70876) * k2p2zq_70876 + + (i_128077 * + computed_group_sizze_85437 + + 
sext_i32_i64(local_tid_128047) - + squot64(i_128077 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) mem_123885)[squot64(i_128077 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047), + k2p2zq_70876) * + k2p2zq_70876 + + (i_128077 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047) - + squot64(i_128077 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047), + k2p2zq_70876) * + k2p2zq_70876)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128078 = 0; i_128078 < sdiv_up64(k2p2zq_70876 - + sext_i32_i64(local_tid_128047), + computed_group_sizze_85437); + i_128078++) { + ((__global double *) mem_123895)[gtid_85435 * k2p2zq_70876 + (i_128078 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047))] = + ((__local double *) mem_123888)[i_128078 * + computed_group_sizze_85437 + + sext_i32_i64(local_tid_128047)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_128047 == 0) { + ((__global double *) mem_123897)[gtid_85435] = recresid_r_85626; + } + + error_7: + return; +} +__kernel void mainDetailedzisegmap_intragroup_85968(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124032_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128318_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_128314_backing_aligned_2, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t computed_group_sizze_85959, + __global + unsigned char *defunc_3_map_res_mem_120230, + __global + unsigned char *mem_121934, + __global + unsigned char *mem_124035, + __global + unsigned char *mem_124037) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124032_backing_2 = (__local volatile + char *) mem_124032_backing_aligned_0; + __local 
volatile char *restrict red_arr_mem_128318_backing_1 = + (__local volatile + char *) red_arr_mem_128318_backing_aligned_1; + __local volatile char *restrict red_arr_mem_128314_backing_0 = + (__local volatile + char *) red_arr_mem_128314_backing_aligned_2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128307; + int32_t local_tid_128308; + int64_t group_sizze_128311; + int32_t wave_sizze_128310; + int32_t group_tid_128309; + + global_tid_128307 = get_global_id(0); + local_tid_128308 = get_local_id(0); + group_sizze_128311 = get_local_size(0); + wave_sizze_128310 = LOCKSTEP_WIDTH; + group_tid_128309 = get_group_id(0); + + int32_t phys_tid_85968; + + phys_tid_85968 = group_tid_128309; + + int32_t ltid_pre_128312; + + ltid_pre_128312 = local_tid_128308; + + int32_t ltid_pre_128313; + + ltid_pre_128313 = local_tid_128308; + + int64_t gtid_85957; + + gtid_85957 = sext_i32_i64(group_tid_128309); + + int64_t x_86078; + + x_86078 = ((__global int64_t *) defunc_3_map_res_mem_120230)[gtid_85957]; + + int64_t n_86079 = sub64(x_86078, k2p2zq_70876); + double i64_res_86080 = sitofp_i64_f64(n_86079); + double defunc_2_reduce_res_86081; + int64_t gtid_85960 = sext_i32_i64(ltid_pre_128312); + int32_t phys_tid_85961 = local_tid_128308; + __local char *red_arr_mem_128314; + + red_arr_mem_128314 = (__local char *) red_arr_mem_128314_backing_0; + if (slt64(gtid_85960, num_recresids_padded_71534)) { + double x_86089 = ((__global double *) mem_121934)[gtid_85960 * m_70861 + + gtid_85957]; + + ((__local double *) red_arr_mem_128314)[gtid_85960] = x_86089; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128316; + int32_t skip_waves_128317; + + skip_waves_128317 = 1; + + double x_86082; + double x_86083; + + offset_128316 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128308, 
sext_i64_i32(num_recresids_padded_71534))) { + x_86082 = ((__local + double *) red_arr_mem_128314)[sext_i32_i64(local_tid_128308 + + offset_128316)]; + } + } + offset_128316 = 1; + while (slt32(offset_128316, wave_sizze_128310)) { + if (slt32(local_tid_128308 + offset_128316, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128308 - squot32(local_tid_128308, wave_sizze_128310) * + wave_sizze_128310) & (2 * offset_128316 - 1)) == 0) { + // read array element + { + x_86083 = ((volatile __local + double *) red_arr_mem_128314)[sext_i32_i64(local_tid_128308 + + offset_128316)]; + } + // apply reduction operation + { + bool isnan_res_86084; + + isnan_res_86084 = futrts_isnan64(x_86082); + + double defunc_1_op_res_86085; + + if (isnan_res_86084) { + defunc_1_op_res_86085 = x_86083; + } else { + bool isnan_res_86086; + + isnan_res_86086 = futrts_isnan64(x_86083); + + double defunc_1_op_res_f_res_86087; + + if (isnan_res_86086) { + defunc_1_op_res_f_res_86087 = x_86082; + } else { + double defunc_1_op_res_f_res_f_res_86088 = x_86082 + + x_86083; + + defunc_1_op_res_f_res_86087 = + defunc_1_op_res_f_res_f_res_86088; + } + defunc_1_op_res_86085 = defunc_1_op_res_f_res_86087; + } + x_86082 = defunc_1_op_res_86085; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128314)[sext_i32_i64(local_tid_128308)] = + x_86082; + } + } + offset_128316 *= 2; + } + while (slt32(skip_waves_128317, + squot32(sext_i64_i32(computed_group_sizze_85959) + + wave_sizze_128310 - 1, wave_sizze_128310))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128316 = skip_waves_128317 * wave_sizze_128310; + if (slt32(local_tid_128308 + offset_128316, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128308 - squot32(local_tid_128308, wave_sizze_128310) * + wave_sizze_128310) == 0 && (squot32(local_tid_128308, + wave_sizze_128310) & (2 * + skip_waves_128317 - + 1)) == + 0)) { + // read array element + { + x_86083 = ((__local + double *) 
red_arr_mem_128314)[sext_i32_i64(local_tid_128308 + + offset_128316)]; + } + // apply reduction operation + { + bool isnan_res_86084; + + isnan_res_86084 = futrts_isnan64(x_86082); + + double defunc_1_op_res_86085; + + if (isnan_res_86084) { + defunc_1_op_res_86085 = x_86083; + } else { + bool isnan_res_86086; + + isnan_res_86086 = futrts_isnan64(x_86083); + + double defunc_1_op_res_f_res_86087; + + if (isnan_res_86086) { + defunc_1_op_res_f_res_86087 = x_86082; + } else { + double defunc_1_op_res_f_res_f_res_86088 = x_86082 + + x_86083; + + defunc_1_op_res_f_res_86087 = + defunc_1_op_res_f_res_f_res_86088; + } + defunc_1_op_res_86085 = defunc_1_op_res_f_res_86087; + } + x_86082 = defunc_1_op_res_86085; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128314)[sext_i32_i64(local_tid_128308)] = + x_86082; + } + } + skip_waves_128317 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_86081 = ((__local + double *) red_arr_mem_128314)[(int64_t) 0]; + + double x_mean_86090 = defunc_2_reduce_res_86081 / i64_res_86080; + double defunc_2_reduce_res_86091; + int64_t gtid_85962 = sext_i32_i64(ltid_pre_128312); + int32_t phys_tid_85963 = local_tid_128308; + __local char *red_arr_mem_128318; + + red_arr_mem_128318 = (__local char *) red_arr_mem_128318_backing_1; + if (slt64(gtid_85962, num_recresids_padded_71534)) { + double x_86095 = ((__global double *) mem_121934)[gtid_85962 * m_70861 + + gtid_85957]; + bool isnan_res_86096; + + isnan_res_86096 = futrts_isnan64(x_86095); + + double defunc_0_f_res_86097; + + if (isnan_res_86096) { + defunc_0_f_res_86097 = 0.0; + } else { + double x_86098 = x_86095 - x_mean_86090; + double defunc_0_f_res_f_res_86099 = fpow64(x_86098, 2.0); + + defunc_0_f_res_86097 = defunc_0_f_res_f_res_86099; + } + ((__local double *) red_arr_mem_128318)[gtid_85962] = + defunc_0_f_res_86097; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128320; + int32_t skip_waves_128321; + + skip_waves_128321 = 1; + + double 
x_86092; + double x_86093; + + offset_128320 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128308, sext_i64_i32(num_recresids_padded_71534))) { + x_86092 = ((__local + double *) red_arr_mem_128318)[sext_i32_i64(local_tid_128308 + + offset_128320)]; + } + } + offset_128320 = 1; + while (slt32(offset_128320, wave_sizze_128310)) { + if (slt32(local_tid_128308 + offset_128320, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128308 - squot32(local_tid_128308, wave_sizze_128310) * + wave_sizze_128310) & (2 * offset_128320 - 1)) == 0) { + // read array element + { + x_86093 = ((volatile __local + double *) red_arr_mem_128318)[sext_i32_i64(local_tid_128308 + + offset_128320)]; + } + // apply reduction operation + { + double defunc_1_op_res_86094 = x_86092 + x_86093; + + x_86092 = defunc_1_op_res_86094; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128318)[sext_i32_i64(local_tid_128308)] = + x_86092; + } + } + offset_128320 *= 2; + } + while (slt32(skip_waves_128321, + squot32(sext_i64_i32(computed_group_sizze_85959) + + wave_sizze_128310 - 1, wave_sizze_128310))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128320 = skip_waves_128321 * wave_sizze_128310; + if (slt32(local_tid_128308 + offset_128320, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128308 - squot32(local_tid_128308, wave_sizze_128310) * + wave_sizze_128310) == 0 && (squot32(local_tid_128308, + wave_sizze_128310) & (2 * + skip_waves_128321 - + 1)) == + 0)) { + // read array element + { + x_86093 = ((__local + double *) red_arr_mem_128318)[sext_i32_i64(local_tid_128308 + + offset_128320)]; + } + // apply reduction operation + { + double defunc_1_op_res_86094 = x_86092 + x_86093; + + x_86092 = defunc_1_op_res_86094; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128318)[sext_i32_i64(local_tid_128308)] = + x_86092; + } + } + skip_waves_128321 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + 
defunc_2_reduce_res_86091 = ((__local + double *) red_arr_mem_128318)[(int64_t) 0]; + + double y_86100 = i64_res_86080 - 1.0; + double binop_p_86101 = defunc_2_reduce_res_86091 / y_86100; + double defunc_0_f_res_86102; + + defunc_0_f_res_86102 = futrts_sqrt64(binop_p_86101); + + double sqrt_res_86103; + + sqrt_res_86103 = futrts_sqrt64(i64_res_86080); + + double fr_86104 = defunc_0_f_res_86102 * sqrt_res_86103; + __local char *mem_124032; + + mem_124032 = (__local char *) mem_124032_backing_2; + + int64_t gtid_85964 = sext_i32_i64(ltid_pre_128313); + int32_t phys_tid_85965 = local_tid_128308; + + if (slt64(gtid_85964, Nmk_72261)) { + bool cond_86110 = gtid_85964 == (int64_t) 0; + double defunc_0_f_res_86111; + + if (cond_86110) { + defunc_0_f_res_86111 = 0.0; + } else { + int64_t i_86112 = sub64(gtid_85964, (int64_t) 1); + bool x_86113 = sle64((int64_t) 0, i_86112); + bool y_86114 = slt64(i_86112, num_recresids_padded_71534); + bool bounds_check_86115 = x_86113 && y_86114; + bool index_certs_86116; + + if (!bounds_check_86115) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 387) == + -1) { + global_failure_args[0] = i_86112; + global_failure_args[1] = num_recresids_padded_71534; + ; + } + local_failure = true; + goto error_4; + } + } + + double x_86117 = ((__global double *) mem_121934)[i_86112 * + m_70861 + + gtid_85957]; + double defunc_0_f_res_f_res_86118 = x_86117 / fr_86104; + + defunc_0_f_res_86111 = defunc_0_f_res_f_res_86118; + } + ((__local double *) mem_124032)[gtid_85964] = defunc_0_f_res_86111; + } + + error_4: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_128322; + + dims_flat_128322 = Nmk_72261; + + double x_86106; + double x_86107; + double x_128324; + double x_128325; + bool ltid_in_bounds_128327; + + ltid_in_bounds_128327 = slt64(sext_i32_i64(local_tid_128308), Nmk_72261); + + int32_t skip_threads_128328; + + // read input for in-block scan + { + if 
(ltid_in_bounds_128327) { + x_86107 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308)]; + if ((local_tid_128308 - squot32(local_tid_128308, 32) * 32) == 0) { + x_86106 = x_86107; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128328 = 1; + while (slt32(skip_threads_128328, 32)) { + if (sle32(skip_threads_128328, local_tid_128308 - + squot32(local_tid_128308, 32) * 32) && + ltid_in_bounds_128327) { + // read operands + { + x_86106 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308) - + sext_i32_i64(skip_threads_128328)]; + } + // perform operation + { + bool inactive_128329 = + slt64(srem64(sext_i32_i64(local_tid_128308), + Nmk_72261), + sext_i32_i64(local_tid_128308) - + sext_i32_i64(local_tid_128308 - + skip_threads_128328)); + + if (inactive_128329) { + x_86106 = x_86107; + } + if (!inactive_128329) { + double defunc_1_op_res_86108 = x_86106 + x_86107; + + x_86106 = defunc_1_op_res_86108; + } + } + } + if (sle32(wave_sizze_128310, skip_threads_128328)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128328, local_tid_128308 - + squot32(local_tid_128308, 32) * 32) && + ltid_in_bounds_128327) { + // write result + { + ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308)] = + x_86106; + x_86107 = x_86106; + } + } + if (sle32(wave_sizze_128310, skip_threads_128328)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128328 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128308 - squot32(local_tid_128308, 32) * 32) == 31 && + ltid_in_bounds_128327) { + ((volatile __local + double *) mem_124032)[sext_i32_i64(squot32(local_tid_128308, + 32))] = x_86106; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128330; + + // read input for in-block scan + { + if 
(squot32(local_tid_128308, 32) == 0 && ltid_in_bounds_128327) { + x_128325 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308)]; + if ((local_tid_128308 - squot32(local_tid_128308, 32) * 32) == + 0) { + x_128324 = x_128325; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128330 = 1; + while (slt32(skip_threads_128330, 32)) { + if (sle32(skip_threads_128330, local_tid_128308 - + squot32(local_tid_128308, 32) * 32) && + (squot32(local_tid_128308, 32) == 0 && + ltid_in_bounds_128327)) { + // read operands + { + x_128324 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308) - + sext_i32_i64(skip_threads_128330)]; + } + // perform operation + { + bool inactive_128331 = + slt64(srem64(sext_i32_i64(local_tid_128308 * 32 + + 32 - 1), Nmk_72261), + sext_i32_i64(local_tid_128308 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128308 - + skip_threads_128330) * + 32 + 32 - 1)); + + if (inactive_128331) { + x_128324 = x_128325; + } + if (!inactive_128331) { + double defunc_1_op_res_128326 = x_128324 + x_128325; + + x_128324 = defunc_1_op_res_128326; + } + } + } + if (sle32(wave_sizze_128310, skip_threads_128330)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128330, local_tid_128308 - + squot32(local_tid_128308, 32) * 32) && + (squot32(local_tid_128308, 32) == 0 && + ltid_in_bounds_128327)) { + // write result + { + ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128308)] = + x_128324; + x_128325 = x_128324; + } + } + if (sle32(wave_sizze_128310, skip_threads_128330)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128330 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128308, 32) == 0 || !ltid_in_bounds_128327)) { + // read operands + { + x_86107 = x_86106; + x_86106 = ((__local + double *) mem_124032)[sext_i32_i64(squot32(local_tid_128308, + 32)) - + (int64_t) 1]; + } + // perform 
operation + { + bool inactive_128332 = + slt64(srem64(sext_i32_i64(local_tid_128308), Nmk_72261), + sext_i32_i64(local_tid_128308) - + sext_i32_i64(squot32(local_tid_128308, 32) * 32 - + 1)); + + if (inactive_128332) { + x_86106 = x_86107; + } + if (!inactive_128332) { + double defunc_1_op_res_86108 = x_86106 + x_86107; + + x_86106 = defunc_1_op_res_86108; + } + } + // write final result + { + ((__local + double *) mem_124032)[sext_i32_i64(local_tid_128308)] = + x_86106; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128308, 32) == 0) { + ((__local double *) mem_124032)[sext_i32_i64(local_tid_128308)] = + x_86107; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128333 = 0; i_128333 < sdiv_up64(Nmk_72261 - + sext_i32_i64(local_tid_128308), + computed_group_sizze_85959); + i_128333++) { + ((__global double *) mem_124035)[gtid_85957 * Nmk_72261 + (i_128333 * + computed_group_sizze_85959 + + sext_i32_i64(local_tid_128308))] = + ((__local double *) mem_124032)[i_128333 * + computed_group_sizze_85959 + + sext_i32_i64(local_tid_128308)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_128308 == 0) { + ((__global int64_t *) mem_124037)[gtid_85957] = n_86079; + } + + error_5: + return; +} /* Kernel mainDetailedzisegmap_intragroup_86392: the work-group id selects row gtid_86383. Each work-item reads element (1 + local id) of that row of defunc_3_map_res_mem_124068, divides it by 1 + (2*local_id + 2)/x (x read from defunc_3_map_res_mem_124069[gtid_86383]) and stores the absolute value in local memory; the group max-reduces these values, derives pval_brownian_motion_max_res from the maximum, min-reduces the first index whose |value| exceeds the matching mem_124078 entry, and work-item 0 writes the final y_start to mem_124118[gtid_86383]. Returns immediately if *global_failure >= 0. */ +__kernel void mainDetailedzisegmap_intragroup_86392(__global + int *global_failure, + __local volatile + int64_t *red_arr_mem_128570_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128566_backing_aligned_1, + double level_70867, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124118) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128570_backing_1 = + (__local volatile + char *) red_arr_mem_128570_backing_aligned_0; +
__local volatile char *restrict red_arr_mem_128566_backing_0 = + (__local volatile + char *) red_arr_mem_128566_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128560; + int32_t local_tid_128561; + int64_t group_sizze_128564; + int32_t wave_sizze_128563; + int32_t group_tid_128562; + + global_tid_128560 = get_global_id(0); + local_tid_128561 = get_local_id(0); + group_sizze_128564 = get_local_size(0); + wave_sizze_128563 = LOCKSTEP_WIDTH; + group_tid_128562 = get_group_id(0); + + int32_t phys_tid_86392; + + phys_tid_86392 = group_tid_128562; + + int32_t ltid_pre_128565; + + ltid_pre_128565 = local_tid_128561; + + int64_t gtid_86383; + + gtid_86383 = sext_i32_i64(group_tid_128562); + + int64_t x_86673; + + x_86673 = ((__global int64_t *) defunc_3_map_res_mem_124069)[gtid_86383]; + + double i64_res_86678 = sitofp_i64_f64(x_86673); + double defunc_2_reduce_res_86679; + int64_t gtid_86386 = sext_i32_i64(ltid_pre_128565); + int32_t phys_tid_86387 = local_tid_128561; + __local char *red_arr_mem_128566; + + red_arr_mem_128566 = (__local char *) red_arr_mem_128566_backing_0; + + int64_t slice_115279; + + slice_115279 = (int64_t) 1 + gtid_86386; + + double x_86683 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86383 * + Nmk_72261 + + slice_115279]; + int64_t x_86685 = mul64((int64_t) 2, gtid_86386); + int64_t i64_arg_86686 = add64((int64_t) 2, x_86685); + double i64_res_86687 = sitofp_i64_f64(i64_arg_86686); + double y_86688 = i64_res_86687 / i64_res_86678; + double lifted_div_res_86689 = 1.0 + y_86688; + double abs_arg_86690 = x_86683 / lifted_div_res_86689; + double abs_res_86691 = fabs(abs_arg_86690); + + ((__local double *) red_arr_mem_128566)[gtid_86386] = abs_res_86691; + barrier(CLK_LOCAL_MEM_FENCE); /* Max-reduction over the first num_recresids_padded_71534 local entries: the first loop combines within wave_sizze-wide segments without barriers, the second loop combines segment leaders with a barrier per step; the result ends up in element 0. */ + + int32_t offset_128568; + int32_t skip_waves_128569; + + skip_waves_128569 = 1; + + double x_86680; + double x_86681; + + offset_128568 = 0; + // participating threads read initial accumulator + { + if
(slt32(local_tid_128561, sext_i64_i32(num_recresids_padded_71534))) { + x_86680 = ((__local + double *) red_arr_mem_128566)[sext_i32_i64(local_tid_128561 + + offset_128568)]; + } + } + offset_128568 = 1; + while (slt32(offset_128568, wave_sizze_128563)) { + if (slt32(local_tid_128561 + offset_128568, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128561 - squot32(local_tid_128561, wave_sizze_128563) * + wave_sizze_128563) & (2 * offset_128568 - 1)) == 0) { + // read array element + { + x_86681 = ((volatile __local + double *) red_arr_mem_128566)[sext_i32_i64(local_tid_128561 + + offset_128568)]; + } + // apply reduction operation + { + double defunc_1_op_res_86682 = fmax64(x_86680, x_86681); + + x_86680 = defunc_1_op_res_86682; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128566)[sext_i32_i64(local_tid_128561)] = + x_86680; + } + } + offset_128568 *= 2; + } + while (slt32(skip_waves_128569, + squot32(sext_i64_i32(num_recresids_padded_71534) + + wave_sizze_128563 - 1, wave_sizze_128563))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128568 = skip_waves_128569 * wave_sizze_128563; + if (slt32(local_tid_128561 + offset_128568, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128561 - squot32(local_tid_128561, wave_sizze_128563) * + wave_sizze_128563) == 0 && (squot32(local_tid_128561, + wave_sizze_128563) & (2 * + skip_waves_128569 - + 1)) == + 0)) { + // read array element + { + x_86681 = ((__local + double *) red_arr_mem_128566)[sext_i32_i64(local_tid_128561 + + offset_128568)]; + } + // apply reduction operation + { + double defunc_1_op_res_86682 = fmax64(x_86680, x_86681); + + x_86680 = defunc_1_op_res_86682; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128566)[sext_i32_i64(local_tid_128561)] = + x_86680; + } + } + skip_waves_128569 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_86679 = ((__local + double *) red_arr_mem_128566)[(int64_t) 0]; /* The straight-line arithmetic that follows evaluates z = 3*max/1.4142135623730951, t = 1/(1 + |z|/2), and erf_res = +/-(1 - t*exp(polynomial in t minus z*z)); the same sequence is later repeated with z = max/1.4142135623730951. NOTE(review): presumably a series approximation of erf feeding a normal tail probability (names erf_res / defunc_0_Q_res) - confirm against the Futhark source. */ + + double
defunc_0_Q_arg_86692 = 3.0 * defunc_2_reduce_res_86679; + double zs_res_86693 = defunc_0_Q_arg_86692 / 1.4142135623730951; + double abs_res_86694 = fabs(zs_res_86693); + double zs_res_86695 = abs_res_86694 / 2.0; + double zp_res_86696 = 1.0 + zs_res_86695; + double zs_res_86697 = 1.0 / zp_res_86696; + double zt_res_86698 = zs_res_86697 * zs_res_86697; + double zt_res_86699 = zs_res_86697 * zt_res_86698; + double zt_res_86700 = zt_res_86698 * zt_res_86698; + double zt_res_86701 = zt_res_86698 * zt_res_86699; + double zt_res_86702 = zt_res_86699 * zt_res_86699; + double zt_res_86703 = zt_res_86699 * zt_res_86700; + double zt_res_86704 = zt_res_86700 * zt_res_86700; + double zt_res_86705 = zt_res_86700 * zt_res_86701; + double zt_res_86706 = 0.17087277 * zt_res_86705; + double zt_res_86707 = 0.82215223 * zt_res_86704; + double zt_res_86708 = 1.48851587 * zt_res_86703; + double zt_res_86709 = 1.13520398 * zt_res_86702; + double zt_res_86710 = 0.27886807 * zt_res_86701; + double zt_res_86711 = 0.18628806 * zt_res_86700; + double zt_res_86712 = 9.678418e-2 * zt_res_86699; + double zt_res_86713 = 0.37409196 * zt_res_86698; + double zt_res_86714 = 1.00002368 * zs_res_86697; + double zt_res_86715 = zs_res_86693 * zs_res_86693; + double zm_res_86716 = 0.0 - zt_res_86715; + double zm_res_86717 = zm_res_86716 - 1.26551223; + double zp_res_86718 = zt_res_86714 + zm_res_86717; + double zp_res_86719 = zt_res_86713 + zp_res_86718; + double zp_res_86720 = zt_res_86712 + zp_res_86719; + double zm_res_86721 = zp_res_86720 - zt_res_86711; + double zp_res_86722 = zt_res_86710 + zm_res_86721; + double zm_res_86723 = zp_res_86722 - zt_res_86709; + double zp_res_86724 = zt_res_86708 + zm_res_86723; + double zm_res_86725 = zp_res_86724 - zt_res_86707; + double zp_res_86726 = zt_res_86706 + zm_res_86725; + double exp_res_86727; + + exp_res_86727 = futrts_exp64(zp_res_86726); + + double zt_res_86728 = zs_res_86697 * exp_res_86727; + bool zgze_res_86729 = 0.0 <= zs_res_86693; + double
erf_res_86730; + + if (zgze_res_86729) { + double zm_res_86731 = 1.0 - zt_res_86728; + + erf_res_86730 = zm_res_86731; + } else { + double zm_res_86732 = zt_res_86728 - 1.0; + + erf_res_86730 = zm_res_86732; + } + + double zp_res_86733 = 1.0 + erf_res_86730; + double zs_res_86734 = zp_res_86733 / 2.0; + double defunc_0_Q_res_86735 = 1.0 - zs_res_86734; + double y_86736 = fpow64(defunc_2_reduce_res_86679, 2.0); + double negate_arg_86737 = 4.0 * y_86736; + double defunc_0_exp_arg_86738 = 0.0 - negate_arg_86737; + double defunc_0_exp_res_86739 = fpow64(2.718281828459045, + defunc_0_exp_arg_86738); + double x_86740 = defunc_0_Q_res_86735 + defunc_0_exp_res_86739; + double zs_res_86741 = defunc_2_reduce_res_86679 / 1.4142135623730951; + double abs_res_86742 = fabs(zs_res_86741); + double zs_res_86743 = abs_res_86742 / 2.0; + double zp_res_86744 = 1.0 + zs_res_86743; + double zs_res_86745 = 1.0 / zp_res_86744; + double zt_res_86746 = zs_res_86745 * zs_res_86745; + double zt_res_86747 = zs_res_86745 * zt_res_86746; + double zt_res_86748 = zt_res_86746 * zt_res_86746; + double zt_res_86749 = zt_res_86746 * zt_res_86747; + double zt_res_86750 = zt_res_86747 * zt_res_86747; + double zt_res_86751 = zt_res_86747 * zt_res_86748; + double zt_res_86752 = zt_res_86748 * zt_res_86748; + double zt_res_86753 = zt_res_86748 * zt_res_86749; + double zt_res_86754 = 0.17087277 * zt_res_86753; + double zt_res_86755 = 0.82215223 * zt_res_86752; + double zt_res_86756 = 1.48851587 * zt_res_86751; + double zt_res_86757 = 1.13520398 * zt_res_86750; + double zt_res_86758 = 0.27886807 * zt_res_86749; + double zt_res_86759 = 0.18628806 * zt_res_86748; + double zt_res_86760 = 9.678418e-2 * zt_res_86747; + double zt_res_86761 = 0.37409196 * zt_res_86746; + double zt_res_86762 = 1.00002368 * zs_res_86745; + double zt_res_86763 = zs_res_86741 * zs_res_86741; + double zm_res_86764 = 0.0 - zt_res_86763; + double zm_res_86765 = zm_res_86764 - 1.26551223; + double zp_res_86766 = zt_res_86762 +
zm_res_86765; + double zp_res_86767 = zt_res_86761 + zp_res_86766; + double zp_res_86768 = zt_res_86760 + zp_res_86767; + double zm_res_86769 = zp_res_86768 - zt_res_86759; + double zp_res_86770 = zt_res_86758 + zm_res_86769; + double zm_res_86771 = zp_res_86770 - zt_res_86757; + double zp_res_86772 = zt_res_86756 + zm_res_86771; + double zm_res_86773 = zp_res_86772 - zt_res_86755; + double zp_res_86774 = zt_res_86754 + zm_res_86773; + double exp_res_86775; + + exp_res_86775 = futrts_exp64(zp_res_86774); + + double zt_res_86776 = zs_res_86745 * exp_res_86775; + bool zgze_res_86777 = 0.0 <= zs_res_86741; + double erf_res_86778; + + if (zgze_res_86777) { + double zm_res_86779 = 1.0 - zt_res_86776; + + erf_res_86778 = zm_res_86779; + } else { + double zm_res_86780 = zt_res_86776 - 1.0; + + erf_res_86778 = zm_res_86780; + } + + double zp_res_86781 = 1.0 + erf_res_86778; + double zs_res_86782 = zp_res_86781 / 2.0; + double defunc_0_Q_res_86783 = 1.0 - zs_res_86782; + double y_86784 = defunc_0_exp_res_86739 * defunc_0_Q_res_86783; + double y_86785 = x_86740 - y_86784; + double pval_brownian_motion_max_res_86786 = 2.0 * y_86785; /* Second phase: each work-item contributes its own index when mem_124078[...] < |value|, otherwise INT64 max (9223372036854775807); an smin64 reduction with the same two-loop structure as the max-reduction then yields the smallest such index. */ + int64_t defunc_0_f_res_86787; + int64_t gtid_86388 = sext_i32_i64(ltid_pre_128565); + int32_t phys_tid_86389 = local_tid_128561; + __local char *red_arr_mem_128570; + + red_arr_mem_128570 = (__local char *) red_arr_mem_128570_backing_1; + + int64_t slice_115281; + + slice_115281 = (int64_t) 1 + gtid_86388; + + double x_86792 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86383 * + Nmk_72261 + + slice_115281]; + double x_86793 = ((__global double *) mem_124078)[gtid_86383 * Nmk_72261 + + slice_115281]; + double abs_res_86794 = fabs(x_86792); + bool cond_86795 = x_86793 < abs_res_86794; + int64_t defunc_2_f_res_86796; + + if (cond_86795) { + defunc_2_f_res_86796 = gtid_86388; + } else { + defunc_2_f_res_86796 = (int64_t) 9223372036854775807; + } + ((__local int64_t *) red_arr_mem_128570)[gtid_86388] = defunc_2_f_res_86796; +
barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128572; + int32_t skip_waves_128573; + + skip_waves_128573 = 1; + + int64_t x_86788; + int64_t x_86789; + + offset_128572 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128561, sext_i64_i32(num_recresids_padded_71534))) { + x_86788 = ((__local + int64_t *) red_arr_mem_128570)[sext_i32_i64(local_tid_128561 + + offset_128572)]; + } + } + offset_128572 = 1; + while (slt32(offset_128572, wave_sizze_128563)) { + if (slt32(local_tid_128561 + offset_128572, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128561 - squot32(local_tid_128561, wave_sizze_128563) * + wave_sizze_128563) & (2 * offset_128572 - 1)) == 0) { + // read array element + { + x_86789 = ((volatile __local + int64_t *) red_arr_mem_128570)[sext_i32_i64(local_tid_128561 + + offset_128572)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_86790 = smin64(x_86788, x_86789); + + x_86788 = defunc_1_op_res_86790; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128570)[sext_i32_i64(local_tid_128561)] = + x_86788; + } + } + offset_128572 *= 2; + } + while (slt32(skip_waves_128573, + squot32(sext_i64_i32(num_recresids_padded_71534) + + wave_sizze_128563 - 1, wave_sizze_128563))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128572 = skip_waves_128573 * wave_sizze_128563; + if (slt32(local_tid_128561 + offset_128572, + sext_i64_i32(num_recresids_padded_71534)) && + ((local_tid_128561 - squot32(local_tid_128561, wave_sizze_128563) * + wave_sizze_128563) == 0 && (squot32(local_tid_128561, + wave_sizze_128563) & (2 * + skip_waves_128573 - + 1)) == + 0)) { + // read array element + { + x_86789 = ((__local + int64_t *) red_arr_mem_128570)[sext_i32_i64(local_tid_128561 + + offset_128572)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_86790 = smin64(x_86788, x_86789); + + x_86788 = defunc_1_op_res_86790; + } + // write result of operation + { + ((__local +
int64_t *) red_arr_mem_128570)[sext_i32_i64(local_tid_128561)] = + x_86788; + } + } + skip_waves_128573 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_86787 = ((__local + int64_t *) red_arr_mem_128570)[(int64_t) 0]; + + bool isnan_res_86797; + + isnan_res_86797 = futrts_isnan64(pval_brownian_motion_max_res_86786); + + bool cond_86798 = !isnan_res_86797; + bool cond_t_res_86799 = pval_brownian_motion_max_res_86786 < level_70867; + bool x_86800 = cond_86798 && cond_t_res_86799; + bool chk_t_res_86801 = defunc_0_f_res_86787 == + (int64_t) 9223372036854775807; + bool chk_t_res_86802 = !chk_t_res_86801; + bool x_86803 = x_86800 && chk_t_res_86802; + int64_t y_start_86804; + + if (x_86803) { + int64_t y_start_t_res_86805 = sub64(x_86673, defunc_0_f_res_86787); + + y_start_86804 = y_start_t_res_86805; + } else { + y_start_86804 = (int64_t) 0; + } + if (local_tid_128561 == 0) { + ((__global int64_t *) mem_124118)[gtid_86383] = y_start_86804; + } + + error_4: + return; +} /* Kernel mainDetailedzisegmap_intragroup_87528: the work-group id selects matrix gtid_87466. Work-item idx (row = idx / m_72499, col = idx mod m_72499) initialises local buffer mem_124298 with an element of the k2p2zq x k2p2zq matrix in defunc_3_map_res_mem_124294 when col < k2p2zq_70876, otherwise with 1.0 where col == k2p2zq + row and 0.0 elsewhere. A loop of k2p2zq_70876 steps then computes a new buffer mem_124308 from mem_124298 and copies it back, and finally the entries at column offset k2p2zq_70876 of each row are written to mem_124318. NOTE(review): this looks like Gauss-Jordan inversion on an augmented [A | I] matrix - confirm against the Futhark source. A failed bounds check stores an error code in global_failure (first failure also fills global_failure_args), sets local_failure, and the whole group returns after the next barrier. */ +__kernel void mainDetailedzisegmap_intragroup_87528(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124308_backing_aligned_0, + __local volatile + int64_t *mem_124298_backing_aligned_1, + int64_t k2p2zq_70876, + int64_t m_72499, + int64_t nm_72500, __global + unsigned char *defunc_3_map_res_mem_124294, + __global + unsigned char *mem_124318) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124308_backing_1 = (__local volatile + char *) mem_124308_backing_aligned_0; + __local volatile char *restrict mem_124298_backing_0 = (__local volatile + char *) mem_124298_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128841; + int32_t local_tid_128842; +
int64_t group_sizze_128845; + int32_t wave_sizze_128844; + int32_t group_tid_128843; + + global_tid_128841 = get_global_id(0); + local_tid_128842 = get_local_id(0); + group_sizze_128845 = get_local_size(0); + wave_sizze_128844 = LOCKSTEP_WIDTH; + group_tid_128843 = get_group_id(0); + + int32_t phys_tid_87528; + + phys_tid_87528 = group_tid_128843; + + int32_t ltid_pre_128846; + + ltid_pre_128846 = local_tid_128842; + + int64_t gtid_87466; + + gtid_87466 = sext_i32_i64(group_tid_128843); + + __local char *mem_124298; + + mem_124298 = (__local char *) mem_124298_backing_0; + + int64_t gtid_87469 = sext_i32_i64(ltid_pre_128846); + int32_t phys_tid_87470 = local_tid_128842; + int64_t defunc_0_f_res_87539 = sdiv64(gtid_87469, m_72499); + int64_t defunc_0_f_res_87540 = smod64(gtid_87469, m_72499); + bool cond_87541 = slt64(defunc_0_f_res_87540, k2p2zq_70876); + double defunc_0_f_res_87542; + + if (cond_87541) { + bool x_87543 = sle64((int64_t) 0, defunc_0_f_res_87539); + bool y_87544 = slt64(defunc_0_f_res_87539, k2p2zq_70876); + bool bounds_check_87545 = x_87543 && y_87544; + bool x_87546 = sle64((int64_t) 0, defunc_0_f_res_87540); + bool bounds_check_87547 = cond_87541 && x_87546; + bool index_ok_87548 = bounds_check_87545 && bounds_check_87547; + bool index_certs_87549; + + if (!index_ok_87548) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 389) == -1) { + global_failure_args[0] = defunc_0_f_res_87539; + global_failure_args[1] = defunc_0_f_res_87540; + global_failure_args[2] = k2p2zq_70876; + global_failure_args[3] = k2p2zq_70876; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_87550 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_87466 * + (k2p2zq_70876 * + k2p2zq_70876) + + defunc_0_f_res_87539 * + k2p2zq_70876 + + defunc_0_f_res_87540]; + + defunc_0_f_res_87542 = defunc_0_f_res_t_res_87550; + } else { + int64_t y_87551 = add64(k2p2zq_70876, defunc_0_f_res_87539); + bool cond_87552 = defunc_0_f_res_87540
== y_87551; + double defunc_0_f_res_f_res_87553; + + if (cond_87552) { + defunc_0_f_res_f_res_87553 = 1.0; + } else { + defunc_0_f_res_f_res_87553 = 0.0; + } + defunc_0_f_res_87542 = defunc_0_f_res_f_res_87553; + } + ((__local double *) mem_124298)[gtid_87469] = defunc_0_f_res_87542; + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124308; + + mem_124308 = (__local char *) mem_124308_backing_1; /* Step loop: iteration i reads v1 = mem_124298[i]. If v1 == 0.0 the work-item copies its own element unchanged; otherwise it computes mem_124298[col] / v1 and, for rows before the last, subtracts that quotient times mem_124298[i + m*(row+1)] from mem_124298[col + m*(row+1)]. Results go to mem_124308 and are copied back to mem_124298 after a barrier. */ + for (int64_t i_87555 = 0; i_87555 < k2p2zq_70876; i_87555++) { + bool y_87557 = slt64(i_87555, nm_72500); + bool index_certs_87558; + + if (!y_87557) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 390) == -1) { + global_failure_args[0] = i_87555; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_1; + } + } + + double v1_87559 = ((__local double *) mem_124298)[i_87555]; + bool cond_87560 = v1_87559 == 0.0; + int64_t gtid_87487 = sext_i32_i64(ltid_pre_128846); + int32_t phys_tid_87488 = local_tid_128842; + int64_t defunc_0_f_res_87563 = sdiv64(gtid_87487, m_72499); + int64_t defunc_0_f_res_87564 = smod64(gtid_87487, m_72499); + double defunc_0_f_res_87565; + + if (cond_87560) { + int64_t x_87566 = mul64(m_72499, defunc_0_f_res_87563); + int64_t i_87567 = add64(defunc_0_f_res_87564, x_87566); + bool x_87568 = sle64((int64_t) 0, i_87567); + bool y_87569 = slt64(i_87567, nm_72500); + bool bounds_check_87570 = x_87568 && y_87569; + bool index_certs_87571; + + if (!bounds_check_87570) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 391) == + -1) { + global_failure_args[0] = i_87567; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_1; + } + } + + double defunc_0_f_res_t_res_87572 = ((__local + double *) mem_124298)[i_87567]; + + defunc_0_f_res_87565 = defunc_0_f_res_t_res_87572; + } else { + bool x_87573 = sle64((int64_t) 0, defunc_0_f_res_87564); + bool y_87574 = slt64(defunc_0_f_res_87564, nm_72500); + bool
bounds_check_87575 = x_87573 && y_87574; + bool index_certs_87576; + + if (!bounds_check_87575) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 392) == + -1) { + global_failure_args[0] = defunc_0_f_res_87564; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_87577 = ((__local + double *) mem_124298)[defunc_0_f_res_87564]; + double x_87578 = x_87577 / v1_87559; + int64_t y_87579 = sub64(k2p2zq_70876, (int64_t) 1); + bool cond_87580 = slt64(defunc_0_f_res_87563, y_87579); + double defunc_0_f_res_f_res_87581; + + if (cond_87580) { + int64_t x_87582 = add64((int64_t) 1, defunc_0_f_res_87563); + int64_t x_87583 = mul64(m_72499, x_87582); + int64_t i_87584 = add64(defunc_0_f_res_87564, x_87583); + bool x_87585 = sle64((int64_t) 0, i_87584); + bool y_87586 = slt64(i_87584, nm_72500); + bool bounds_check_87587 = x_87585 && y_87586; + bool index_certs_87588; + + if (!bounds_check_87587) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 393) == -1) { + global_failure_args[0] = i_87584; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_87589 = ((__local double *) mem_124298)[i_87584]; + int64_t i_87590 = add64(i_87555, x_87583); + bool x_87591 = sle64((int64_t) 0, i_87590); + bool y_87592 = slt64(i_87590, nm_72500); + bool bounds_check_87593 = x_87591 && y_87592; + bool index_certs_87594; + + if (!bounds_check_87593) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 394) == -1) { + global_failure_args[0] = i_87590; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_87595 = ((__local double *) mem_124298)[i_87590]; + double y_87596 = x_87578 * x_87595; + double defunc_0_f_res_f_res_t_res_87597 = x_87589 - y_87596; + + defunc_0_f_res_f_res_87581 = defunc_0_f_res_f_res_t_res_87597; + } else { + defunc_0_f_res_f_res_87581 = x_87578; + } + defunc_0_f_res_87565 =
defunc_0_f_res_f_res_87581; + } + ((__local double *) mem_124308)[gtid_87487] = defunc_0_f_res_87565; + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_87525 = sext_i32_i64(ltid_pre_128846); + int32_t phys_tid_87526 = local_tid_128842; + double write_value_87600 = ((__local + double *) mem_124308)[write_i_87525]; + + if (sle64((int64_t) 0, write_i_87525) && slt64(write_i_87525, + nm_72500)) { + ((__local double *) mem_124298)[write_i_87525] = write_value_87600; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + for (int64_t i_128848 = 0; i_128848 < sdiv_up64(k2p2zq_70876 * + k2p2zq_70876 - + sext_i32_i64(local_tid_128842), + nm_72500); i_128848++) { + ((__global double *) mem_124318)[gtid_87466 * (k2p2zq_70876 * + k2p2zq_70876) + + squot64(i_128848 * nm_72500 + + sext_i32_i64(local_tid_128842), + k2p2zq_70876) * k2p2zq_70876 + + (i_128848 * nm_72500 + + sext_i32_i64(local_tid_128842) - + squot64(i_128848 * nm_72500 + + sext_i32_i64(local_tid_128842), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) mem_124298)[k2p2zq_70876 + + (squot64(i_128848 * + nm_72500 + + sext_i32_i64(local_tid_128842), + k2p2zq_70876) * + m_72499 + + (i_128848 * + nm_72500 + + sext_i32_i64(local_tid_128842) - + squot64(i_128848 * + nm_72500 + + sext_i32_i64(local_tid_128842), + k2p2zq_70876) * + k2p2zq_70876))]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + error_3: + return; +} /* Kernel mainDetailedzisegmap_intragroup_87666: performs one update step, for the externally supplied step index i_87986, on the buffer selected by the work-group id gtid_87622. Elements are read from mem_param_124330 at offset ctx_param_ext_124325 + gtid * ctx_param_ext_124326 + index * ctx_param_ext_124328 (and staged in local buffer mem_124335); each work-item (row = idx / m_72499, col = idx mod m_72499) computes its new element into local buffer mem_124338, which is copied back to mem_124335 and then written to mem_124342[gtid * nm_72500 + local id]. If the pivot value v1 (element i_87986) is 0.0 the element is passed through unchanged. Bounds-check failures are reported through global_failure / global_failure_args and make the group return after the next barrier. */ +__kernel void mainDetailedzisegmap_intragroup_87666(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124338_backing_aligned_0, + __local volatile + int64_t *mem_124335_backing_aligned_1, + int64_t k2p2zq_70876, + int64_t m_72499, + int64_t nm_72500, + int64_t i_87986, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global + unsigned char *mem_124342) +{ + const
int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124338_backing_1 = (__local volatile + char *) mem_124338_backing_aligned_0; + __local volatile char *restrict mem_124335_backing_0 = (__local volatile + char *) mem_124335_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128870; + int32_t local_tid_128871; + int64_t group_sizze_128874; + int32_t wave_sizze_128873; + int32_t group_tid_128872; + + global_tid_128870 = get_global_id(0); + local_tid_128871 = get_local_id(0); + group_sizze_128874 = get_local_size(0); + wave_sizze_128873 = LOCKSTEP_WIDTH; + group_tid_128872 = get_group_id(0); + + int32_t phys_tid_87666; + + phys_tid_87666 = group_tid_128872; + + int32_t ltid_pre_128875; + + ltid_pre_128875 = local_tid_128871; + + int64_t gtid_87622; + + gtid_87622 = sext_i32_i64(group_tid_128872); + + __local char *mem_124335; + + mem_124335 = (__local char *) mem_124335_backing_0; + ((__local double *) mem_124335)[sext_i32_i64(local_tid_128871)] = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + gtid_87622 * + ctx_param_ext_124326 + + sext_i32_i64(local_tid_128871) * + ctx_param_ext_124328]; + barrier(CLK_LOCAL_MEM_FENCE); + + double v1_88001 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87622 * + ctx_param_ext_124326 + + i_87986 * + ctx_param_ext_124328)]; + bool cond_88002 = v1_88001 == 0.0; + __local char *mem_124338; + + mem_124338 = (__local char *) mem_124338_backing_1; + + int64_t gtid_87625 = sext_i32_i64(ltid_pre_128875); + int32_t phys_tid_87626 = local_tid_128871; + int64_t defunc_0_f_res_88005 = sdiv64(gtid_87625, m_72499); + int64_t defunc_0_f_res_88006 = smod64(gtid_87625, m_72499); + double defunc_0_f_res_88007; + + if (cond_88002) { + int64_t x_88008 =
mul64(m_72499, defunc_0_f_res_88005); + int64_t i_88009 = add64(defunc_0_f_res_88006, x_88008); + bool x_88010 = sle64((int64_t) 0, i_88009); + bool y_88011 = slt64(i_88009, nm_72500); + bool bounds_check_88012 = x_88010 && y_88011; + bool index_certs_88013; + + if (!bounds_check_88012) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 396) == -1) { + global_failure_args[0] = i_88009; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_88014 = ((__local + double *) mem_124335)[i_88009]; + + defunc_0_f_res_88007 = defunc_0_f_res_t_res_88014; + } else { + bool x_88015 = sle64((int64_t) 0, defunc_0_f_res_88006); + bool y_88016 = slt64(defunc_0_f_res_88006, nm_72500); + bool bounds_check_88017 = x_88015 && y_88016; + bool index_certs_88018; + + if (!bounds_check_88017) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 397) == -1) { + global_failure_args[0] = defunc_0_f_res_88006; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_88019 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87622 * + ctx_param_ext_124326 + + defunc_0_f_res_88006 * + ctx_param_ext_124328)]; + double x_88020 = x_88019 / v1_88001; + int64_t y_88021 = sub64(k2p2zq_70876, (int64_t) 1); + bool cond_88022 = slt64(defunc_0_f_res_88005, y_88021); + double defunc_0_f_res_f_res_88023; + + if (cond_88022) { + int64_t x_88024 = add64((int64_t) 1, defunc_0_f_res_88005); + int64_t x_88025 = mul64(m_72499, x_88024); + int64_t i_88026 = add64(defunc_0_f_res_88006, x_88025); + bool x_88027 = sle64((int64_t) 0, i_88026); + bool y_88028 = slt64(i_88026, nm_72500); + bool bounds_check_88029 = x_88027 && y_88028; + bool index_certs_88030; + + if (!bounds_check_88029) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 398) == + -1) { + global_failure_args[0] = i_88026; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; +
goto error_0; + } + } + + double x_88031 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87622 * + ctx_param_ext_124326 + + i_88026 * + ctx_param_ext_124328)]; + int64_t i_88032 = add64(i_87986, x_88025); + bool x_88033 = sle64((int64_t) 0, i_88032); + bool y_88034 = slt64(i_88032, nm_72500); + bool bounds_check_88035 = x_88033 && y_88034; + bool index_certs_88036; + + if (!bounds_check_88035) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 399) == + -1) { + global_failure_args[0] = i_88032; + global_failure_args[1] = nm_72500; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_88037 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_87622 * + ctx_param_ext_124326 + + i_88032 * + ctx_param_ext_124328)]; + double y_88038 = x_88020 * x_88037; + double defunc_0_f_res_f_res_t_res_88039 = x_88031 - y_88038; + + defunc_0_f_res_f_res_88023 = defunc_0_f_res_f_res_t_res_88039; + } else { + defunc_0_f_res_f_res_88023 = x_88020; + } + defunc_0_f_res_88007 = defunc_0_f_res_f_res_88023; + } + ((__local double *) mem_124338)[gtid_87625] = defunc_0_f_res_88007; /* Every work-item, including one that jumped here after a failed bounds check, must reach the barrier below before the group-wide local_failure test. */ + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_87663 = sext_i32_i64(ltid_pre_128875); + int32_t phys_tid_87664 = local_tid_128871; + double write_value_88042 = ((__local double *) mem_124338)[write_i_87663]; + + if (sle64((int64_t) 0, write_i_87663) && slt64(write_i_87663, nm_72500)) { + ((__local double *) mem_124335)[write_i_87663] = write_value_88042; + } + barrier(CLK_LOCAL_MEM_FENCE); + ((__global double *) mem_124342)[gtid_87622 * nm_72500 + + sext_i32_i64(local_tid_128871)] = ((__local + double *) mem_124335)[sext_i32_i64(local_tid_128871)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainDetailedzisegmap_intragroup_88528(__global + int *global_failure, + __local volatile + int64_t *mem_124893_backing_aligned_0, + __local volatile +
int64_t *mem_124891_backing_aligned_1, + __local volatile + int64_t *mem_124889_backing_aligned_2, + __local volatile + int64_t *mem_124887_backing_aligned_3, + int64_t N_70860, + int64_t i_72637, __global + unsigned char *mem_124142, + __global + unsigned char *defunc_3_map_res_mem_124883, + __global + unsigned char *mem_124896, + __global + unsigned char *mem_124899, + __global + unsigned char *mem_124902) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124893_backing_3 = (__local volatile + char *) mem_124893_backing_aligned_0; + __local volatile char *restrict mem_124891_backing_2 = (__local volatile + char *) mem_124891_backing_aligned_1; + __local volatile char *restrict mem_124889_backing_1 = (__local volatile + char *) mem_124889_backing_aligned_2; + __local volatile char *restrict mem_124887_backing_0 = (__local volatile + char *) mem_124887_backing_aligned_3; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129251; + int32_t local_tid_129252; + int64_t group_sizze_129255; + int32_t wave_sizze_129254; + int32_t group_tid_129253; + + global_tid_129251 = get_global_id(0); + local_tid_129252 = get_local_id(0); + group_sizze_129255 = get_local_size(0); + wave_sizze_129254 = LOCKSTEP_WIDTH; + group_tid_129253 = get_group_id(0); + + int32_t phys_tid_88528; + + phys_tid_88528 = group_tid_129253; + + int32_t ltid_pre_129256; + + ltid_pre_129256 = local_tid_129252; + + int64_t gtid_88519; + + gtid_88519 = sext_i32_i64(group_tid_129253); + + __local char *mem_124887; + + mem_124887 = (__local char *) mem_124887_backing_0; + + __local char *mem_124889; + + mem_124889 = (__local char *) mem_124889_backing_1; + + int64_t gtid_88522 = sext_i32_i64(ltid_pre_129256); + int32_t phys_tid_88523 = local_tid_129252; + double x_88546 = ((__global double *) mem_124142)[gtid_88519 * N_70860 + + gtid_88522]; + bool isnan_res_88548; + + isnan_res_88548 = futrts_isnan64(x_88546); + + bool 
cond_88549 = !isnan_res_88548; + double defunc_1_f_res_88550; + + if (cond_88549) { + double x_88547 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_88519 * + N_70860 + + gtid_88522]; + double defunc_1_f_res_t_res_88551 = x_88546 - x_88547; + + defunc_1_f_res_88550 = defunc_1_f_res_t_res_88551; + } else { + defunc_1_f_res_88550 = NAN; + } + + bool isnan_res_88552; + + isnan_res_88552 = futrts_isnan64(defunc_1_f_res_88550); + + bool defunc_0_p_res_88553 = !isnan_res_88552; + int64_t defunc_0_f_res_88554 = btoi_bool_i64(defunc_0_p_res_88553); + + ((__local int64_t *) mem_124887)[gtid_88522] = defunc_0_f_res_88554; + ((__local double *) mem_124889)[gtid_88522] = defunc_1_f_res_88550; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_129257; + + dims_flat_129257 = N_70860; + + int64_t x_88543; + int64_t x_88544; + int64_t x_129259; + int64_t x_129260; + bool ltid_in_bounds_129262; + + ltid_in_bounds_129262 = slt64(sext_i32_i64(local_tid_129252), N_70860); + + int32_t skip_threads_129263; + + // read input for in-block scan + { + if (ltid_in_bounds_129262) { + x_88544 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)]; + if ((local_tid_129252 - squot32(local_tid_129252, 32) * 32) == 0) { + x_88543 = x_88544; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129263 = 1; + while (slt32(skip_threads_129263, 32)) { + if (sle32(skip_threads_129263, local_tid_129252 - + squot32(local_tid_129252, 32) * 32) && + ltid_in_bounds_129262) { + // read operands + { + x_88543 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252) - + sext_i32_i64(skip_threads_129263)]; + } + // perform operation + { + bool inactive_129264 = + slt64(srem64(sext_i32_i64(local_tid_129252), N_70860), + sext_i32_i64(local_tid_129252) - + sext_i32_i64(local_tid_129252 - + skip_threads_129263)); + + if (inactive_129264) { + x_88543 = x_88544; + } + if (!inactive_129264) { + int64_t defunc_1_op_res_88545 = 
add64(x_88543, x_88544); + + x_88543 = defunc_1_op_res_88545; + } + } + } + if (sle32(wave_sizze_129254, skip_threads_129263)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129263, local_tid_129252 - + squot32(local_tid_129252, 32) * 32) && + ltid_in_bounds_129262) { + // write result + { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)] = + x_88543; + x_88544 = x_88543; + } + } + if (sle32(wave_sizze_129254, skip_threads_129263)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129263 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129252 - squot32(local_tid_129252, 32) * 32) == 31 && + ltid_in_bounds_129262) { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129252, + 32))] = x_88543; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129265; + + // read input for in-block scan + { + if (squot32(local_tid_129252, 32) == 0 && ltid_in_bounds_129262) { + x_129260 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)]; + if ((local_tid_129252 - squot32(local_tid_129252, 32) * 32) == + 0) { + x_129259 = x_129260; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129265 = 1; + while (slt32(skip_threads_129265, 32)) { + if (sle32(skip_threads_129265, local_tid_129252 - + squot32(local_tid_129252, 32) * 32) && + (squot32(local_tid_129252, 32) == 0 && + ltid_in_bounds_129262)) { + // read operands + { + x_129259 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252) - + sext_i32_i64(skip_threads_129265)]; + } + // perform operation + { + bool inactive_129266 = + slt64(srem64(sext_i32_i64(local_tid_129252 * 32 + + 32 - 1), N_70860), + sext_i32_i64(local_tid_129252 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129252 - + skip_threads_129265) * + 32 
+ 32 - 1)); + + if (inactive_129266) { + x_129259 = x_129260; + } + if (!inactive_129266) { + int64_t defunc_1_op_res_129261 = add64(x_129259, + x_129260); + + x_129259 = defunc_1_op_res_129261; + } + } + } + if (sle32(wave_sizze_129254, skip_threads_129265)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129265, local_tid_129252 - + squot32(local_tid_129252, 32) * 32) && + (squot32(local_tid_129252, 32) == 0 && + ltid_in_bounds_129262)) { + // write result + { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)] = + x_129259; + x_129260 = x_129259; + } + } + if (sle32(wave_sizze_129254, skip_threads_129265)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129265 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129252, 32) == 0 || !ltid_in_bounds_129262)) { + // read operands + { + x_88544 = x_88543; + x_88543 = ((__local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129252, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129267 = + slt64(srem64(sext_i32_i64(local_tid_129252), N_70860), + sext_i32_i64(local_tid_129252) - + sext_i32_i64(squot32(local_tid_129252, 32) * 32 - + 1)); + + if (inactive_129267) { + x_88543 = x_88544; + } + if (!inactive_129267) { + int64_t defunc_1_op_res_88545 = add64(x_88543, x_88544); + + x_88543 = defunc_1_op_res_88545; + } + } + // write final result + { + ((__local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)] = + x_88543; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129252, 32) == 0) { + ((__local int64_t *) mem_124887)[sext_i32_i64(local_tid_129252)] = + x_88544; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t last_res_88555 = ((__local int64_t *) mem_124887)[i_72637]; + __local char *mem_124891; + + mem_124891 = (__local char *) mem_124891_backing_2; + ((__local double *) 
mem_124891)[sext_i32_i64(local_tid_129252)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124893; + + mem_124893 = (__local char *) mem_124893_backing_3; + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129252)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_88524 = sext_i32_i64(ltid_pre_129256); + int32_t phys_tid_88525 = local_tid_129252; + double x_88560 = ((__local double *) mem_124889)[write_i_88524]; + bool isnan_res_88563; + + isnan_res_88563 = futrts_isnan64(x_88560); + + bool defunc_0_p_res_88564 = !isnan_res_88563; + int64_t defunc_1_f_res_88565; + + if (defunc_0_p_res_88564) { + int64_t x_88561 = ((__local int64_t *) mem_124887)[write_i_88524]; + int64_t defunc_1_f_res_t_res_88566 = sub64(x_88561, (int64_t) 1); + + defunc_1_f_res_88565 = defunc_1_f_res_t_res_88566; + } else { + defunc_1_f_res_88565 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_88565) && slt64(defunc_1_f_res_88565, + N_70860)) { + ((__local int64_t *) mem_124893)[defunc_1_f_res_88565] = write_i_88524; + } + if (sle64((int64_t) 0, defunc_1_f_res_88565) && slt64(defunc_1_f_res_88565, + N_70860)) { + ((__local double *) mem_124891)[defunc_1_f_res_88565] = x_88560; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_129252 == 0) { + ((__global int64_t *) mem_124896)[gtid_88519] = last_res_88555; + } + ((__global double *) mem_124899)[gtid_88519 * N_70860 + + sext_i32_i64(local_tid_129252)] = ((__local + double *) mem_124891)[sext_i32_i64(local_tid_129252)]; + barrier(CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_124902)[gtid_88519 * N_70860 + + sext_i32_i64(local_tid_129252)] = + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129252)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainDetailedzisegmap_intragroup_88740(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129354_backing_aligned_0, + 
__local volatile + int64_t *red_arr_mem_129350_backing_aligned_1, + int64_t N_70860, + int64_t n_70864, + double hfrac_70866, + int64_t k2p2_70874, __global + unsigned char *mem_124142, + __global + unsigned char *mem_124924, + __global + unsigned char *mem_124942, + __global + unsigned char *mem_124944, + __global + unsigned char *mem_124946) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129354_backing_1 = + (__local volatile + char *) red_arr_mem_129354_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129350_backing_0 = + (__local volatile + char *) red_arr_mem_129350_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129344; + int32_t local_tid_129345; + int64_t group_sizze_129348; + int32_t wave_sizze_129347; + int32_t group_tid_129346; + + global_tid_129344 = get_global_id(0); + local_tid_129345 = get_local_id(0); + group_sizze_129348 = get_local_size(0); + wave_sizze_129347 = LOCKSTEP_WIDTH; + group_tid_129346 = get_group_id(0); + + int32_t phys_tid_88740; + + phys_tid_88740 = group_tid_129346; + + int32_t ltid_pre_129349; + + ltid_pre_129349 = local_tid_129345; + + int64_t gtid_88731; + + gtid_88731 = sext_i32_i64(group_tid_129346); + + int64_t defunc_0_f_res_88827; + int64_t gtid_88734 = sext_i32_i64(ltid_pre_129349); + int32_t phys_tid_88735 = local_tid_129345; + __local char *red_arr_mem_129350; + + red_arr_mem_129350 = (__local char *) red_arr_mem_129350_backing_0; + + double x_88831; + + x_88831 = ((__global double *) mem_124142)[gtid_88731 * N_70860 + + gtid_88734]; + + bool isnan_res_88832; + + isnan_res_88832 = futrts_isnan64(x_88831); + + bool cond_88833 = !isnan_res_88832; + int64_t defunc_0_f_res_88834 = btoi_bool_i64(cond_88833); + + ((__local int64_t *) 
red_arr_mem_129350)[gtid_88734] = defunc_0_f_res_88834; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129352; + int32_t skip_waves_129353; + + skip_waves_129353 = 1; + + int64_t x_88828; + int64_t x_88829; + + offset_129352 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129345, sext_i64_i32(n_70864))) { + x_88828 = ((__local + int64_t *) red_arr_mem_129350)[sext_i32_i64(local_tid_129345 + + offset_129352)]; + } + } + offset_129352 = 1; + while (slt32(offset_129352, wave_sizze_129347)) { + if (slt32(local_tid_129345 + offset_129352, sext_i64_i32(n_70864)) && + ((local_tid_129345 - squot32(local_tid_129345, wave_sizze_129347) * + wave_sizze_129347) & (2 * offset_129352 - 1)) == 0) { + // read array element + { + x_88829 = ((volatile __local + int64_t *) red_arr_mem_129350)[sext_i32_i64(local_tid_129345 + + offset_129352)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_88830 = add64(x_88828, x_88829); + + x_88828 = defunc_1_op_res_88830; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129350)[sext_i32_i64(local_tid_129345)] = + x_88828; + } + } + offset_129352 *= 2; + } + while (slt32(skip_waves_129353, squot32(sext_i64_i32(n_70864) + + wave_sizze_129347 - 1, + wave_sizze_129347))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129352 = skip_waves_129353 * wave_sizze_129347; + if (slt32(local_tid_129345 + offset_129352, sext_i64_i32(n_70864)) && + ((local_tid_129345 - squot32(local_tid_129345, wave_sizze_129347) * + wave_sizze_129347) == 0 && (squot32(local_tid_129345, + wave_sizze_129347) & (2 * + skip_waves_129353 - + 1)) == + 0)) { + // read array element + { + x_88829 = ((__local + int64_t *) red_arr_mem_129350)[sext_i32_i64(local_tid_129345 + + offset_129352)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_88830 = add64(x_88828, x_88829); + + x_88828 = defunc_1_op_res_88830; + } + // write result of operation + { + ((__local + int64_t *) 
red_arr_mem_129350)[sext_i32_i64(local_tid_129345)] = + x_88828; + } + } + skip_waves_129353 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_88827 = ((__local + int64_t *) red_arr_mem_129350)[(int64_t) 0]; + + double defunc_0_f_res_88835; + int64_t gtid_88736 = sext_i32_i64(ltid_pre_129349); + int32_t phys_tid_88737 = local_tid_129345; + __local char *red_arr_mem_129354; + + red_arr_mem_129354 = (__local char *) red_arr_mem_129354_backing_1; + + bool cond_88840; + + cond_88840 = slt64(gtid_88736, defunc_0_f_res_88827); + + double defunc_0_f_res_88841; + + if (cond_88840) { + bool y_88843 = slt64(gtid_88736, N_70860); + bool index_certs_88845; + + if (!y_88843) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 405) == -1) { + global_failure_args[0] = gtid_88736; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_2; + } + } + + double defunc_0_f_res_t_res_88846 = ((__global + double *) mem_124924)[gtid_88731 * + N_70860 + + gtid_88736]; + + defunc_0_f_res_88841 = defunc_0_f_res_t_res_88846; + } else { + defunc_0_f_res_88841 = 0.0; + } + + double defunc_0_f_res_88847 = defunc_0_f_res_88841 * defunc_0_f_res_88841; + + ((__local double *) red_arr_mem_129354)[gtid_88736] = defunc_0_f_res_88847; + + error_2: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129356; + int32_t skip_waves_129357; + + skip_waves_129357 = 1; + + double x_88836; + double x_88837; + + offset_129356 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129345, sext_i64_i32(n_70864))) { + x_88836 = ((__local + double *) red_arr_mem_129354)[sext_i32_i64(local_tid_129345 + + offset_129356)]; + } + } + offset_129356 = 1; + while (slt32(offset_129356, wave_sizze_129347)) { + if (slt32(local_tid_129345 + offset_129356, sext_i64_i32(n_70864)) && + ((local_tid_129345 - squot32(local_tid_129345, wave_sizze_129347) * + wave_sizze_129347) & (2 * offset_129356 - 1)) == 
0) { + // read array element + { + x_88837 = ((volatile __local + double *) red_arr_mem_129354)[sext_i32_i64(local_tid_129345 + + offset_129356)]; + } + // apply reduction operation + { + double defunc_1_op_res_88838 = x_88836 + x_88837; + + x_88836 = defunc_1_op_res_88838; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129354)[sext_i32_i64(local_tid_129345)] = + x_88836; + } + } + offset_129356 *= 2; + } + while (slt32(skip_waves_129357, squot32(sext_i64_i32(n_70864) + + wave_sizze_129347 - 1, + wave_sizze_129347))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129356 = skip_waves_129357 * wave_sizze_129347; + if (slt32(local_tid_129345 + offset_129356, sext_i64_i32(n_70864)) && + ((local_tid_129345 - squot32(local_tid_129345, wave_sizze_129347) * + wave_sizze_129347) == 0 && (squot32(local_tid_129345, + wave_sizze_129347) & (2 * + skip_waves_129357 - + 1)) == + 0)) { + // read array element + { + x_88837 = ((__local + double *) red_arr_mem_129354)[sext_i32_i64(local_tid_129345 + + offset_129356)]; + } + // apply reduction operation + { + double defunc_1_op_res_88838 = x_88836 + x_88837; + + x_88836 = defunc_1_op_res_88838; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129354)[sext_i32_i64(local_tid_129345)] = + x_88836; + } + } + skip_waves_129357 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_88835 = ((__local double *) red_arr_mem_129354)[(int64_t) 0]; + + int64_t i64_arg_88848 = sub64(defunc_0_f_res_88827, k2p2_70874); + double i64_res_88849 = sitofp_i64_f64(i64_arg_88848); + double sqrt_arg_88850 = defunc_0_f_res_88835 / i64_res_88849; + double sqrt_res_88851; + + sqrt_res_88851 = futrts_sqrt64(sqrt_arg_88850); + + double i64_res_88852 = sitofp_i64_f64(defunc_0_f_res_88827); + double f64_arg_88853 = hfrac_70866 * i64_res_88852; + int64_t f64_res_88854 = fptosi_f64_i64(f64_arg_88853); + + if (local_tid_129345 == 0) { + ((__global int64_t *) mem_124942)[gtid_88731] = f64_res_88854; + } + 
if (local_tid_129345 == 0) { + ((__global int64_t *) mem_124944)[gtid_88731] = defunc_0_f_res_88827; + } + if (local_tid_129345 == 0) { + ((__global double *) mem_124946)[gtid_88731] = sqrt_res_88851; + } + + error_4: + return; +} +__kernel void mainDetailedzisegmap_intragroup_89398(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125079_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129654_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129652_backing_aligned_2, + __local volatile + int64_t *red_arr_mem_129650_backing_aligned_3, + __local volatile + int64_t *mem_125077_backing_aligned_4, + __local volatile + int64_t *mem_125074_backing_aligned_5, + int64_t N_70860, + int64_t n_70864, + int64_t iota_arg_72752, + int64_t iota_arg_72776, + int64_t computed_group_sizze_89388, + __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *defunc_3_map_res_mem_124963, + __global + unsigned char *defunc_0_f_res_mem_124973, + __global + unsigned char *mem_124976, + __global + unsigned char *mem_125083, + __global + unsigned char *mem_125086, + __global + unsigned char *mem_125088, + __global + unsigned char *mem_125090) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_125079_backing_5 = (__local volatile + char *) mem_125079_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129654_backing_4 = + (__local volatile + char *) red_arr_mem_129654_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129652_backing_3 = + (__local volatile + char *) red_arr_mem_129652_backing_aligned_2; + __local volatile char *restrict 
red_arr_mem_129650_backing_2 = + (__local volatile + char *) red_arr_mem_129650_backing_aligned_3; + __local volatile char *restrict mem_125077_backing_1 = (__local volatile + char *) mem_125077_backing_aligned_4; + __local volatile char *restrict mem_125074_backing_0 = (__local volatile + char *) mem_125074_backing_aligned_5; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129632; + int32_t local_tid_129633; + int64_t group_sizze_129636; + int32_t wave_sizze_129635; + int32_t group_tid_129634; + + global_tid_129632 = get_global_id(0); + local_tid_129633 = get_local_id(0); + group_sizze_129636 = get_local_size(0); + wave_sizze_129635 = LOCKSTEP_WIDTH; + group_tid_129634 = get_group_id(0); + + int32_t phys_tid_89398; + + phys_tid_89398 = group_tid_129634; + + int32_t ltid_pre_129637; + + ltid_pre_129637 = local_tid_129633; + + int32_t ltid_pre_129638; + + ltid_pre_129638 = local_tid_129633; + + int64_t gtid_89386; + + gtid_89386 = sext_i32_i64(group_tid_129634); + + int64_t x_89628; + + x_89628 = ((__global int64_t *) defunc_4_map_res_mem_124919)[gtid_89386]; + + int64_t x_89629 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89386]; + double x_89630 = ((__global + double *) defunc_3_map_res_mem_124963)[gtid_89386]; + int64_t x_89631 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_89386]; + double x_89632 = ((__global + double *) defunc_0_f_res_mem_124973)[gtid_89386]; + int64_t y_89637 = sub64(x_89628, x_89629); + __local char *mem_125074; + + mem_125074 = (__local char *) mem_125074_backing_0; + + int64_t gtid_89389 = sext_i32_i64(ltid_pre_129638); + int32_t phys_tid_89390 = local_tid_129633; + + if (slt64(gtid_89389, iota_arg_72776)) { + bool cond_89643 = sle64(y_89637, gtid_89389); + double defunc_0_f_res_89644; + + if (cond_89643) { + defunc_0_f_res_89644 = 0.0; + } else 
{ + bool cond_89645 = gtid_89389 == (int64_t) 0; + double defunc_0_f_res_f_res_89646; + + if (cond_89645) { + defunc_0_f_res_f_res_89646 = x_89632; + } else { + int64_t i_89647 = add64(gtid_89389, x_89629); + bool x_89648 = sle64((int64_t) 0, i_89647); + bool y_89649 = slt64(i_89647, N_70860); + bool bounds_check_89650 = x_89648 && y_89649; + bool index_certs_89651; + + if (!bounds_check_89650) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 427) == -1) { + global_failure_args[0] = i_89647; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_89652 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89386 * + N_70860 + + i_89647]; + int64_t x_89653 = sub64(x_89629, x_89631); + int64_t i_89654 = add64(gtid_89389, x_89653); + bool x_89655 = sle64((int64_t) 0, i_89654); + bool y_89656 = slt64(i_89654, N_70860); + bool bounds_check_89657 = x_89655 && y_89656; + bool index_certs_89658; + + if (!bounds_check_89657) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 428) == -1) { + global_failure_args[0] = i_89654; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_89659 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_89386 * + N_70860 + + i_89654]; + double defunc_0_f_res_f_res_f_res_89660 = x_89652 - y_89659; + + defunc_0_f_res_f_res_89646 = defunc_0_f_res_f_res_f_res_89660; + } + defunc_0_f_res_89644 = defunc_0_f_res_f_res_89646; + } + ((__local double *) mem_125074)[gtid_89389] = defunc_0_f_res_89644; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_129639; + + dims_flat_129639 = iota_arg_72776; + + double x_89639; + double x_89640; + double x_129641; + double x_129642; + bool ltid_in_bounds_129644; + + ltid_in_bounds_129644 = slt64(sext_i32_i64(local_tid_129633), + iota_arg_72776); + + int32_t skip_threads_129645; + + // read input for 
in-block scan + { + if (ltid_in_bounds_129644) { + x_89640 = ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633)]; + if ((local_tid_129633 - squot32(local_tid_129633, 32) * 32) == 0) { + x_89639 = x_89640; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129645 = 1; + while (slt32(skip_threads_129645, 32)) { + if (sle32(skip_threads_129645, local_tid_129633 - + squot32(local_tid_129633, 32) * 32) && + ltid_in_bounds_129644) { + // read operands + { + x_89639 = ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633) - + sext_i32_i64(skip_threads_129645)]; + } + // perform operation + { + bool inactive_129646 = + slt64(srem64(sext_i32_i64(local_tid_129633), + iota_arg_72776), + sext_i32_i64(local_tid_129633) - + sext_i32_i64(local_tid_129633 - + skip_threads_129645)); + + if (inactive_129646) { + x_89639 = x_89640; + } + if (!inactive_129646) { + double defunc_1_op_res_89641 = x_89639 + x_89640; + + x_89639 = defunc_1_op_res_89641; + } + } + } + if (sle32(wave_sizze_129635, skip_threads_129645)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129645, local_tid_129633 - + squot32(local_tid_129633, 32) * 32) && + ltid_in_bounds_129644) { + // write result + { + ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633)] = + x_89639; + x_89640 = x_89639; + } + } + if (sle32(wave_sizze_129635, skip_threads_129645)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129645 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129633 - squot32(local_tid_129633, 32) * 32) == 31 && + ltid_in_bounds_129644) { + ((volatile __local + double *) mem_125074)[sext_i32_i64(squot32(local_tid_129633, + 32))] = x_89639; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129647; + + // read input for in-block 
scan + { + if (squot32(local_tid_129633, 32) == 0 && ltid_in_bounds_129644) { + x_129642 = ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633)]; + if ((local_tid_129633 - squot32(local_tid_129633, 32) * 32) == + 0) { + x_129641 = x_129642; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129647 = 1; + while (slt32(skip_threads_129647, 32)) { + if (sle32(skip_threads_129647, local_tid_129633 - + squot32(local_tid_129633, 32) * 32) && + (squot32(local_tid_129633, 32) == 0 && + ltid_in_bounds_129644)) { + // read operands + { + x_129641 = ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633) - + sext_i32_i64(skip_threads_129647)]; + } + // perform operation + { + bool inactive_129648 = + slt64(srem64(sext_i32_i64(local_tid_129633 * 32 + + 32 - 1), iota_arg_72776), + sext_i32_i64(local_tid_129633 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129633 - + skip_threads_129647) * + 32 + 32 - 1)); + + if (inactive_129648) { + x_129641 = x_129642; + } + if (!inactive_129648) { + double defunc_1_op_res_129643 = x_129641 + x_129642; + + x_129641 = defunc_1_op_res_129643; + } + } + } + if (sle32(wave_sizze_129635, skip_threads_129647)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129647, local_tid_129633 - + squot32(local_tid_129633, 32) * 32) && + (squot32(local_tid_129633, 32) == 0 && + ltid_in_bounds_129644)) { + // write result + { + ((volatile __local + double *) mem_125074)[sext_i32_i64(local_tid_129633)] = + x_129641; + x_129642 = x_129641; + } + } + if (sle32(wave_sizze_129635, skip_threads_129647)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129647 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129633, 32) == 0 || !ltid_in_bounds_129644)) { + // read operands + { + x_89640 = x_89639; + x_89639 = ((__local + double *) mem_125074)[sext_i32_i64(squot32(local_tid_129633, + 32)) - + (int64_t) 1]; + } 
+ // perform operation + { + bool inactive_129649 = + slt64(srem64(sext_i32_i64(local_tid_129633), + iota_arg_72776), + sext_i32_i64(local_tid_129633) - + sext_i32_i64(squot32(local_tid_129633, 32) * 32 - + 1)); + + if (inactive_129649) { + x_89639 = x_89640; + } + if (!inactive_129649) { + double defunc_1_op_res_89641 = x_89639 + x_89640; + + x_89639 = defunc_1_op_res_89641; + } + } + // write final result + { + ((__local + double *) mem_125074)[sext_i32_i64(local_tid_129633)] = + x_89639; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129633, 32) == 0) { + ((__local double *) mem_125074)[sext_i32_i64(local_tid_129633)] = + x_89640; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double i64_res_89661 = sitofp_i64_f64(x_89629); + double sqrt_res_89662; + + sqrt_res_89662 = futrts_sqrt64(i64_res_89661); + + double y_89663 = x_89630 * sqrt_res_89662; + __local char *mem_125077; + + mem_125077 = (__local char *) mem_125077_backing_1; + + bool defunc_0_f_res_89665; + int64_t defunc_0_f_res_89666; + double defunc_0_f_res_89667; + int64_t gtid_89391 = sext_i32_i64(ltid_pre_129637); + int32_t phys_tid_89392 = local_tid_129633; + __local char *red_arr_mem_129650; + + red_arr_mem_129650 = (__local char *) red_arr_mem_129650_backing_2; + + __local char *red_arr_mem_129652; + + red_arr_mem_129652 = (__local char *) red_arr_mem_129652_backing_3; + + __local char *red_arr_mem_129654; + + red_arr_mem_129654 = (__local char *) red_arr_mem_129654_backing_4; + if (slt64(gtid_89391, iota_arg_72752)) { + double x_89683 = ((__local double *) mem_125074)[gtid_89391]; + double x_89684 = ((__global double *) mem_124976)[gtid_89391]; + double defunc_0_f_res_89686 = x_89683 / y_89663; + bool cond_89687 = slt64(gtid_89391, y_89637); + bool isnan_res_89688; + + isnan_res_89688 = futrts_isnan64(defunc_0_f_res_89686); + + bool cond_t_res_89689 = !isnan_res_89688; + bool x_89690 = cond_89687 && cond_t_res_89689; + double 
abs_res_89691 = fabs(defunc_0_f_res_89686); + bool defunc_2_f_res_t_res_89692 = x_89684 < abs_res_89691; + bool x_89693 = x_89690 && defunc_2_f_res_t_res_89692; + double defunc_1_f_res_89694; + + if (cond_89687) { + defunc_1_f_res_89694 = defunc_0_f_res_89686; + } else { + defunc_1_f_res_89694 = 0.0; + } + ((__local bool *) red_arr_mem_129650)[gtid_89391] = x_89693; + ((__local int64_t *) red_arr_mem_129652)[gtid_89391] = gtid_89391; + ((__local double *) red_arr_mem_129654)[gtid_89391] = + defunc_1_f_res_89694; + ((__local double *) mem_125077)[gtid_89391] = defunc_0_f_res_89686; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129656; + int32_t skip_waves_129657; + + skip_waves_129657 = 1; + + bool x_89669; + int64_t x_89670; + double x_89671; + bool x_89672; + int64_t x_89673; + double x_89674; + + offset_129656 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129633, sext_i64_i32(iota_arg_72752))) { + x_89669 = ((__local + bool *) red_arr_mem_129650)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89670 = ((__local + int64_t *) red_arr_mem_129652)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89671 = ((__local + double *) red_arr_mem_129654)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + } + } + offset_129656 = 1; + while (slt32(offset_129656, wave_sizze_129635)) { + if (slt32(local_tid_129633 + offset_129656, + sext_i64_i32(iota_arg_72752)) && ((local_tid_129633 - + squot32(local_tid_129633, + wave_sizze_129635) * + wave_sizze_129635) & (2 * + offset_129656 - + 1)) == + 0) { + // read array element + { + x_89672 = ((volatile __local + bool *) red_arr_mem_129650)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89673 = ((volatile __local + int64_t *) red_arr_mem_129652)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89674 = ((volatile __local + double *) red_arr_mem_129654)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + } + // apply reduction operation + { + bool 
defunc_1_op_res_89675; + int64_t defunc_1_op_res_89676; + + if (x_89669) { + defunc_1_op_res_89675 = x_89669; + defunc_1_op_res_89676 = x_89670; + } else { + bool x_89677 = x_89672 && x_89672; + bool x_89678 = !x_89672; + bool y_89679 = x_89669 && x_89678; + bool defunc_1_op_res_f_res_89680 = x_89677 || y_89679; + int64_t defunc_1_op_res_f_res_89681; + + if (x_89672) { + defunc_1_op_res_f_res_89681 = x_89673; + } else { + defunc_1_op_res_f_res_89681 = x_89670; + } + defunc_1_op_res_89675 = defunc_1_op_res_f_res_89680; + defunc_1_op_res_89676 = defunc_1_op_res_f_res_89681; + } + + double defunc_1_op_res_89682 = x_89671 + x_89674; + + x_89669 = defunc_1_op_res_89675; + x_89670 = defunc_1_op_res_89676; + x_89671 = defunc_1_op_res_89682; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129650)[sext_i32_i64(local_tid_129633)] = + x_89669; + ((volatile __local + int64_t *) red_arr_mem_129652)[sext_i32_i64(local_tid_129633)] = + x_89670; + ((volatile __local + double *) red_arr_mem_129654)[sext_i32_i64(local_tid_129633)] = + x_89671; + } + } + offset_129656 *= 2; + } + while (slt32(skip_waves_129657, + squot32(sext_i64_i32(computed_group_sizze_89388) + + wave_sizze_129635 - 1, wave_sizze_129635))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129656 = skip_waves_129657 * wave_sizze_129635; + if (slt32(local_tid_129633 + offset_129656, + sext_i64_i32(iota_arg_72752)) && ((local_tid_129633 - + squot32(local_tid_129633, + wave_sizze_129635) * + wave_sizze_129635) == 0 && + (squot32(local_tid_129633, + wave_sizze_129635) & + (2 * skip_waves_129657 - + 1)) == 0)) { + // read array element + { + x_89672 = ((__local + bool *) red_arr_mem_129650)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89673 = ((__local + int64_t *) red_arr_mem_129652)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + x_89674 = ((__local + double *) red_arr_mem_129654)[sext_i32_i64(local_tid_129633 + + offset_129656)]; + } + // apply reduction operation + { + bool 
defunc_1_op_res_89675; + int64_t defunc_1_op_res_89676; + + if (x_89669) { + defunc_1_op_res_89675 = x_89669; + defunc_1_op_res_89676 = x_89670; + } else { + bool x_89677 = x_89672 && x_89672; + bool x_89678 = !x_89672; + bool y_89679 = x_89669 && x_89678; + bool defunc_1_op_res_f_res_89680 = x_89677 || y_89679; + int64_t defunc_1_op_res_f_res_89681; + + if (x_89672) { + defunc_1_op_res_f_res_89681 = x_89673; + } else { + defunc_1_op_res_f_res_89681 = x_89670; + } + defunc_1_op_res_89675 = defunc_1_op_res_f_res_89680; + defunc_1_op_res_89676 = defunc_1_op_res_f_res_89681; + } + + double defunc_1_op_res_89682 = x_89671 + x_89674; + + x_89669 = defunc_1_op_res_89675; + x_89670 = defunc_1_op_res_89676; + x_89671 = defunc_1_op_res_89682; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129650)[sext_i32_i64(local_tid_129633)] = + x_89669; + ((__local + int64_t *) red_arr_mem_129652)[sext_i32_i64(local_tid_129633)] = + x_89670; + ((__local + double *) red_arr_mem_129654)[sext_i32_i64(local_tid_129633)] = + x_89671; + } + } + skip_waves_129657 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_89665 = ((__local bool *) red_arr_mem_129650)[(int64_t) 0]; + defunc_0_f_res_89666 = ((__local + int64_t *) red_arr_mem_129652)[(int64_t) 0]; + defunc_0_f_res_89667 = ((__local double *) red_arr_mem_129654)[(int64_t) 0]; + + bool cond_89695 = y_89637 == (int64_t) 0; + double defunc_0_f_res_89696; + + if (cond_89695) { + defunc_0_f_res_89696 = 0.0; + } else { + double i64_res_89697 = sitofp_i64_f64(y_89637); + double defunc_0_f_res_f_res_89698 = defunc_0_f_res_89667 / + i64_res_89697; + + defunc_0_f_res_89696 = defunc_0_f_res_f_res_89698; + } + + bool cond_89699 = !defunc_0_f_res_89665; + int64_t fst_breakzq_89700; + + if (cond_89699) { + fst_breakzq_89700 = (int64_t) -1; + } else { + bool cond_89701 = slt64(defunc_0_f_res_89666, y_89637); + int64_t adjustValInds_res_89702; + + if (cond_89701) { + int64_t i_89703 = add64(x_89629, defunc_0_f_res_89666); 
+ bool x_89704 = sle64((int64_t) 0, i_89703); + bool y_89705 = slt64(i_89703, N_70860); + bool bounds_check_89706 = x_89704 && y_89705; + bool index_certs_89707; + + if (!bounds_check_89706) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 429) == + -1) { + global_failure_args[0] = i_89703; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_3; + } + } + + int64_t x_89708 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89386 * + N_70860 + + i_89703]; + int64_t adjustValInds_res_t_res_89709 = sub64(x_89708, n_70864); + + adjustValInds_res_89702 = adjustValInds_res_t_res_89709; + } else { + adjustValInds_res_89702 = (int64_t) -1; + } + fst_breakzq_89700 = adjustValInds_res_89702; + } + + bool cond_89710 = sle64(x_89629, (int64_t) 5); + bool cond_f_res_89711 = sle64(y_89637, (int64_t) 5); + bool x_89712 = !cond_89710; + bool y_89713 = cond_f_res_89711 && x_89712; + bool cond_89714 = cond_89710 || y_89713; + int64_t fst_breakzq_89715; + + if (cond_89714) { + fst_breakzq_89715 = (int64_t) -2; + } else { + fst_breakzq_89715 = fst_breakzq_89700; + } + + __local char *mem_125079; + + mem_125079 = (__local char *) mem_125079_backing_5; + for (int64_t i_129658 = 0; i_129658 < sdiv_up64(iota_arg_72776 - + sext_i32_i64(local_tid_129633), + computed_group_sizze_89388); + i_129658++) { + ((__local double *) mem_125079)[i_129658 * computed_group_sizze_89388 + + sext_i32_i64(local_tid_129633)] = NAN; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_89393 = sext_i32_i64(ltid_pre_129638); + int32_t phys_tid_89394 = local_tid_129633; + + if (slt64(write_i_89393, iota_arg_72776)) { + double write_value_89720 = ((__local + double *) mem_125077)[write_i_89393]; + bool cond_89721 = slt64(write_i_89393, y_89637); + int64_t defunc_0_f_res_89722; + + if (cond_89721) { + int64_t i_89723 = add64(write_i_89393, x_89629); + bool x_89724 = sle64((int64_t) 0, i_89723); + bool y_89725 = slt64(i_89723, N_70860); + bool bounds_check_89726 = 
x_89724 && y_89725; + bool index_certs_89727; + + if (!bounds_check_89726) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 430) == + -1) { + global_failure_args[0] = i_89723; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_3; + } + } + + int64_t x_89728 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_89386 * + N_70860 + + i_89723]; + int64_t defunc_0_f_res_t_res_89729 = sub64(x_89728, n_70864); + + defunc_0_f_res_89722 = defunc_0_f_res_t_res_89729; + } else { + defunc_0_f_res_89722 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_0_f_res_89722) && + slt64(defunc_0_f_res_89722, iota_arg_72776)) { + ((__local double *) mem_125079)[defunc_0_f_res_89722] = + write_value_89720; + } + } + + error_3: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_129659 = 0; i_129659 < sdiv_up64(iota_arg_72752 - + sext_i32_i64(local_tid_129633), + computed_group_sizze_89388); + i_129659++) { + ((__global double *) mem_125083)[gtid_89386 * iota_arg_72752 + + (i_129659 * + computed_group_sizze_89388 + + sext_i32_i64(local_tid_129633))] = + ((__local double *) mem_125079)[i_129659 * + computed_group_sizze_89388 + + sext_i32_i64(local_tid_129633)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_129660 = 0; i_129660 < sdiv_up64(iota_arg_72752 - + sext_i32_i64(local_tid_129633), + computed_group_sizze_89388); + i_129660++) { + ((__global double *) mem_125086)[gtid_89386 * iota_arg_72752 + + (i_129660 * + computed_group_sizze_89388 + + sext_i32_i64(local_tid_129633))] = + ((__local double *) mem_125077)[i_129660 * + computed_group_sizze_89388 + + sext_i32_i64(local_tid_129633)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_129633 == 0) { + ((__global int64_t *) mem_125088)[gtid_89386] = fst_breakzq_89715; + } + if (local_tid_129633 == 0) { + ((__global double *) mem_125090)[gtid_89386] = defunc_0_f_res_89696; + } + + error_4: + return; +} +__kernel void 
mainDetailedzisegred_large_79431(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127121_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127119_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_81262, + int64_t groups_per_segment_127105, + int64_t elements_per_thread_127106, + int64_t virt_num_groups_127107, + int64_t threads_per_segment_127109, + __global + unsigned char *mem_121831, + __global + unsigned char *mem_121835, + __global + unsigned char *mem_121840, + __global + unsigned char *group_res_arr_mem_127110, + __global + unsigned char *mainDetailedzicounter_mem_127112) +{ + #define segred_group_sizze_81261 (mainDetailedzisegred_group_sizze_79425) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127121_backing_1 = + (__local volatile + char *) sync_arr_mem_127121_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127119_backing_0 = + (__local volatile + char *) red_arr_mem_127119_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127114; + int32_t local_tid_127115; + int64_t group_sizze_127118; + int32_t wave_sizze_127117; + int32_t group_tid_127116; + + global_tid_127114 = get_global_id(0); + local_tid_127115 = get_local_id(0); + group_sizze_127118 = get_local_size(0); + wave_sizze_127117 = LOCKSTEP_WIDTH; + group_tid_127116 = get_group_id(0); + + int32_t phys_tid_79431; + + phys_tid_79431 = global_tid_127114; + + __local char *red_arr_mem_127119; + + red_arr_mem_127119 = (__local char *) red_arr_mem_127119_backing_0; + + __local char *sync_arr_mem_127121; + + sync_arr_mem_127121 = (__local char *) sync_arr_mem_127121_backing_1; + + int32_t phys_group_id_127123; + + phys_group_id_127123 = get_group_id(0); + for (int32_t i_127124 = 0; i_127124 < + sdiv_up32(sext_i64_i32(virt_num_groups_127107) - phys_group_id_127123, + sext_i64_i32(num_groups_81262)); i_127124++) 
{ + int32_t virt_group_id_127125 = phys_group_id_127123 + i_127124 * + sext_i64_i32(num_groups_81262); + int32_t flat_segment_id_127126 = squot32(virt_group_id_127125, + sext_i64_i32(groups_per_segment_127105)); + int64_t global_tid_127127 = srem64(sext_i32_i64(virt_group_id_127125) * + segred_group_sizze_81261 + + sext_i32_i64(local_tid_127115), + segred_group_sizze_81261 * + groups_per_segment_127105); + int64_t gtid_79418 = squot64(sext_i32_i64(flat_segment_id_127126), + k2p2zq_70876 * k2p2zq_70876); + int64_t gtid_79419 = squot64(sext_i32_i64(flat_segment_id_127126) - + squot64(sext_i32_i64(flat_segment_id_127126), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_79420 = sext_i32_i64(flat_segment_id_127126) - + squot64(sext_i32_i64(flat_segment_id_127126), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(flat_segment_id_127126) - + squot64(sext_i32_i64(flat_segment_id_127126), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_79430; + double x_acc_127128; + int64_t chunk_sizze_127129; + + chunk_sizze_127129 = smin64(elements_per_thread_127106, + sdiv_up64(k2p2zq_70876 - global_tid_127127, + threads_per_segment_127109)); + + double x_81265; + double x_81266; + + // neutral-initialise the accumulators + { + x_acc_127128 = 0.0; + } + for (int64_t i_127133 = 0; i_127133 < chunk_sizze_127129; i_127133++) { + gtid_79430 = global_tid_127127 + threads_per_segment_127109 * + i_127133; + // apply map function + { + double x_81271 = ((__global double *) mem_121831)[gtid_79419 * + (k2p2zq_70876 * + m_70861) + + gtid_79418 * + k2p2zq_70876 + + gtid_79430]; + double x_81272 = ((__global double *) mem_121835)[gtid_79418 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79420 * + k2p2zq_70876 + + gtid_79430]; + double defunc_1_f_res_81273 = x_81271 * x_81272; + + // save map-out results + { } + // load accumulator + { + x_81265 = 
x_acc_127128; + } + // load new values + { + x_81266 = defunc_1_f_res_81273; + } + // apply reduction operator + { + double defunc_1_op_res_81267 = x_81265 + x_81266; + + // store in accumulator + { + x_acc_127128 = defunc_1_op_res_81267; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_81265 = x_acc_127128; + ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_81265; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127134; + int32_t skip_waves_127135; + + skip_waves_127135 = 1; + + double x_127130; + double x_127131; + + offset_127134 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127115, + sext_i64_i32(segred_group_sizze_81261))) { + x_127130 = ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127134)]; + } + } + offset_127134 = 1; + while (slt32(offset_127134, wave_sizze_127117)) { + if (slt32(local_tid_127115 + offset_127134, + sext_i64_i32(segred_group_sizze_81261)) && + ((local_tid_127115 - squot32(local_tid_127115, + wave_sizze_127117) * + wave_sizze_127117) & (2 * offset_127134 - 1)) == 0) { + // read array element + { + x_127131 = ((volatile __local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127134)]; + } + // apply reduction operation + { + double defunc_1_op_res_127132 = x_127130 + x_127131; + + x_127130 = defunc_1_op_res_127132; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_127130; + } + } + offset_127134 *= 2; + } + while (slt32(skip_waves_127135, + squot32(sext_i64_i32(segred_group_sizze_81261) + + wave_sizze_127117 - 1, wave_sizze_127117))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127134 = skip_waves_127135 * wave_sizze_127117; + if (slt32(local_tid_127115 + offset_127134, + sext_i64_i32(segred_group_sizze_81261)) && + ((local_tid_127115 - squot32(local_tid_127115, + wave_sizze_127117) * + 
wave_sizze_127117) == 0 && (squot32(local_tid_127115, + wave_sizze_127117) & (2 * + skip_waves_127135 - + 1)) == + 0)) { + // read array element + { + x_127131 = ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127134)]; + } + // apply reduction operation + { + double defunc_1_op_res_127132 = x_127130 + x_127131; + + x_127130 = defunc_1_op_res_127132; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_127130; + } + } + skip_waves_127135 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127115) == (int64_t) 0) { + x_acc_127128 = x_127130; + } + } + if (groups_per_segment_127105 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127115 == 0) { + ((__global double *) mem_121840)[gtid_79418 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79419 * k2p2zq_70876 + + gtid_79420] = x_acc_127128; + } + } + } else { + int32_t old_counter_127136; + + // first thread in group saves group result to global memory + { + if (local_tid_127115 == 0) { + ((__global + double *) group_res_arr_mem_127110)[sext_i32_i64(virt_group_id_127125) * + segred_group_sizze_81261] = + x_acc_127128; + mem_fence_global(); + old_counter_127136 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127112)[sext_i32_i64(srem32(flat_segment_id_127126, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127121)[(int64_t) 0] = + old_counter_127136 == groups_per_segment_127105 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127137; + + is_last_group_127137 = ((__local + bool *) sync_arr_mem_127121)[(int64_t) 0]; + if (is_last_group_127137) { + if (local_tid_127115 == 0) { + old_counter_127136 = + atomic_add_i32_global(&((volatile __global + int *) 
mainDetailedzicounter_mem_127112)[sext_i32_i64(srem32(flat_segment_id_127126, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127105)); + } + // read in the per-group-results + { + int64_t read_per_thread_127138 = + sdiv_up64(groups_per_segment_127105, + segred_group_sizze_81261); + + x_81265 = 0.0; + for (int64_t i_127139 = 0; i_127139 < + read_per_thread_127138; i_127139++) { + int64_t group_res_id_127140 = + sext_i32_i64(local_tid_127115) * + read_per_thread_127138 + i_127139; + int64_t index_of_group_res_127141 = + sext_i32_i64(flat_segment_id_127126) * + groups_per_segment_127105 + group_res_id_127140; + + if (slt64(group_res_id_127140, + groups_per_segment_127105)) { + x_81266 = ((__global + double *) group_res_arr_mem_127110)[index_of_group_res_127141 * + segred_group_sizze_81261]; + + double defunc_1_op_res_81267; + + defunc_1_op_res_81267 = x_81265 + x_81266; + x_81265 = defunc_1_op_res_81267; + } + } + } + ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_81265; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127142; + int32_t skip_waves_127143; + + skip_waves_127143 = 1; + + double x_127130; + double x_127131; + + offset_127142 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127115, + sext_i64_i32(segred_group_sizze_81261))) { + x_127130 = ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127142)]; + } + } + offset_127142 = 1; + while (slt32(offset_127142, wave_sizze_127117)) { + if (slt32(local_tid_127115 + offset_127142, + sext_i64_i32(segred_group_sizze_81261)) && + ((local_tid_127115 - squot32(local_tid_127115, + wave_sizze_127117) * + wave_sizze_127117) & (2 * offset_127142 - 1)) == + 0) { + // read array element + { + x_127131 = ((volatile __local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127142)]; + } + // apply reduction operation + { + double defunc_1_op_res_127132 = 
x_127130 + + x_127131; + + x_127130 = defunc_1_op_res_127132; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_127130; + } + } + offset_127142 *= 2; + } + while (slt32(skip_waves_127143, + squot32(sext_i64_i32(segred_group_sizze_81261) + + wave_sizze_127117 - 1, + wave_sizze_127117))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127142 = skip_waves_127143 * wave_sizze_127117; + if (slt32(local_tid_127115 + offset_127142, + sext_i64_i32(segred_group_sizze_81261)) && + ((local_tid_127115 - squot32(local_tid_127115, + wave_sizze_127117) * + wave_sizze_127117) == 0 && + (squot32(local_tid_127115, wave_sizze_127117) & + (2 * skip_waves_127143 - 1)) == 0)) { + // read array element + { + x_127131 = ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115 + + offset_127142)]; + } + // apply reduction operation + { + double defunc_1_op_res_127132 = x_127130 + + x_127131; + + x_127130 = defunc_1_op_res_127132; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127119)[sext_i32_i64(local_tid_127115)] = + x_127130; + } + } + skip_waves_127143 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127115 == 0) { + ((__global double *) mem_121840)[gtid_79418 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79419 * + k2p2zq_70876 + + gtid_79420] = + x_127130; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_81261 +} +__kernel void mainDetailedzisegred_large_79709(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126977_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126975_backing_aligned_1, + int64_t k2p2zq_70876, + int64_t x_81093, int64_t i_81094, + int64_t j_m_i_81098, + int64_t num_groups_81180, + int64_t binop_x_120251, + int64_t groups_per_segment_126961, + int64_t elements_per_thread_126962, + int64_t 
virt_num_groups_126963, + int64_t threads_per_segment_126965, + __global + unsigned char *mem_121351, + __global + unsigned char *mem_param_121469, + __global + unsigned char *mem_121555, + __global + unsigned char *group_res_arr_mem_126966, + __global + unsigned char *mainDetailedzicounter_mem_126968) +{ + #define segred_group_sizze_81179 (mainDetailedzisegred_group_sizze_79703) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_126977_backing_1 = + (__local volatile + char *) sync_arr_mem_126977_backing_aligned_0; + __local volatile char *restrict red_arr_mem_126975_backing_0 = + (__local volatile + char *) red_arr_mem_126975_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126970; + int32_t local_tid_126971; + int64_t group_sizze_126974; + int32_t wave_sizze_126973; + int32_t group_tid_126972; + + global_tid_126970 = get_global_id(0); + local_tid_126971 = get_local_id(0); + group_sizze_126974 = get_local_size(0); + wave_sizze_126973 = LOCKSTEP_WIDTH; + group_tid_126972 = get_group_id(0); + + int32_t phys_tid_79709; + + phys_tid_79709 = global_tid_126970; + + __local char *red_arr_mem_126975; + + red_arr_mem_126975 = (__local char *) red_arr_mem_126975_backing_0; + + __local char *sync_arr_mem_126977; + + sync_arr_mem_126977 = (__local char *) sync_arr_mem_126977_backing_1; + + int32_t phys_group_id_126979; + + phys_group_id_126979 = get_group_id(0); + for (int32_t i_126980 = 0; i_126980 < + sdiv_up32(sext_i64_i32(virt_num_groups_126963) - phys_group_id_126979, + sext_i64_i32(num_groups_81180)); i_126980++) { + int32_t virt_group_id_126981 = phys_group_id_126979 + i_126980 * + sext_i64_i32(num_groups_81180); + int32_t flat_segment_id_126982 = squot32(virt_group_id_126981, + sext_i64_i32(groups_per_segment_126961)); + int64_t global_tid_126983 = srem64(sext_i32_i64(virt_group_id_126981) * + segred_group_sizze_81179 + + sext_i32_i64(local_tid_126971), 
+ segred_group_sizze_81179 * + groups_per_segment_126961); + int64_t gtid_79698 = squot64(sext_i32_i64(flat_segment_id_126982), + k2p2zq_70876); + int64_t gtid_79699 = sext_i32_i64(flat_segment_id_126982) - + squot64(sext_i32_i64(flat_segment_id_126982), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_79708; + double x_acc_126984; + int64_t chunk_sizze_126985; + + chunk_sizze_126985 = smin64(elements_per_thread_126962, + sdiv_up64(j_m_i_81098 - global_tid_126983, + threads_per_segment_126965)); + + double x_81183; + double x_81184; + + // neutral-initialise the accumulators + { + x_acc_126984 = 0.0; + } + for (int64_t i_126989 = 0; i_126989 < chunk_sizze_126985; i_126989++) { + gtid_79708 = global_tid_126983 + threads_per_segment_126965 * + i_126989; + // apply map function + { + int64_t slice_115048 = gtid_79708 + x_81093; + double x_81190 = ((__global double *) mem_121351)[gtid_79698 * + (k2p2zq_70876 * + k2p2zq_70876) + + slice_115048 * + k2p2zq_70876 + + i_81094]; + bool isnan_res_81191; + + isnan_res_81191 = futrts_isnan64(x_81190); + + double defunc_1_f_res_81192; + + if (isnan_res_81191) { + defunc_1_f_res_81192 = 0.0; + } else { + double x_81189 = ((__global + double *) mem_param_121469)[gtid_79698 * + binop_x_120251 + + gtid_79699 * + k2p2zq_70876 + + slice_115048]; + double defunc_1_f_res_f_res_81193 = x_81189 * x_81190; + + defunc_1_f_res_81192 = defunc_1_f_res_f_res_81193; + } + // save map-out results + { } + // load accumulator + { + x_81183 = x_acc_126984; + } + // load new values + { + x_81184 = defunc_1_f_res_81192; + } + // apply reduction operator + { + double defunc_1_op_res_81185 = x_81183 + x_81184; + + // store in accumulator + { + x_acc_126984 = defunc_1_op_res_81185; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_81183 = x_acc_126984; + ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_81183; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126990; + int32_t 
skip_waves_126991; + + skip_waves_126991 = 1; + + double x_126986; + double x_126987; + + offset_126990 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126971, + sext_i64_i32(segred_group_sizze_81179))) { + x_126986 = ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126990)]; + } + } + offset_126990 = 1; + while (slt32(offset_126990, wave_sizze_126973)) { + if (slt32(local_tid_126971 + offset_126990, + sext_i64_i32(segred_group_sizze_81179)) && + ((local_tid_126971 - squot32(local_tid_126971, + wave_sizze_126973) * + wave_sizze_126973) & (2 * offset_126990 - 1)) == 0) { + // read array element + { + x_126987 = ((volatile __local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126990)]; + } + // apply reduction operation + { + double defunc_1_op_res_126988 = x_126986 + x_126987; + + x_126986 = defunc_1_op_res_126988; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_126986; + } + } + offset_126990 *= 2; + } + while (slt32(skip_waves_126991, + squot32(sext_i64_i32(segred_group_sizze_81179) + + wave_sizze_126973 - 1, wave_sizze_126973))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126990 = skip_waves_126991 * wave_sizze_126973; + if (slt32(local_tid_126971 + offset_126990, + sext_i64_i32(segred_group_sizze_81179)) && + ((local_tid_126971 - squot32(local_tid_126971, + wave_sizze_126973) * + wave_sizze_126973) == 0 && (squot32(local_tid_126971, + wave_sizze_126973) & (2 * + skip_waves_126991 - + 1)) == + 0)) { + // read array element + { + x_126987 = ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126990)]; + } + // apply reduction operation + { + double defunc_1_op_res_126988 = x_126986 + x_126987; + + x_126986 = defunc_1_op_res_126988; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_126986; + 
} + } + skip_waves_126991 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126971) == (int64_t) 0) { + x_acc_126984 = x_126986; + } + } + if (groups_per_segment_126961 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_126971 == 0) { + ((__global double *) mem_121555)[gtid_79698 * k2p2zq_70876 + + gtid_79699] = x_acc_126984; + } + } + } else { + int32_t old_counter_126992; + + // first thread in group saves group result to global memory + { + if (local_tid_126971 == 0) { + ((__global + double *) group_res_arr_mem_126966)[sext_i32_i64(virt_group_id_126981) * + segred_group_sizze_81179] = + x_acc_126984; + mem_fence_global(); + old_counter_126992 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126968)[sext_i32_i64(srem32(flat_segment_id_126982, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_126977)[(int64_t) 0] = + old_counter_126992 == groups_per_segment_126961 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126993; + + is_last_group_126993 = ((__local + bool *) sync_arr_mem_126977)[(int64_t) 0]; + if (is_last_group_126993) { + if (local_tid_126971 == 0) { + old_counter_126992 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126968)[sext_i32_i64(srem32(flat_segment_id_126982, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_126961)); + } + // read in the per-group-results + { + int64_t read_per_thread_126994 = + sdiv_up64(groups_per_segment_126961, + segred_group_sizze_81179); + + x_81183 = 0.0; + for (int64_t i_126995 = 0; i_126995 < + read_per_thread_126994; i_126995++) { + int64_t group_res_id_126996 = + sext_i32_i64(local_tid_126971) * + read_per_thread_126994 + i_126995; + int64_t index_of_group_res_126997 = + sext_i32_i64(flat_segment_id_126982) * + groups_per_segment_126961 + group_res_id_126996; + + if 
(slt64(group_res_id_126996, + groups_per_segment_126961)) { + x_81184 = ((__global + double *) group_res_arr_mem_126966)[index_of_group_res_126997 * + segred_group_sizze_81179]; + + double defunc_1_op_res_81185; + + defunc_1_op_res_81185 = x_81183 + x_81184; + x_81183 = defunc_1_op_res_81185; + } + } + } + ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_81183; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126998; + int32_t skip_waves_126999; + + skip_waves_126999 = 1; + + double x_126986; + double x_126987; + + offset_126998 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126971, + sext_i64_i32(segred_group_sizze_81179))) { + x_126986 = ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126998)]; + } + } + offset_126998 = 1; + while (slt32(offset_126998, wave_sizze_126973)) { + if (slt32(local_tid_126971 + offset_126998, + sext_i64_i32(segred_group_sizze_81179)) && + ((local_tid_126971 - squot32(local_tid_126971, + wave_sizze_126973) * + wave_sizze_126973) & (2 * offset_126998 - 1)) == + 0) { + // read array element + { + x_126987 = ((volatile __local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126998)]; + } + // apply reduction operation + { + double defunc_1_op_res_126988 = x_126986 + + x_126987; + + x_126986 = defunc_1_op_res_126988; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_126986; + } + } + offset_126998 *= 2; + } + while (slt32(skip_waves_126999, + squot32(sext_i64_i32(segred_group_sizze_81179) + + wave_sizze_126973 - 1, + wave_sizze_126973))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126998 = skip_waves_126999 * wave_sizze_126973; + if (slt32(local_tid_126971 + offset_126998, + sext_i64_i32(segred_group_sizze_81179)) && + ((local_tid_126971 - squot32(local_tid_126971, + wave_sizze_126973) * + 
wave_sizze_126973) == 0 && + (squot32(local_tid_126971, wave_sizze_126973) & + (2 * skip_waves_126999 - 1)) == 0)) { + // read array element + { + x_126987 = ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971 + + offset_126998)]; + } + // apply reduction operation + { + double defunc_1_op_res_126988 = x_126986 + + x_126987; + + x_126986 = defunc_1_op_res_126988; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126975)[sext_i32_i64(local_tid_126971)] = + x_126986; + } + } + skip_waves_126999 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126971 == 0) { + ((__global double *) mem_121555)[gtid_79698 * + k2p2zq_70876 + + gtid_79699] = + x_126986; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_81179 +} +__kernel void mainDetailedzisegred_large_80466(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126752_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126750_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t j_80634, + int64_t num_groups_80667, + int64_t groups_per_segment_126736, + int64_t elements_per_thread_126737, + int64_t virt_num_groups_126738, + int64_t threads_per_segment_126740, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_120938, + __global + unsigned char *group_res_arr_mem_126741, + __global + unsigned char *mainDetailedzicounter_mem_126743) +{ + #define segred_group_sizze_80666 (mainDetailedzisegred_group_sizze_80460) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_126752_backing_1 = + (__local volatile + char *) sync_arr_mem_126752_backing_aligned_0; + __local volatile char *restrict red_arr_mem_126750_backing_0 = + (__local volatile + char *) red_arr_mem_126750_backing_aligned_1; + + if 
(*global_failure >= 0) + return; + + int32_t global_tid_126745; + int32_t local_tid_126746; + int64_t group_sizze_126749; + int32_t wave_sizze_126748; + int32_t group_tid_126747; + + global_tid_126745 = get_global_id(0); + local_tid_126746 = get_local_id(0); + group_sizze_126749 = get_local_size(0); + wave_sizze_126748 = LOCKSTEP_WIDTH; + group_tid_126747 = get_group_id(0); + + int32_t phys_tid_80466; + + phys_tid_80466 = global_tid_126745; + + __local char *red_arr_mem_126750; + + red_arr_mem_126750 = (__local char *) red_arr_mem_126750_backing_0; + + __local char *sync_arr_mem_126752; + + sync_arr_mem_126752 = (__local char *) sync_arr_mem_126752_backing_1; + + int32_t phys_group_id_126754; + + phys_group_id_126754 = get_group_id(0); + for (int32_t i_126755 = 0; i_126755 < + sdiv_up32(sext_i64_i32(virt_num_groups_126738) - phys_group_id_126754, + sext_i64_i32(num_groups_80667)); i_126755++) { + int32_t virt_group_id_126756 = phys_group_id_126754 + i_126755 * + sext_i64_i32(num_groups_80667); + int32_t flat_segment_id_126757 = squot32(virt_group_id_126756, + sext_i64_i32(groups_per_segment_126736)); + int64_t global_tid_126758 = srem64(sext_i32_i64(virt_group_id_126756) * + segred_group_sizze_80666 + + sext_i32_i64(local_tid_126746), + segred_group_sizze_80666 * + groups_per_segment_126736); + int64_t gtid_80457 = sext_i32_i64(flat_segment_id_126757); + int64_t gtid_80465; + double x_acc_126759; + int64_t chunk_sizze_126760; + + chunk_sizze_126760 = smin64(elements_per_thread_126737, + sdiv_up64(k2p2zq_70876 - global_tid_126758, + threads_per_segment_126740)); + + double x_80670; + double x_80671; + + // neutral-initialise the accumulators + { + x_acc_126759 = 0.0; + } + for (int64_t i_126764 = 0; i_126764 < chunk_sizze_126760; i_126764++) { + gtid_80465 = global_tid_126758 + threads_per_segment_126740 * + i_126764; + // apply map function + { + double x_80674 = ((__global double *) mem_120246)[j_80634 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_80457 * + 
defunc_2_reduce_res_70985 + + gtid_80465]; + double defunc_1_f_res_80675 = x_80674 * x_80674; + + // save map-out results + { } + // load accumulator + { + x_80670 = x_acc_126759; + } + // load new values + { + x_80671 = defunc_1_f_res_80675; + } + // apply reduction operator + { + double defunc_1_op_res_80672 = x_80670 + x_80671; + + // store in accumulator + { + x_acc_126759 = defunc_1_op_res_80672; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_80670 = x_acc_126759; + ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_80670; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126765; + int32_t skip_waves_126766; + + skip_waves_126766 = 1; + + double x_126761; + double x_126762; + + offset_126765 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126746, + sext_i64_i32(segred_group_sizze_80666))) { + x_126761 = ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126765)]; + } + } + offset_126765 = 1; + while (slt32(offset_126765, wave_sizze_126748)) { + if (slt32(local_tid_126746 + offset_126765, + sext_i64_i32(segred_group_sizze_80666)) && + ((local_tid_126746 - squot32(local_tid_126746, + wave_sizze_126748) * + wave_sizze_126748) & (2 * offset_126765 - 1)) == 0) { + // read array element + { + x_126762 = ((volatile __local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126765)]; + } + // apply reduction operation + { + double defunc_1_op_res_126763 = x_126761 + x_126762; + + x_126761 = defunc_1_op_res_126763; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_126761; + } + } + offset_126765 *= 2; + } + while (slt32(skip_waves_126766, + squot32(sext_i64_i32(segred_group_sizze_80666) + + wave_sizze_126748 - 1, wave_sizze_126748))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126765 = skip_waves_126766 * wave_sizze_126748; + if 
(slt32(local_tid_126746 + offset_126765, + sext_i64_i32(segred_group_sizze_80666)) && + ((local_tid_126746 - squot32(local_tid_126746, + wave_sizze_126748) * + wave_sizze_126748) == 0 && (squot32(local_tid_126746, + wave_sizze_126748) & (2 * + skip_waves_126766 - + 1)) == + 0)) { + // read array element + { + x_126762 = ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126765)]; + } + // apply reduction operation + { + double defunc_1_op_res_126763 = x_126761 + x_126762; + + x_126761 = defunc_1_op_res_126763; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_126761; + } + } + skip_waves_126766 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126746) == (int64_t) 0) { + x_acc_126759 = x_126761; + } + } + if (groups_per_segment_126736 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_126746 == 0) { + ((__global double *) mem_120938)[gtid_80457] = x_acc_126759; + } + } + } else { + int32_t old_counter_126767; + + // first thread in group saves group result to global memory + { + if (local_tid_126746 == 0) { + ((__global + double *) group_res_arr_mem_126741)[sext_i32_i64(virt_group_id_126756) * + segred_group_sizze_80666] = + x_acc_126759; + mem_fence_global(); + old_counter_126767 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126743)[sext_i32_i64(srem32(flat_segment_id_126757, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_126752)[(int64_t) 0] = + old_counter_126767 == groups_per_segment_126736 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126768; + + is_last_group_126768 = ((__local + bool *) sync_arr_mem_126752)[(int64_t) 0]; + if (is_last_group_126768) { + if (local_tid_126746 == 0) { + old_counter_126767 = + atomic_add_i32_global(&((volatile 
__global + int *) mainDetailedzicounter_mem_126743)[sext_i32_i64(srem32(flat_segment_id_126757, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_126736)); + } + // read in the per-group-results + { + int64_t read_per_thread_126769 = + sdiv_up64(groups_per_segment_126736, + segred_group_sizze_80666); + + x_80670 = 0.0; + for (int64_t i_126770 = 0; i_126770 < + read_per_thread_126769; i_126770++) { + int64_t group_res_id_126771 = + sext_i32_i64(local_tid_126746) * + read_per_thread_126769 + i_126770; + int64_t index_of_group_res_126772 = + sext_i32_i64(flat_segment_id_126757) * + groups_per_segment_126736 + group_res_id_126771; + + if (slt64(group_res_id_126771, + groups_per_segment_126736)) { + x_80671 = ((__global + double *) group_res_arr_mem_126741)[index_of_group_res_126772 * + segred_group_sizze_80666]; + + double defunc_1_op_res_80672; + + defunc_1_op_res_80672 = x_80670 + x_80671; + x_80670 = defunc_1_op_res_80672; + } + } + } + ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_80670; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126773; + int32_t skip_waves_126774; + + skip_waves_126774 = 1; + + double x_126761; + double x_126762; + + offset_126773 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126746, + sext_i64_i32(segred_group_sizze_80666))) { + x_126761 = ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126773)]; + } + } + offset_126773 = 1; + while (slt32(offset_126773, wave_sizze_126748)) { + if (slt32(local_tid_126746 + offset_126773, + sext_i64_i32(segred_group_sizze_80666)) && + ((local_tid_126746 - squot32(local_tid_126746, + wave_sizze_126748) * + wave_sizze_126748) & (2 * offset_126773 - 1)) == + 0) { + // read array element + { + x_126762 = ((volatile __local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126773)]; + } + // apply reduction operation + { + double 
defunc_1_op_res_126763 = x_126761 + + x_126762; + + x_126761 = defunc_1_op_res_126763; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_126761; + } + } + offset_126773 *= 2; + } + while (slt32(skip_waves_126774, + squot32(sext_i64_i32(segred_group_sizze_80666) + + wave_sizze_126748 - 1, + wave_sizze_126748))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126773 = skip_waves_126774 * wave_sizze_126748; + if (slt32(local_tid_126746 + offset_126773, + sext_i64_i32(segred_group_sizze_80666)) && + ((local_tid_126746 - squot32(local_tid_126746, + wave_sizze_126748) * + wave_sizze_126748) == 0 && + (squot32(local_tid_126746, wave_sizze_126748) & + (2 * skip_waves_126774 - 1)) == 0)) { + // read array element + { + x_126762 = ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746 + + offset_126773)]; + } + // apply reduction operation + { + double defunc_1_op_res_126763 = x_126761 + + x_126762; + + x_126761 = defunc_1_op_res_126763; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126750)[sext_i32_i64(local_tid_126746)] = + x_126761; + } + } + skip_waves_126774 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126746 == 0) { + ((__global double *) mem_120938)[gtid_80457] = + x_126761; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_80666 +} +__kernel void mainDetailedzisegred_large_83181(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127905_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127903_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_85162, + int64_t groups_per_segment_127889, + int64_t elements_per_thread_127890, + int64_t virt_num_groups_127891, + int64_t threads_per_segment_127893, + __global + unsigned char *mem_123614, + __global + unsigned char *mem_123618, + 
__global + unsigned char *mem_123623, + __global + unsigned char *group_res_arr_mem_127894, + __global + unsigned char *mainDetailedzicounter_mem_127896) +{ + #define segred_group_sizze_85161 (mainDetailedzisegred_group_sizze_83175) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127905_backing_1 = + (__local volatile + char *) sync_arr_mem_127905_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127903_backing_0 = + (__local volatile + char *) red_arr_mem_127903_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127898; + int32_t local_tid_127899; + int64_t group_sizze_127902; + int32_t wave_sizze_127901; + int32_t group_tid_127900; + + global_tid_127898 = get_global_id(0); + local_tid_127899 = get_local_id(0); + group_sizze_127902 = get_local_size(0); + wave_sizze_127901 = LOCKSTEP_WIDTH; + group_tid_127900 = get_group_id(0); + + int32_t phys_tid_83181; + + phys_tid_83181 = global_tid_127898; + + __local char *red_arr_mem_127903; + + red_arr_mem_127903 = (__local char *) red_arr_mem_127903_backing_0; + + __local char *sync_arr_mem_127905; + + sync_arr_mem_127905 = (__local char *) sync_arr_mem_127905_backing_1; + + int32_t phys_group_id_127907; + + phys_group_id_127907 = get_group_id(0); + for (int32_t i_127908 = 0; i_127908 < + sdiv_up32(sext_i64_i32(virt_num_groups_127891) - phys_group_id_127907, + sext_i64_i32(num_groups_85162)); i_127908++) { + int32_t virt_group_id_127909 = phys_group_id_127907 + i_127908 * + sext_i64_i32(num_groups_85162); + int32_t flat_segment_id_127910 = squot32(virt_group_id_127909, + sext_i64_i32(groups_per_segment_127889)); + int64_t global_tid_127911 = srem64(sext_i32_i64(virt_group_id_127909) * + segred_group_sizze_85161 + + sext_i32_i64(local_tid_127899), + segred_group_sizze_85161 * + groups_per_segment_127889); + int64_t gtid_83168 = squot64(sext_i32_i64(flat_segment_id_127910), + k2p2zq_70876 * 
k2p2zq_70876); + int64_t gtid_83169 = squot64(sext_i32_i64(flat_segment_id_127910) - + squot64(sext_i32_i64(flat_segment_id_127910), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_83170 = sext_i32_i64(flat_segment_id_127910) - + squot64(sext_i32_i64(flat_segment_id_127910), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(flat_segment_id_127910) - + squot64(sext_i32_i64(flat_segment_id_127910), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_83180; + double x_acc_127912; + int64_t chunk_sizze_127913; + + chunk_sizze_127913 = smin64(elements_per_thread_127890, + sdiv_up64(k2p2zq_70876 - global_tid_127911, + threads_per_segment_127893)); + + double x_85165; + double x_85166; + + // neutral-initialise the accumulators + { + x_acc_127912 = 0.0; + } + for (int64_t i_127917 = 0; i_127917 < chunk_sizze_127913; i_127917++) { + gtid_83180 = global_tid_127911 + threads_per_segment_127893 * + i_127917; + // apply map function + { + double x_85171 = ((__global double *) mem_123614)[gtid_83169 * + (k2p2zq_70876 * + m_70861) + + gtid_83168 * + k2p2zq_70876 + + gtid_83180]; + double x_85172 = ((__global double *) mem_123618)[gtid_83168 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_83170 * + k2p2zq_70876 + + gtid_83180]; + double defunc_1_f_res_85173 = x_85171 * x_85172; + + // save map-out results + { } + // load accumulator + { + x_85165 = x_acc_127912; + } + // load new values + { + x_85166 = defunc_1_f_res_85173; + } + // apply reduction operator + { + double defunc_1_op_res_85167 = x_85165 + x_85166; + + // store in accumulator + { + x_acc_127912 = defunc_1_op_res_85167; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_85165 = x_acc_127912; + ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_85165; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t 
offset_127918; + int32_t skip_waves_127919; + + skip_waves_127919 = 1; + + double x_127914; + double x_127915; + + offset_127918 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127899, + sext_i64_i32(segred_group_sizze_85161))) { + x_127914 = ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127918)]; + } + } + offset_127918 = 1; + while (slt32(offset_127918, wave_sizze_127901)) { + if (slt32(local_tid_127899 + offset_127918, + sext_i64_i32(segred_group_sizze_85161)) && + ((local_tid_127899 - squot32(local_tid_127899, + wave_sizze_127901) * + wave_sizze_127901) & (2 * offset_127918 - 1)) == 0) { + // read array element + { + x_127915 = ((volatile __local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127918)]; + } + // apply reduction operation + { + double defunc_1_op_res_127916 = x_127914 + x_127915; + + x_127914 = defunc_1_op_res_127916; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_127914; + } + } + offset_127918 *= 2; + } + while (slt32(skip_waves_127919, + squot32(sext_i64_i32(segred_group_sizze_85161) + + wave_sizze_127901 - 1, wave_sizze_127901))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127918 = skip_waves_127919 * wave_sizze_127901; + if (slt32(local_tid_127899 + offset_127918, + sext_i64_i32(segred_group_sizze_85161)) && + ((local_tid_127899 - squot32(local_tid_127899, + wave_sizze_127901) * + wave_sizze_127901) == 0 && (squot32(local_tid_127899, + wave_sizze_127901) & (2 * + skip_waves_127919 - + 1)) == + 0)) { + // read array element + { + x_127915 = ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127918)]; + } + // apply reduction operation + { + double defunc_1_op_res_127916 = x_127914 + x_127915; + + x_127914 = defunc_1_op_res_127916; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_127914; + } + } + skip_waves_127919 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127899) == (int64_t) 0) { + x_acc_127912 = x_127914; + } + } + if (groups_per_segment_127889 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127899 == 0) { + ((__global double *) mem_123623)[gtid_83168 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_83169 * k2p2zq_70876 + + gtid_83170] = x_acc_127912; + } + } + } else { + int32_t old_counter_127920; + + // first thread in group saves group result to global memory + { + if (local_tid_127899 == 0) { + ((__global + double *) group_res_arr_mem_127894)[sext_i32_i64(virt_group_id_127909) * + segred_group_sizze_85161] = + x_acc_127912; + mem_fence_global(); + old_counter_127920 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127896)[sext_i32_i64(srem32(flat_segment_id_127910, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127905)[(int64_t) 0] = + old_counter_127920 == groups_per_segment_127889 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127921; + + is_last_group_127921 = ((__local + bool *) sync_arr_mem_127905)[(int64_t) 0]; + if (is_last_group_127921) { + if (local_tid_127899 == 0) { + old_counter_127920 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127896)[sext_i32_i64(srem32(flat_segment_id_127910, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127889)); + } + // read in the per-group-results + { + int64_t read_per_thread_127922 = + sdiv_up64(groups_per_segment_127889, + segred_group_sizze_85161); + + x_85165 = 0.0; + for (int64_t i_127923 = 0; i_127923 < + read_per_thread_127922; i_127923++) { + int64_t group_res_id_127924 = + sext_i32_i64(local_tid_127899) * + read_per_thread_127922 + i_127923; + int64_t 
index_of_group_res_127925 = + sext_i32_i64(flat_segment_id_127910) * + groups_per_segment_127889 + group_res_id_127924; + + if (slt64(group_res_id_127924, + groups_per_segment_127889)) { + x_85166 = ((__global + double *) group_res_arr_mem_127894)[index_of_group_res_127925 * + segred_group_sizze_85161]; + + double defunc_1_op_res_85167; + + defunc_1_op_res_85167 = x_85165 + x_85166; + x_85165 = defunc_1_op_res_85167; + } + } + } + ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_85165; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127926; + int32_t skip_waves_127927; + + skip_waves_127927 = 1; + + double x_127914; + double x_127915; + + offset_127926 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127899, + sext_i64_i32(segred_group_sizze_85161))) { + x_127914 = ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127926)]; + } + } + offset_127926 = 1; + while (slt32(offset_127926, wave_sizze_127901)) { + if (slt32(local_tid_127899 + offset_127926, + sext_i64_i32(segred_group_sizze_85161)) && + ((local_tid_127899 - squot32(local_tid_127899, + wave_sizze_127901) * + wave_sizze_127901) & (2 * offset_127926 - 1)) == + 0) { + // read array element + { + x_127915 = ((volatile __local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127926)]; + } + // apply reduction operation + { + double defunc_1_op_res_127916 = x_127914 + + x_127915; + + x_127914 = defunc_1_op_res_127916; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_127914; + } + } + offset_127926 *= 2; + } + while (slt32(skip_waves_127927, + squot32(sext_i64_i32(segred_group_sizze_85161) + + wave_sizze_127901 - 1, + wave_sizze_127901))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127926 = skip_waves_127927 * wave_sizze_127901; + if (slt32(local_tid_127899 + offset_127926, + 
sext_i64_i32(segred_group_sizze_85161)) && + ((local_tid_127899 - squot32(local_tid_127899, + wave_sizze_127901) * + wave_sizze_127901) == 0 && + (squot32(local_tid_127899, wave_sizze_127901) & + (2 * skip_waves_127927 - 1)) == 0)) { + // read array element + { + x_127915 = ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899 + + offset_127926)]; + } + // apply reduction operation + { + double defunc_1_op_res_127916 = x_127914 + + x_127915; + + x_127914 = defunc_1_op_res_127916; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127903)[sext_i32_i64(local_tid_127899)] = + x_127914; + } + } + skip_waves_127927 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127899 == 0) { + ((__global double *) mem_123623)[gtid_83168 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_83169 * + k2p2zq_70876 + + gtid_83170] = + x_127914; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85161 +} +__kernel void mainDetailedzisegred_large_83459(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127761_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127759_backing_aligned_1, + int64_t k2p2zq_70876, + int64_t x_84993, int64_t i_84994, + int64_t j_m_i_84998, + int64_t num_groups_85080, + int64_t binop_x_120251, + int64_t groups_per_segment_127745, + int64_t elements_per_thread_127746, + int64_t virt_num_groups_127747, + int64_t threads_per_segment_127749, + __global + unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global + unsigned char *mem_123338, + __global + unsigned char *group_res_arr_mem_127750, + __global + unsigned char *mainDetailedzicounter_mem_127752) +{ + #define segred_group_sizze_85079 (mainDetailedzisegred_group_sizze_83453) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict 
sync_arr_mem_127761_backing_1 = + (__local volatile + char *) sync_arr_mem_127761_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127759_backing_0 = + (__local volatile + char *) red_arr_mem_127759_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127754; + int32_t local_tid_127755; + int64_t group_sizze_127758; + int32_t wave_sizze_127757; + int32_t group_tid_127756; + + global_tid_127754 = get_global_id(0); + local_tid_127755 = get_local_id(0); + group_sizze_127758 = get_local_size(0); + wave_sizze_127757 = LOCKSTEP_WIDTH; + group_tid_127756 = get_group_id(0); + + int32_t phys_tid_83459; + + phys_tid_83459 = global_tid_127754; + + __local char *red_arr_mem_127759; + + red_arr_mem_127759 = (__local char *) red_arr_mem_127759_backing_0; + + __local char *sync_arr_mem_127761; + + sync_arr_mem_127761 = (__local char *) sync_arr_mem_127761_backing_1; + + int32_t phys_group_id_127763; + + phys_group_id_127763 = get_group_id(0); + for (int32_t i_127764 = 0; i_127764 < + sdiv_up32(sext_i64_i32(virt_num_groups_127747) - phys_group_id_127763, + sext_i64_i32(num_groups_85080)); i_127764++) { + int32_t virt_group_id_127765 = phys_group_id_127763 + i_127764 * + sext_i64_i32(num_groups_85080); + int32_t flat_segment_id_127766 = squot32(virt_group_id_127765, + sext_i64_i32(groups_per_segment_127745)); + int64_t global_tid_127767 = srem64(sext_i32_i64(virt_group_id_127765) * + segred_group_sizze_85079 + + sext_i32_i64(local_tid_127755), + segred_group_sizze_85079 * + groups_per_segment_127745); + int64_t gtid_83448 = squot64(sext_i32_i64(flat_segment_id_127766), + k2p2zq_70876); + int64_t gtid_83449 = sext_i32_i64(flat_segment_id_127766) - + squot64(sext_i32_i64(flat_segment_id_127766), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_83458; + double x_acc_127768; + int64_t chunk_sizze_127769; + + chunk_sizze_127769 = smin64(elements_per_thread_127746, + sdiv_up64(j_m_i_84998 - global_tid_127767, + threads_per_segment_127749)); + + 
double x_85083; + double x_85084; + + // neutral-initialise the accumulators + { + x_acc_127768 = 0.0; + } + for (int64_t i_127773 = 0; i_127773 < chunk_sizze_127769; i_127773++) { + gtid_83458 = global_tid_127767 + threads_per_segment_127749 * + i_127773; + // apply map function + { + int64_t slice_115165 = gtid_83458 + x_84993; + double x_85090 = ((__global double *) mem_123143)[gtid_83448 * + (k2p2zq_70876 * + k2p2zq_70876) + + slice_115165 * + k2p2zq_70876 + + i_84994]; + bool isnan_res_85091; + + isnan_res_85091 = futrts_isnan64(x_85090); + + double defunc_1_f_res_85092; + + if (isnan_res_85091) { + defunc_1_f_res_85092 = 0.0; + } else { + double x_85089 = ((__global + double *) mem_param_123252)[gtid_83448 * + binop_x_120251 + + gtid_83449 * + k2p2zq_70876 + + slice_115165]; + double defunc_1_f_res_f_res_85093 = x_85089 * x_85090; + + defunc_1_f_res_85092 = defunc_1_f_res_f_res_85093; + } + // save map-out results + { } + // load accumulator + { + x_85083 = x_acc_127768; + } + // load new values + { + x_85084 = defunc_1_f_res_85092; + } + // apply reduction operator + { + double defunc_1_op_res_85085 = x_85083 + x_85084; + + // store in accumulator + { + x_acc_127768 = defunc_1_op_res_85085; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_85083 = x_acc_127768; + ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_85083; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127774; + int32_t skip_waves_127775; + + skip_waves_127775 = 1; + + double x_127770; + double x_127771; + + offset_127774 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127755, + sext_i64_i32(segred_group_sizze_85079))) { + x_127770 = ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127774)]; + } + } + offset_127774 = 1; + while (slt32(offset_127774, wave_sizze_127757)) { + if (slt32(local_tid_127755 + offset_127774, + 
sext_i64_i32(segred_group_sizze_85079)) && + ((local_tid_127755 - squot32(local_tid_127755, + wave_sizze_127757) * + wave_sizze_127757) & (2 * offset_127774 - 1)) == 0) { + // read array element + { + x_127771 = ((volatile __local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127774)]; + } + // apply reduction operation + { + double defunc_1_op_res_127772 = x_127770 + x_127771; + + x_127770 = defunc_1_op_res_127772; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_127770; + } + } + offset_127774 *= 2; + } + while (slt32(skip_waves_127775, + squot32(sext_i64_i32(segred_group_sizze_85079) + + wave_sizze_127757 - 1, wave_sizze_127757))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127774 = skip_waves_127775 * wave_sizze_127757; + if (slt32(local_tid_127755 + offset_127774, + sext_i64_i32(segred_group_sizze_85079)) && + ((local_tid_127755 - squot32(local_tid_127755, + wave_sizze_127757) * + wave_sizze_127757) == 0 && (squot32(local_tid_127755, + wave_sizze_127757) & (2 * + skip_waves_127775 - + 1)) == + 0)) { + // read array element + { + x_127771 = ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127774)]; + } + // apply reduction operation + { + double defunc_1_op_res_127772 = x_127770 + x_127771; + + x_127770 = defunc_1_op_res_127772; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_127770; + } + } + skip_waves_127775 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127755) == (int64_t) 0) { + x_acc_127768 = x_127770; + } + } + if (groups_per_segment_127745 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127755 == 0) { + ((__global double *) mem_123338)[gtid_83448 * k2p2zq_70876 + + gtid_83449] = x_acc_127768; + } + } + } else { + int32_t 
old_counter_127776; + + // first thread in group saves group result to global memory + { + if (local_tid_127755 == 0) { + ((__global + double *) group_res_arr_mem_127750)[sext_i32_i64(virt_group_id_127765) * + segred_group_sizze_85079] = + x_acc_127768; + mem_fence_global(); + old_counter_127776 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127752)[sext_i32_i64(srem32(flat_segment_id_127766, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127761)[(int64_t) 0] = + old_counter_127776 == groups_per_segment_127745 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127777; + + is_last_group_127777 = ((__local + bool *) sync_arr_mem_127761)[(int64_t) 0]; + if (is_last_group_127777) { + if (local_tid_127755 == 0) { + old_counter_127776 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127752)[sext_i32_i64(srem32(flat_segment_id_127766, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127745)); + } + // read in the per-group-results + { + int64_t read_per_thread_127778 = + sdiv_up64(groups_per_segment_127745, + segred_group_sizze_85079); + + x_85083 = 0.0; + for (int64_t i_127779 = 0; i_127779 < + read_per_thread_127778; i_127779++) { + int64_t group_res_id_127780 = + sext_i32_i64(local_tid_127755) * + read_per_thread_127778 + i_127779; + int64_t index_of_group_res_127781 = + sext_i32_i64(flat_segment_id_127766) * + groups_per_segment_127745 + group_res_id_127780; + + if (slt64(group_res_id_127780, + groups_per_segment_127745)) { + x_85084 = ((__global + double *) group_res_arr_mem_127750)[index_of_group_res_127781 * + segred_group_sizze_85079]; + + double defunc_1_op_res_85085; + + defunc_1_op_res_85085 = x_85083 + x_85084; + x_85083 = defunc_1_op_res_85085; + } + } + } + ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_85083; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t 
offset_127782; + int32_t skip_waves_127783; + + skip_waves_127783 = 1; + + double x_127770; + double x_127771; + + offset_127782 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127755, + sext_i64_i32(segred_group_sizze_85079))) { + x_127770 = ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127782)]; + } + } + offset_127782 = 1; + while (slt32(offset_127782, wave_sizze_127757)) { + if (slt32(local_tid_127755 + offset_127782, + sext_i64_i32(segred_group_sizze_85079)) && + ((local_tid_127755 - squot32(local_tid_127755, + wave_sizze_127757) * + wave_sizze_127757) & (2 * offset_127782 - 1)) == + 0) { + // read array element + { + x_127771 = ((volatile __local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127782)]; + } + // apply reduction operation + { + double defunc_1_op_res_127772 = x_127770 + + x_127771; + + x_127770 = defunc_1_op_res_127772; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_127770; + } + } + offset_127782 *= 2; + } + while (slt32(skip_waves_127783, + squot32(sext_i64_i32(segred_group_sizze_85079) + + wave_sizze_127757 - 1, + wave_sizze_127757))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127782 = skip_waves_127783 * wave_sizze_127757; + if (slt32(local_tid_127755 + offset_127782, + sext_i64_i32(segred_group_sizze_85079)) && + ((local_tid_127755 - squot32(local_tid_127755, + wave_sizze_127757) * + wave_sizze_127757) == 0 && + (squot32(local_tid_127755, wave_sizze_127757) & + (2 * skip_waves_127783 - 1)) == 0)) { + // read array element + { + x_127771 = ((__local + double *) red_arr_mem_127759)[sext_i32_i64(local_tid_127755 + + offset_127782)]; + } + // apply reduction operation + { + double defunc_1_op_res_127772 = x_127770 + + x_127771; + + x_127770 = defunc_1_op_res_127772; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_127759)[sext_i32_i64(local_tid_127755)] = + x_127770; + } + } + skip_waves_127783 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127755 == 0) { + ((__global double *) mem_123338)[gtid_83448 * + k2p2zq_70876 + + gtid_83449] = + x_127770; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85079 +} +__kernel void mainDetailedzisegred_large_84232(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127536_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127534_backing_aligned_1, + int64_t m_70861, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, + int64_t j_84524, + int64_t num_groups_84557, + int64_t groups_per_segment_127520, + int64_t elements_per_thread_127521, + int64_t virt_num_groups_127522, + int64_t threads_per_segment_127524, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122730, + __global + unsigned char *group_res_arr_mem_127525, + __global + unsigned char *mainDetailedzicounter_mem_127527) +{ + #define segred_group_sizze_84556 (mainDetailedzisegred_group_sizze_84226) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127536_backing_1 = + (__local volatile + char *) sync_arr_mem_127536_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127534_backing_0 = + (__local volatile + char *) red_arr_mem_127534_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127529; + int32_t local_tid_127530; + int64_t group_sizze_127533; + int32_t wave_sizze_127532; + int32_t group_tid_127531; + + global_tid_127529 = get_global_id(0); + local_tid_127530 = get_local_id(0); + group_sizze_127533 = get_local_size(0); + wave_sizze_127532 = LOCKSTEP_WIDTH; + group_tid_127531 = get_group_id(0); + + int32_t phys_tid_84232; + + phys_tid_84232 = global_tid_127529; + + __local 
char *red_arr_mem_127534; + + red_arr_mem_127534 = (__local char *) red_arr_mem_127534_backing_0; + + __local char *sync_arr_mem_127536; + + sync_arr_mem_127536 = (__local char *) sync_arr_mem_127536_backing_1; + + int32_t phys_group_id_127538; + + phys_group_id_127538 = get_group_id(0); + for (int32_t i_127539 = 0; i_127539 < + sdiv_up32(sext_i64_i32(virt_num_groups_127522) - phys_group_id_127538, + sext_i64_i32(num_groups_84557)); i_127539++) { + int32_t virt_group_id_127540 = phys_group_id_127538 + i_127539 * + sext_i64_i32(num_groups_84557); + int32_t flat_segment_id_127541 = squot32(virt_group_id_127540, + sext_i64_i32(groups_per_segment_127520)); + int64_t global_tid_127542 = srem64(sext_i32_i64(virt_group_id_127540) * + segred_group_sizze_84556 + + sext_i32_i64(local_tid_127530), + segred_group_sizze_84556 * + groups_per_segment_127520); + int64_t gtid_84223 = sext_i32_i64(flat_segment_id_127541); + int64_t gtid_84231; + double x_acc_127543; + int64_t chunk_sizze_127544; + + chunk_sizze_127544 = smin64(elements_per_thread_127521, + sdiv_up64(rp1_71562 - global_tid_127542, + threads_per_segment_127524)); + + double x_84560; + double x_84561; + + // neutral-initialise the accumulators + { + x_acc_127543 = 0.0; + } + for (int64_t i_127548 = 0; i_127548 < chunk_sizze_127544; i_127548++) { + gtid_84231 = global_tid_127542 + threads_per_segment_127524 * + i_127548; + // apply map function + { + double x_84564 = ((__global double *) mem_120246)[j_84524 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84223 * + defunc_2_reduce_res_70985 + + gtid_84231]; + double defunc_1_f_res_84565 = x_84564 * x_84564; + + // save map-out results + { } + // load accumulator + { + x_84560 = x_acc_127543; + } + // load new values + { + x_84561 = defunc_1_f_res_84565; + } + // apply reduction operator + { + double defunc_1_op_res_84562 = x_84560 + x_84561; + + // store in accumulator + { + x_acc_127543 = defunc_1_op_res_84562; + } + } + } + } + // to reduce current chunk, first 
store our result in memory + { + x_84560 = x_acc_127543; + ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_84560; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127549; + int32_t skip_waves_127550; + + skip_waves_127550 = 1; + + double x_127545; + double x_127546; + + offset_127549 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127530, + sext_i64_i32(segred_group_sizze_84556))) { + x_127545 = ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127549)]; + } + } + offset_127549 = 1; + while (slt32(offset_127549, wave_sizze_127532)) { + if (slt32(local_tid_127530 + offset_127549, + sext_i64_i32(segred_group_sizze_84556)) && + ((local_tid_127530 - squot32(local_tid_127530, + wave_sizze_127532) * + wave_sizze_127532) & (2 * offset_127549 - 1)) == 0) { + // read array element + { + x_127546 = ((volatile __local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127549)]; + } + // apply reduction operation + { + double defunc_1_op_res_127547 = x_127545 + x_127546; + + x_127545 = defunc_1_op_res_127547; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_127545; + } + } + offset_127549 *= 2; + } + while (slt32(skip_waves_127550, + squot32(sext_i64_i32(segred_group_sizze_84556) + + wave_sizze_127532 - 1, wave_sizze_127532))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127549 = skip_waves_127550 * wave_sizze_127532; + if (slt32(local_tid_127530 + offset_127549, + sext_i64_i32(segred_group_sizze_84556)) && + ((local_tid_127530 - squot32(local_tid_127530, + wave_sizze_127532) * + wave_sizze_127532) == 0 && (squot32(local_tid_127530, + wave_sizze_127532) & (2 * + skip_waves_127550 - + 1)) == + 0)) { + // read array element + { + x_127546 = ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127549)]; + } + // apply reduction operation + { + 
double defunc_1_op_res_127547 = x_127545 + x_127546; + + x_127545 = defunc_1_op_res_127547; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_127545; + } + } + skip_waves_127550 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127530) == (int64_t) 0) { + x_acc_127543 = x_127545; + } + } + if (groups_per_segment_127520 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127530 == 0) { + ((__global double *) mem_122730)[gtid_84223] = x_acc_127543; + } + } + } else { + int32_t old_counter_127551; + + // first thread in group saves group result to global memory + { + if (local_tid_127530 == 0) { + ((__global + double *) group_res_arr_mem_127525)[sext_i32_i64(virt_group_id_127540) * + segred_group_sizze_84556] = + x_acc_127543; + mem_fence_global(); + old_counter_127551 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127527)[sext_i32_i64(srem32(flat_segment_id_127541, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127536)[(int64_t) 0] = + old_counter_127551 == groups_per_segment_127520 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127552; + + is_last_group_127552 = ((__local + bool *) sync_arr_mem_127536)[(int64_t) 0]; + if (is_last_group_127552) { + if (local_tid_127530 == 0) { + old_counter_127551 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127527)[sext_i32_i64(srem32(flat_segment_id_127541, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127520)); + } + // read in the per-group-results + { + int64_t read_per_thread_127553 = + sdiv_up64(groups_per_segment_127520, + segred_group_sizze_84556); + + x_84560 = 0.0; + for (int64_t i_127554 = 0; i_127554 < + read_per_thread_127553; i_127554++) { + int64_t group_res_id_127555 = + 
sext_i32_i64(local_tid_127530) * + read_per_thread_127553 + i_127554; + int64_t index_of_group_res_127556 = + sext_i32_i64(flat_segment_id_127541) * + groups_per_segment_127520 + group_res_id_127555; + + if (slt64(group_res_id_127555, + groups_per_segment_127520)) { + x_84561 = ((__global + double *) group_res_arr_mem_127525)[index_of_group_res_127556 * + segred_group_sizze_84556]; + + double defunc_1_op_res_84562; + + defunc_1_op_res_84562 = x_84560 + x_84561; + x_84560 = defunc_1_op_res_84562; + } + } + } + ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_84560; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127557; + int32_t skip_waves_127558; + + skip_waves_127558 = 1; + + double x_127545; + double x_127546; + + offset_127557 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127530, + sext_i64_i32(segred_group_sizze_84556))) { + x_127545 = ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127557)]; + } + } + offset_127557 = 1; + while (slt32(offset_127557, wave_sizze_127532)) { + if (slt32(local_tid_127530 + offset_127557, + sext_i64_i32(segred_group_sizze_84556)) && + ((local_tid_127530 - squot32(local_tid_127530, + wave_sizze_127532) * + wave_sizze_127532) & (2 * offset_127557 - 1)) == + 0) { + // read array element + { + x_127546 = ((volatile __local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127557)]; + } + // apply reduction operation + { + double defunc_1_op_res_127547 = x_127545 + + x_127546; + + x_127545 = defunc_1_op_res_127547; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_127545; + } + } + offset_127557 *= 2; + } + while (slt32(skip_waves_127558, + squot32(sext_i64_i32(segred_group_sizze_84556) + + wave_sizze_127532 - 1, + wave_sizze_127532))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127557 = 
skip_waves_127558 * wave_sizze_127532; + if (slt32(local_tid_127530 + offset_127557, + sext_i64_i32(segred_group_sizze_84556)) && + ((local_tid_127530 - squot32(local_tid_127530, + wave_sizze_127532) * + wave_sizze_127532) == 0 && + (squot32(local_tid_127530, wave_sizze_127532) & + (2 * skip_waves_127558 - 1)) == 0)) { + // read array element + { + x_127546 = ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530 + + offset_127557)]; + } + // apply reduction operation + { + double defunc_1_op_res_127547 = x_127545 + + x_127546; + + x_127545 = defunc_1_op_res_127547; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127534)[sext_i32_i64(local_tid_127530)] = + x_127545; + } + } + skip_waves_127558 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127530 == 0) { + ((__global double *) mem_122730)[gtid_84223] = + x_127545; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84556 +} +__kernel void mainDetailedzisegred_large_84385(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127450_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127448_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t r_71551, + int64_t num_groups_84464, + int64_t groups_per_segment_127434, + int64_t elements_per_thread_127435, + int64_t virt_num_groups_127436, + int64_t threads_per_segment_127438, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global + unsigned char *mem_122677, + __global + unsigned char *group_res_arr_mem_127439, + __global + unsigned char *mainDetailedzicounter_mem_127441) +{ + #define segred_group_sizze_84463 (mainDetailedzisegred_group_sizze_84379) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127450_backing_1 = 
+ (__local volatile + char *) sync_arr_mem_127450_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127448_backing_0 = + (__local volatile + char *) red_arr_mem_127448_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127443; + int32_t local_tid_127444; + int64_t group_sizze_127447; + int32_t wave_sizze_127446; + int32_t group_tid_127445; + + global_tid_127443 = get_global_id(0); + local_tid_127444 = get_local_id(0); + group_sizze_127447 = get_local_size(0); + wave_sizze_127446 = LOCKSTEP_WIDTH; + group_tid_127445 = get_group_id(0); + + int32_t phys_tid_84385; + + phys_tid_84385 = global_tid_127443; + + __local char *red_arr_mem_127448; + + red_arr_mem_127448 = (__local char *) red_arr_mem_127448_backing_0; + + __local char *sync_arr_mem_127450; + + sync_arr_mem_127450 = (__local char *) sync_arr_mem_127450_backing_1; + + int32_t phys_group_id_127452; + + phys_group_id_127452 = get_group_id(0); + for (int32_t i_127453 = 0; i_127453 < + sdiv_up32(sext_i64_i32(virt_num_groups_127436) - phys_group_id_127452, + sext_i64_i32(num_groups_84464)); i_127453++) { + int32_t virt_group_id_127454 = phys_group_id_127452 + i_127453 * + sext_i64_i32(num_groups_84464); + int32_t flat_segment_id_127455 = squot32(virt_group_id_127454, + sext_i64_i32(groups_per_segment_127434)); + int64_t global_tid_127456 = srem64(sext_i32_i64(virt_group_id_127454) * + segred_group_sizze_84463 + + sext_i32_i64(local_tid_127444), + segred_group_sizze_84463 * + groups_per_segment_127434); + int64_t gtid_84376 = sext_i32_i64(flat_segment_id_127455); + int64_t gtid_84384; + double x_acc_127457; + int64_t chunk_sizze_127458; + + chunk_sizze_127458 = smin64(elements_per_thread_127435, + sdiv_up64(k2p2zq_70876 - global_tid_127456, + threads_per_segment_127438)); + + double x_84467; + double x_84468; + + // neutral-initialise the accumulators + { + x_acc_127457 = 0.0; + } + for (int64_t i_127462 = 0; i_127462 < chunk_sizze_127458; i_127462++) { + gtid_84384 = 
global_tid_127456 + threads_per_segment_127438 * + i_127462; + // apply map function + { + double x_84472 = ((__global double *) mem_120246)[gtid_84384 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84376 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_84473 = ((__global + double *) mem_param_121967)[gtid_84376 * + k2p2zq_70876 + + gtid_84384]; + double defunc_1_f_res_84474 = x_84472 * x_84473; + + // save map-out results + { } + // load accumulator + { + x_84467 = x_acc_127457; + } + // load new values + { + x_84468 = defunc_1_f_res_84474; + } + // apply reduction operator + { + double defunc_1_op_res_84469 = x_84467 + x_84468; + + // store in accumulator + { + x_acc_127457 = defunc_1_op_res_84469; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_84467 = x_acc_127457; + ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_84467; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127463; + int32_t skip_waves_127464; + + skip_waves_127464 = 1; + + double x_127459; + double x_127460; + + offset_127463 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127444, + sext_i64_i32(segred_group_sizze_84463))) { + x_127459 = ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127463)]; + } + } + offset_127463 = 1; + while (slt32(offset_127463, wave_sizze_127446)) { + if (slt32(local_tid_127444 + offset_127463, + sext_i64_i32(segred_group_sizze_84463)) && + ((local_tid_127444 - squot32(local_tid_127444, + wave_sizze_127446) * + wave_sizze_127446) & (2 * offset_127463 - 1)) == 0) { + // read array element + { + x_127460 = ((volatile __local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127463)]; + } + // apply reduction operation + { + double defunc_1_op_res_127461 = x_127459 + x_127460; + + x_127459 = defunc_1_op_res_127461; + } + // write result of operation + { + ((volatile __local + double *) 
red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_127459; + } + } + offset_127463 *= 2; + } + while (slt32(skip_waves_127464, + squot32(sext_i64_i32(segred_group_sizze_84463) + + wave_sizze_127446 - 1, wave_sizze_127446))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127463 = skip_waves_127464 * wave_sizze_127446; + if (slt32(local_tid_127444 + offset_127463, + sext_i64_i32(segred_group_sizze_84463)) && + ((local_tid_127444 - squot32(local_tid_127444, + wave_sizze_127446) * + wave_sizze_127446) == 0 && (squot32(local_tid_127444, + wave_sizze_127446) & (2 * + skip_waves_127464 - + 1)) == + 0)) { + // read array element + { + x_127460 = ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127463)]; + } + // apply reduction operation + { + double defunc_1_op_res_127461 = x_127459 + x_127460; + + x_127459 = defunc_1_op_res_127461; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_127459; + } + } + skip_waves_127464 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127444) == (int64_t) 0) { + x_acc_127457 = x_127459; + } + } + if (groups_per_segment_127434 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127444 == 0) { + ((__global double *) mem_122677)[gtid_84376] = x_acc_127457; + } + } + } else { + int32_t old_counter_127465; + + // first thread in group saves group result to global memory + { + if (local_tid_127444 == 0) { + ((__global + double *) group_res_arr_mem_127439)[sext_i32_i64(virt_group_id_127454) * + segred_group_sizze_84463] = + x_acc_127457; + mem_fence_global(); + old_counter_127465 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127441)[sext_i32_i64(srem32(flat_segment_id_127455, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127450)[(int64_t) 0] = + old_counter_127465 == 
groups_per_segment_127434 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127466; + + is_last_group_127466 = ((__local + bool *) sync_arr_mem_127450)[(int64_t) 0]; + if (is_last_group_127466) { + if (local_tid_127444 == 0) { + old_counter_127465 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127441)[sext_i32_i64(srem32(flat_segment_id_127455, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127434)); + } + // read in the per-group-results + { + int64_t read_per_thread_127467 = + sdiv_up64(groups_per_segment_127434, + segred_group_sizze_84463); + + x_84467 = 0.0; + for (int64_t i_127468 = 0; i_127468 < + read_per_thread_127467; i_127468++) { + int64_t group_res_id_127469 = + sext_i32_i64(local_tid_127444) * + read_per_thread_127467 + i_127468; + int64_t index_of_group_res_127470 = + sext_i32_i64(flat_segment_id_127455) * + groups_per_segment_127434 + group_res_id_127469; + + if (slt64(group_res_id_127469, + groups_per_segment_127434)) { + x_84468 = ((__global + double *) group_res_arr_mem_127439)[index_of_group_res_127470 * + segred_group_sizze_84463]; + + double defunc_1_op_res_84469; + + defunc_1_op_res_84469 = x_84467 + x_84468; + x_84467 = defunc_1_op_res_84469; + } + } + } + ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_84467; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127471; + int32_t skip_waves_127472; + + skip_waves_127472 = 1; + + double x_127459; + double x_127460; + + offset_127471 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127444, + sext_i64_i32(segred_group_sizze_84463))) { + x_127459 = ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127471)]; + } + } + offset_127471 = 1; + while (slt32(offset_127471, wave_sizze_127446)) { + if (slt32(local_tid_127444 + offset_127471, + 
sext_i64_i32(segred_group_sizze_84463)) && + ((local_tid_127444 - squot32(local_tid_127444, + wave_sizze_127446) * + wave_sizze_127446) & (2 * offset_127471 - 1)) == + 0) { + // read array element + { + x_127460 = ((volatile __local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127471)]; + } + // apply reduction operation + { + double defunc_1_op_res_127461 = x_127459 + + x_127460; + + x_127459 = defunc_1_op_res_127461; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_127459; + } + } + offset_127471 *= 2; + } + while (slt32(skip_waves_127472, + squot32(sext_i64_i32(segred_group_sizze_84463) + + wave_sizze_127446 - 1, + wave_sizze_127446))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127471 = skip_waves_127472 * wave_sizze_127446; + if (slt32(local_tid_127444 + offset_127471, + sext_i64_i32(segred_group_sizze_84463)) && + ((local_tid_127444 - squot32(local_tid_127444, + wave_sizze_127446) * + wave_sizze_127446) == 0 && + (squot32(local_tid_127444, wave_sizze_127446) & + (2 * skip_waves_127472 - 1)) == 0)) { + // read array element + { + x_127460 = ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444 + + offset_127471)]; + } + // apply reduction operation + { + double defunc_1_op_res_127461 = x_127459 + + x_127460; + + x_127459 = defunc_1_op_res_127461; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127448)[sext_i32_i64(local_tid_127444)] = + x_127459; + } + } + skip_waves_127472 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127444 == 0) { + ((__global double *) mem_122677)[gtid_84376] = + x_127459; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84463 +} +__kernel void mainDetailedzisegred_large_84414(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127384_backing_aligned_0, + 
__local volatile + int64_t *red_arr_mem_127382_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t r_71551, + int64_t num_groups_84433, + int64_t groups_per_segment_127368, + int64_t elements_per_thread_127369, + int64_t virt_num_groups_127370, + int64_t threads_per_segment_127372, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122665, + __global + unsigned char *mem_122668, + __global + unsigned char *mem_122671, + __global + unsigned char *group_res_arr_mem_127373, + __global + unsigned char *mainDetailedzicounter_mem_127375) +{ + #define segred_group_sizze_84432 (mainDetailedzisegred_group_sizze_84408) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127384_backing_1 = + (__local volatile + char *) sync_arr_mem_127384_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127382_backing_0 = + (__local volatile + char *) red_arr_mem_127382_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127377; + int32_t local_tid_127378; + int64_t group_sizze_127381; + int32_t wave_sizze_127380; + int32_t group_tid_127379; + + global_tid_127377 = get_global_id(0); + local_tid_127378 = get_local_id(0); + group_sizze_127381 = get_local_size(0); + wave_sizze_127380 = LOCKSTEP_WIDTH; + group_tid_127379 = get_group_id(0); + + int32_t phys_tid_84414; + + phys_tid_84414 = global_tid_127377; + + __local char *red_arr_mem_127382; + + red_arr_mem_127382 = (__local char *) red_arr_mem_127382_backing_0; + + __local char *sync_arr_mem_127384; + + sync_arr_mem_127384 = (__local char *) sync_arr_mem_127384_backing_1; + + int32_t phys_group_id_127386; + + phys_group_id_127386 = get_group_id(0); + for (int32_t i_127387 = 0; i_127387 < + sdiv_up32(sext_i64_i32(virt_num_groups_127370) - phys_group_id_127386, + sext_i64_i32(num_groups_84433)); i_127387++) { + int32_t virt_group_id_127388 = 
phys_group_id_127386 + i_127387 * + sext_i64_i32(num_groups_84433); + int32_t flat_segment_id_127389 = squot32(virt_group_id_127388, + sext_i64_i32(groups_per_segment_127368)); + int64_t global_tid_127390 = srem64(sext_i32_i64(virt_group_id_127388) * + segred_group_sizze_84432 + + sext_i32_i64(local_tid_127378), + segred_group_sizze_84432 * + groups_per_segment_127368); + int64_t gtid_84405 = sext_i32_i64(flat_segment_id_127389); + int64_t gtid_84413; + double x_acc_127391; + int64_t chunk_sizze_127392; + + chunk_sizze_127392 = smin64(elements_per_thread_127369, + sdiv_up64(k2p2zq_70876 - global_tid_127390, + threads_per_segment_127372)); + + double x_84437; + double x_84438; + + // neutral-initialise the accumulators + { + x_acc_127391 = 0.0; + } + for (int64_t i_127396 = 0; i_127396 < chunk_sizze_127392; i_127396++) { + gtid_84413 = global_tid_127390 + threads_per_segment_127372 * + i_127396; + // apply map function + { + double x_84443 = ((__global double *) mem_120246)[gtid_84413 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84405 * + defunc_2_reduce_res_70985 + + r_71551]; + double defunc_0_f_res_84444; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_70876; + i_119830++) { + double x_84448 = ((__global double *) mem_120246)[i_119830 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84405 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_84449 = ((__global double *) mem_122665)[i_119830 * + (k2p2zq_70876 * + m_70861) + + gtid_84405 * + k2p2zq_70876 + + gtid_84413]; + double defunc_1_f_res_84450 = x_84448 * x_84449; + double defunc_1_op_res_84447 = defunc_1_f_res_84450 + + redout_119829; + double redout_tmp_127397 = defunc_1_op_res_84447; + + redout_119829 = redout_tmp_127397; + } + defunc_0_f_res_84444 = redout_119829; + + double defunc_1_f_res_84451 = x_84443 * defunc_0_f_res_84444; + + // save map-out results + { + ((__global double *) mem_122671)[gtid_84405 * k2p2zq_70876 + + gtid_84413] = + defunc_0_f_res_84444; 
+ } + // load accumulator + { + x_84437 = x_acc_127391; + } + // load new values + { + x_84438 = defunc_1_f_res_84451; + } + // apply reduction operator + { + double defunc_1_op_res_84439 = x_84437 + x_84438; + + // store in accumulator + { + x_acc_127391 = defunc_1_op_res_84439; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_84437 = x_acc_127391; + ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_84437; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127398; + int32_t skip_waves_127399; + + skip_waves_127399 = 1; + + double x_127393; + double x_127394; + + offset_127398 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127378, + sext_i64_i32(segred_group_sizze_84432))) { + x_127393 = ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127398)]; + } + } + offset_127398 = 1; + while (slt32(offset_127398, wave_sizze_127380)) { + if (slt32(local_tid_127378 + offset_127398, + sext_i64_i32(segred_group_sizze_84432)) && + ((local_tid_127378 - squot32(local_tid_127378, + wave_sizze_127380) * + wave_sizze_127380) & (2 * offset_127398 - 1)) == 0) { + // read array element + { + x_127394 = ((volatile __local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127398)]; + } + // apply reduction operation + { + double defunc_1_op_res_127395 = x_127393 + x_127394; + + x_127393 = defunc_1_op_res_127395; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_127393; + } + } + offset_127398 *= 2; + } + while (slt32(skip_waves_127399, + squot32(sext_i64_i32(segred_group_sizze_84432) + + wave_sizze_127380 - 1, wave_sizze_127380))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127398 = skip_waves_127399 * wave_sizze_127380; + if (slt32(local_tid_127378 + offset_127398, + sext_i64_i32(segred_group_sizze_84432)) && + ((local_tid_127378 - 
squot32(local_tid_127378, + wave_sizze_127380) * + wave_sizze_127380) == 0 && (squot32(local_tid_127378, + wave_sizze_127380) & (2 * + skip_waves_127399 - + 1)) == + 0)) { + // read array element + { + x_127394 = ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127398)]; + } + // apply reduction operation + { + double defunc_1_op_res_127395 = x_127393 + x_127394; + + x_127393 = defunc_1_op_res_127395; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_127393; + } + } + skip_waves_127399 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127378) == (int64_t) 0) { + x_acc_127391 = x_127393; + } + } + if (groups_per_segment_127368 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127378 == 0) { + ((__global double *) mem_122668)[gtid_84405] = x_acc_127391; + } + } + } else { + int32_t old_counter_127400; + + // first thread in group saves group result to global memory + { + if (local_tid_127378 == 0) { + ((__global + double *) group_res_arr_mem_127373)[sext_i32_i64(virt_group_id_127388) * + segred_group_sizze_84432] = + x_acc_127391; + mem_fence_global(); + old_counter_127400 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127375)[sext_i32_i64(srem32(flat_segment_id_127389, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127384)[(int64_t) 0] = + old_counter_127400 == groups_per_segment_127368 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127401; + + is_last_group_127401 = ((__local + bool *) sync_arr_mem_127384)[(int64_t) 0]; + if (is_last_group_127401) { + if (local_tid_127378 == 0) { + old_counter_127400 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127375)[sext_i32_i64(srem32(flat_segment_id_127389, + 10240))], + 
(int) ((int64_t) 0 - + groups_per_segment_127368)); + } + // read in the per-group-results + { + int64_t read_per_thread_127402 = + sdiv_up64(groups_per_segment_127368, + segred_group_sizze_84432); + + x_84437 = 0.0; + for (int64_t i_127403 = 0; i_127403 < + read_per_thread_127402; i_127403++) { + int64_t group_res_id_127404 = + sext_i32_i64(local_tid_127378) * + read_per_thread_127402 + i_127403; + int64_t index_of_group_res_127405 = + sext_i32_i64(flat_segment_id_127389) * + groups_per_segment_127368 + group_res_id_127404; + + if (slt64(group_res_id_127404, + groups_per_segment_127368)) { + x_84438 = ((__global + double *) group_res_arr_mem_127373)[index_of_group_res_127405 * + segred_group_sizze_84432]; + + double defunc_1_op_res_84439; + + defunc_1_op_res_84439 = x_84437 + x_84438; + x_84437 = defunc_1_op_res_84439; + } + } + } + ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_84437; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127406; + int32_t skip_waves_127407; + + skip_waves_127407 = 1; + + double x_127393; + double x_127394; + + offset_127406 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127378, + sext_i64_i32(segred_group_sizze_84432))) { + x_127393 = ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127406)]; + } + } + offset_127406 = 1; + while (slt32(offset_127406, wave_sizze_127380)) { + if (slt32(local_tid_127378 + offset_127406, + sext_i64_i32(segred_group_sizze_84432)) && + ((local_tid_127378 - squot32(local_tid_127378, + wave_sizze_127380) * + wave_sizze_127380) & (2 * offset_127406 - 1)) == + 0) { + // read array element + { + x_127394 = ((volatile __local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127406)]; + } + // apply reduction operation + { + double defunc_1_op_res_127395 = x_127393 + + x_127394; + + x_127393 = defunc_1_op_res_127395; + } + // write result of operation 
+ { + ((volatile __local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_127393; + } + } + offset_127406 *= 2; + } + while (slt32(skip_waves_127407, + squot32(sext_i64_i32(segred_group_sizze_84432) + + wave_sizze_127380 - 1, + wave_sizze_127380))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127406 = skip_waves_127407 * wave_sizze_127380; + if (slt32(local_tid_127378 + offset_127406, + sext_i64_i32(segred_group_sizze_84432)) && + ((local_tid_127378 - squot32(local_tid_127378, + wave_sizze_127380) * + wave_sizze_127380) == 0 && + (squot32(local_tid_127378, wave_sizze_127380) & + (2 * skip_waves_127407 - 1)) == 0)) { + // read array element + { + x_127394 = ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378 + + offset_127406)]; + } + // apply reduction operation + { + double defunc_1_op_res_127395 = x_127393 + + x_127394; + + x_127393 = defunc_1_op_res_127395; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127382)[sext_i32_i64(local_tid_127378)] = + x_127393; + } + } + skip_waves_127407 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127378 == 0) { + ((__global double *) mem_122668)[gtid_84405] = + x_127393; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84432 +} +__kernel void mainDetailedzisegred_large_85711(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128259_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128257_backing_aligned_1, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85919, + int64_t binop_x_120251, + int64_t groups_per_segment_128243, + int64_t elements_per_thread_128244, + int64_t virt_num_groups_128245, + int64_t threads_per_segment_128247, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char 
*mem_123907, + __global + unsigned char *mem_123910, + __global + unsigned char *mem_123944, + __global + unsigned char *mem_123948, + __global + unsigned char *group_res_arr_mem_128248, + __global + unsigned char *mainDetailedzicounter_mem_128250) +{ + #define segred_group_sizze_85918 (mainDetailedzisegred_group_sizze_85705) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128259_backing_1 = + (__local volatile + char *) sync_arr_mem_128259_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128257_backing_0 = + (__local volatile + char *) red_arr_mem_128257_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128252; + int32_t local_tid_128253; + int64_t group_sizze_128256; + int32_t wave_sizze_128255; + int32_t group_tid_128254; + + global_tid_128252 = get_global_id(0); + local_tid_128253 = get_local_id(0); + group_sizze_128256 = get_local_size(0); + wave_sizze_128255 = LOCKSTEP_WIDTH; + group_tid_128254 = get_group_id(0); + + int32_t phys_tid_85711; + + phys_tid_85711 = global_tid_128252; + + __local char *red_arr_mem_128257; + + red_arr_mem_128257 = (__local char *) red_arr_mem_128257_backing_0; + + __local char *sync_arr_mem_128259; + + sync_arr_mem_128259 = (__local char *) sync_arr_mem_128259_backing_1; + + int32_t phys_group_id_128261; + + phys_group_id_128261 = get_group_id(0); + for (int32_t i_128262 = 0; i_128262 < + sdiv_up32(sext_i64_i32(virt_num_groups_128245) - phys_group_id_128261, + sext_i64_i32(num_groups_85919)); i_128262++) { + int32_t virt_group_id_128263 = phys_group_id_128261 + i_128262 * + sext_i64_i32(num_groups_85919); + int32_t flat_segment_id_128264 = squot32(virt_group_id_128263, + sext_i64_i32(groups_per_segment_128243)); + int64_t global_tid_128265 = srem64(sext_i32_i64(virt_group_id_128263) * + segred_group_sizze_85918 + + sext_i32_i64(local_tid_128253), + segred_group_sizze_85918 * + 
groups_per_segment_128243); + int64_t gtid_85700 = squot64(sext_i32_i64(flat_segment_id_128264), + k2p2zq_70876); + int64_t gtid_85701 = sext_i32_i64(flat_segment_id_128264) - + squot64(sext_i32_i64(flat_segment_id_128264), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_85710; + double x_acc_128266; + int64_t chunk_sizze_128267; + + chunk_sizze_128267 = smin64(elements_per_thread_128244, + sdiv_up64(k2p2zq_70876 - global_tid_128265, + threads_per_segment_128247)); + + double x_85923; + double x_85924; + + // neutral-initialise the accumulators + { + x_acc_128266 = 0.0; + } + for (int64_t i_128271 = 0; i_128271 < chunk_sizze_128267; i_128271++) { + gtid_85710 = global_tid_128265 + threads_per_segment_128247 * + i_128271; + // apply map function + { + double fr_85929 = ((__global double *) mem_123910)[gtid_85700]; + double x_85930 = ((__global double *) mem_123907)[gtid_85700 * + k2p2zq_70876 + + gtid_85701]; + double x_85932 = ((__global double *) mem_123907)[gtid_85700 * + k2p2zq_70876 + + gtid_85710]; + double x_85933 = ((__global + double *) mem_param_123778)[gtid_85700 * + binop_x_120251 + + gtid_85701 * + k2p2zq_70876 + + gtid_85710]; + double x_85934 = ((__global double *) mem_120246)[gtid_85710 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85700 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85935 = x_85930 * x_85932; + double y_85936 = x_85935 / fr_85929; + double defunc_1_f_res_85937 = x_85933 - y_85936; + double defunc_1_f_res_85938 = x_85934 * defunc_1_f_res_85937; + + // save map-out results + { + ((__global double *) mem_123948)[gtid_85700 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_85701 * k2p2zq_70876 + + gtid_85710] = + defunc_1_f_res_85937; + } + // load accumulator + { + x_85923 = x_acc_128266; + } + // load new values + { + x_85924 = defunc_1_f_res_85938; + } + // apply reduction operator + { + double defunc_1_op_res_85925 = x_85923 + x_85924; + + // store in accumulator + { + x_acc_128266 = defunc_1_op_res_85925; + } + 
} + } + } + // to reduce current chunk, first store our result in memory + { + x_85923 = x_acc_128266; + ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_85923; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128272; + int32_t skip_waves_128273; + + skip_waves_128273 = 1; + + double x_128268; + double x_128269; + + offset_128272 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128253, + sext_i64_i32(segred_group_sizze_85918))) { + x_128268 = ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + offset_128272)]; + } + } + offset_128272 = 1; + while (slt32(offset_128272, wave_sizze_128255)) { + if (slt32(local_tid_128253 + offset_128272, + sext_i64_i32(segred_group_sizze_85918)) && + ((local_tid_128253 - squot32(local_tid_128253, + wave_sizze_128255) * + wave_sizze_128255) & (2 * offset_128272 - 1)) == 0) { + // read array element + { + x_128269 = ((volatile __local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + offset_128272)]; + } + // apply reduction operation + { + double defunc_1_op_res_128270 = x_128268 + x_128269; + + x_128268 = defunc_1_op_res_128270; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_128268; + } + } + offset_128272 *= 2; + } + while (slt32(skip_waves_128273, + squot32(sext_i64_i32(segred_group_sizze_85918) + + wave_sizze_128255 - 1, wave_sizze_128255))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128272 = skip_waves_128273 * wave_sizze_128255; + if (slt32(local_tid_128253 + offset_128272, + sext_i64_i32(segred_group_sizze_85918)) && + ((local_tid_128253 - squot32(local_tid_128253, + wave_sizze_128255) * + wave_sizze_128255) == 0 && (squot32(local_tid_128253, + wave_sizze_128255) & (2 * + skip_waves_128273 - + 1)) == + 0)) { + // read array element + { + x_128269 = ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + 
offset_128272)]; + } + // apply reduction operation + { + double defunc_1_op_res_128270 = x_128268 + x_128269; + + x_128268 = defunc_1_op_res_128270; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_128268; + } + } + skip_waves_128273 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128253) == (int64_t) 0) { + x_acc_128266 = x_128268; + } + } + if (groups_per_segment_128243 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128253 == 0) { + ((__global double *) mem_123944)[gtid_85700 * k2p2zq_70876 + + gtid_85701] = x_acc_128266; + } + } + } else { + int32_t old_counter_128274; + + // first thread in group saves group result to global memory + { + if (local_tid_128253 == 0) { + ((__global + double *) group_res_arr_mem_128248)[sext_i32_i64(virt_group_id_128263) * + segred_group_sizze_85918] = + x_acc_128266; + mem_fence_global(); + old_counter_128274 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128250)[sext_i32_i64(srem32(flat_segment_id_128264, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128259)[(int64_t) 0] = + old_counter_128274 == groups_per_segment_128243 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128275; + + is_last_group_128275 = ((__local + bool *) sync_arr_mem_128259)[(int64_t) 0]; + if (is_last_group_128275) { + if (local_tid_128253 == 0) { + old_counter_128274 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128250)[sext_i32_i64(srem32(flat_segment_id_128264, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128243)); + } + // read in the per-group-results + { + int64_t read_per_thread_128276 = + sdiv_up64(groups_per_segment_128243, + segred_group_sizze_85918); + + x_85923 = 0.0; + for (int64_t i_128277 = 0; i_128277 < + 
read_per_thread_128276; i_128277++) { + int64_t group_res_id_128278 = + sext_i32_i64(local_tid_128253) * + read_per_thread_128276 + i_128277; + int64_t index_of_group_res_128279 = + sext_i32_i64(flat_segment_id_128264) * + groups_per_segment_128243 + group_res_id_128278; + + if (slt64(group_res_id_128278, + groups_per_segment_128243)) { + x_85924 = ((__global + double *) group_res_arr_mem_128248)[index_of_group_res_128279 * + segred_group_sizze_85918]; + + double defunc_1_op_res_85925; + + defunc_1_op_res_85925 = x_85923 + x_85924; + x_85923 = defunc_1_op_res_85925; + } + } + } + ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_85923; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128280; + int32_t skip_waves_128281; + + skip_waves_128281 = 1; + + double x_128268; + double x_128269; + + offset_128280 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128253, + sext_i64_i32(segred_group_sizze_85918))) { + x_128268 = ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + offset_128280)]; + } + } + offset_128280 = 1; + while (slt32(offset_128280, wave_sizze_128255)) { + if (slt32(local_tid_128253 + offset_128280, + sext_i64_i32(segred_group_sizze_85918)) && + ((local_tid_128253 - squot32(local_tid_128253, + wave_sizze_128255) * + wave_sizze_128255) & (2 * offset_128280 - 1)) == + 0) { + // read array element + { + x_128269 = ((volatile __local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + offset_128280)]; + } + // apply reduction operation + { + double defunc_1_op_res_128270 = x_128268 + + x_128269; + + x_128268 = defunc_1_op_res_128270; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_128268; + } + } + offset_128280 *= 2; + } + while (slt32(skip_waves_128281, + squot32(sext_i64_i32(segred_group_sizze_85918) + + wave_sizze_128255 - 1, + 
wave_sizze_128255))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128280 = skip_waves_128281 * wave_sizze_128255; + if (slt32(local_tid_128253 + offset_128280, + sext_i64_i32(segred_group_sizze_85918)) && + ((local_tid_128253 - squot32(local_tid_128253, + wave_sizze_128255) * + wave_sizze_128255) == 0 && + (squot32(local_tid_128253, wave_sizze_128255) & + (2 * skip_waves_128281 - 1)) == 0)) { + // read array element + { + x_128269 = ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253 + + offset_128280)]; + } + // apply reduction operation + { + double defunc_1_op_res_128270 = x_128268 + + x_128269; + + x_128268 = defunc_1_op_res_128270; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128257)[sext_i32_i64(local_tid_128253)] = + x_128268; + } + } + skip_waves_128281 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128253 == 0) { + ((__global double *) mem_123944)[gtid_85700 * + k2p2zq_70876 + + gtid_85701] = + x_128268; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85918 +}
+// mainDetailedzisegred_large_85786 -- segmented sum reduction in which
+// several work-groups may cooperate on one segment.
+//
+// For every segment s (flat_segment_id_128188, also held in gtid_85777) the
+// kernel sums, over the inner index j (gtid_85785, bounded by k2p2zq_70876),
+// the product
+//     mem_120246[j * (defunc_2_reduce_res_70985 * m_70861)
+//                + s * defunc_2_reduce_res_70985 + index_primexp_72162]
+//   * mem_param_123786[s * k2p2zq_70876 + j]
+// with both buffers accessed as arrays of double, and stores the sum in
+// mem_123913[s].
+//
+// Parameters:
+//   global_failure: the kernel returns at once when *global_failure >= 0.
+//   sync_arr_mem_128183_backing_aligned_0: local memory; element 0 is used
+//       as the bool flag that tells the group whether it finished last.
+//   red_arr_mem_128181_backing_aligned_1: local memory indexed by local id,
+//       one double per work-item, used by the in-group reduction.
+//   m_70861, k2p2zq_70876, defunc_2_reduce_res_70985, index_primexp_72162:
+//       sizes and offsets used in the index arithmetic shown above.
+//   num_groups_85860: stride between the virtual groups that one physical
+//       group processes.
+//   groups_per_segment_128167: number of virtual groups working on the same
+//       segment.
+//   elements_per_thread_128168: upper bound on the elements one work-item
+//       sums.
+//   virt_num_groups_128169: total number of virtual groups.
+//   threads_per_segment_128171: stride between successive elements summed
+//       by the same work-item.
+//   mem_120246, mem_param_123786: input buffers.
+//   mem_123913: output buffer, one double per segment.
+//   group_res_arr_mem_128172: global scratch for per-group partial sums.
+//   mainDetailedzicounter_mem_128174: global int counters used to detect
+//       the last group of a segment.
+//
+// NOTE(review): slt32, slt64, squot32, srem32, srem64, sdiv_up32, sdiv_up64,
+// smin64, sext_i32_i64, sext_i64_i32, mem_fence_global,
+// atomic_add_i32_global, LOCKSTEP_WIDTH and
+// mainDetailedzisegred_group_sizze_85780 are defined outside this kernel;
+// the comments below assume the meaning suggested by their names -- confirm.
+__kernel void mainDetailedzisegred_large_85786(__global int *global_failure,
+                                               __local volatile
+                                               int64_t *sync_arr_mem_128183_backing_aligned_0,
+                                               __local volatile
+                                               int64_t *red_arr_mem_128181_backing_aligned_1,
+                                               int64_t m_70861,
+                                               int64_t k2p2zq_70876,
+                                               int64_t defunc_2_reduce_res_70985,
+                                               int64_t index_primexp_72162,
+                                               int64_t num_groups_85860,
+                                               int64_t groups_per_segment_128167,
+                                               int64_t elements_per_thread_128168,
+                                               int64_t virt_num_groups_128169,
+                                               int64_t threads_per_segment_128171,
+                                               __global
+                                               unsigned char *mem_120246,
+                                               __global
+                                               unsigned char *mem_param_123786,
+                                               __global
+                                               unsigned char *mem_123913,
+                                               __global
+                                               unsigned char *group_res_arr_mem_128172,
+                                               __global
+                                               unsigned char *mainDetailedzicounter_mem_128174)
+{
+    #define segred_group_sizze_85859 (mainDetailedzisegred_group_sizze_85780)
+
+    const int block_dim0 = 0;
+    const int block_dim1 = 1;
+    const int block_dim2 = 2;
+    __local volatile char *restrict sync_arr_mem_128183_backing_1 =
+                          (__local volatile
+                           char *) sync_arr_mem_128183_backing_aligned_0;
+    __local volatile char *restrict red_arr_mem_128181_backing_0 =
+                          (__local volatile
+                           char *) red_arr_mem_128181_backing_aligned_1;
+
+    // Do nothing when global_failure already holds a non-negative value.
+    if (*global_failure >= 0)
+        return;
+
+    int32_t global_tid_128176;
+    int32_t local_tid_128177;
+    int64_t group_sizze_128180;
+    int32_t wave_sizze_128179;
+    int32_t group_tid_128178;
+
+    global_tid_128176 = get_global_id(0);
+    local_tid_128177 = get_local_id(0);
+    group_sizze_128180 = get_local_size(0);
+    wave_sizze_128179 = LOCKSTEP_WIDTH;
+    group_tid_128178 = get_group_id(0);
+
+    int32_t phys_tid_85786;
+
+    phys_tid_85786 = global_tid_128176;
+
+    __local char *red_arr_mem_128181;
+
+    red_arr_mem_128181 = (__local char *) red_arr_mem_128181_backing_0;
+
+    __local char *sync_arr_mem_128183;
+
+    sync_arr_mem_128183 = (__local char *) sync_arr_mem_128183_backing_1;
+
+    int32_t phys_group_id_128185;
+
+    phys_group_id_128185 = get_group_id(0);
+    // One physical group handles the virtual groups phys_group_id,
+    // phys_group_id + num_groups, ... below virt_num_groups.
+    for (int32_t i_128186 = 0; i_128186 <
+         sdiv_up32(sext_i64_i32(virt_num_groups_128169) - phys_group_id_128185,
+                   sext_i64_i32(num_groups_85860)); i_128186++) {
+        int32_t virt_group_id_128187 = phys_group_id_128185 + i_128186 *
+                sext_i64_i32(num_groups_85860);
+        // Segment served by this virtual group, and this work-item's index
+        // among the work-items of that segment.
+        int32_t flat_segment_id_128188 = squot32(virt_group_id_128187,
+                                                 sext_i64_i32(groups_per_segment_128167));
+        int64_t global_tid_128189 = srem64(sext_i32_i64(virt_group_id_128187) *
+                                           segred_group_sizze_85859 +
+                                           sext_i32_i64(local_tid_128177),
+                                           segred_group_sizze_85859 *
+                                           groups_per_segment_128167);
+        int64_t gtid_85777 = sext_i32_i64(flat_segment_id_128188);
+        int64_t gtid_85785;
+        double x_acc_128190;
+        int64_t chunk_sizze_128191;
+
+        // Elements this work-item sums: capped by elements_per_thread and by
+        // how many of the k2p2zq_70876 elements remain at this stride.
+        chunk_sizze_128191 = smin64(elements_per_thread_128168,
+                                    sdiv_up64(k2p2zq_70876 - global_tid_128189,
+                                              threads_per_segment_128171));
+
+        double x_85863;
+        double x_85864;
+
+        // neutral-initialise the accumulators
+        {
+            x_acc_128190 = 0.0;
+        }
+        for (int64_t i_128195 = 0; i_128195 < chunk_sizze_128191; i_128195++) {
+            gtid_85785 = global_tid_128189 + threads_per_segment_128171 *
+                i_128195;
+            // apply map function
+            {
+                double x_85868 = ((__global double *) mem_120246)[gtid_85785 *
+                                                                  (defunc_2_reduce_res_70985 *
+                                                                   m_70861) +
+                                                                  gtid_85777 *
+                                                                  defunc_2_reduce_res_70985 +
+                                                                  index_primexp_72162];
+                double x_85869 = ((__global
+                                   double *) mem_param_123786)[gtid_85777 *
+                                                               k2p2zq_70876 +
+                                                               gtid_85785];
+                double defunc_1_f_res_85870 = x_85868 * x_85869;
+
+                // save map-out results
+                { }
+                // load accumulator
+                {
+                    x_85863 = x_acc_128190;
+                }
+                // load new values
+                {
+                    x_85864 = defunc_1_f_res_85870;
+                }
+                // apply reduction operator
+                {
+                    double defunc_1_op_res_85865 = x_85863 + x_85864;
+
+                    // store in accumulator
+                    {
+                        x_acc_128190 = defunc_1_op_res_85865;
+                    }
+                }
+            }
+        }
+        // to reduce current chunk, first store our result in memory
+        {
+            x_85863 = x_acc_128190;
+            ((__local
+              double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                x_85863;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int32_t offset_128196;
+        int32_t skip_waves_128197;
+
+        skip_waves_128197 = 1;
+
+        double x_128192;
+        double x_128193;
+
+        offset_128196 = 0;
+        // participating threads read initial accumulator
+        {
+            if (slt32(local_tid_128177,
+                      sext_i64_i32(segred_group_sizze_85859))) {
+                x_128192 = ((__local
+                             double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                           offset_128196)];
+            }
+        }
+        offset_128196 = 1;
+        // Stage 1: pairwise reduction among the work-items of one wave, with
+        // the distance doubling each step.  No barrier is issued inside this
+        // loop; the local array is accessed through volatile pointers.
+        while (slt32(offset_128196, wave_sizze_128179)) {
+            if (slt32(local_tid_128177 + offset_128196,
+                      sext_i64_i32(segred_group_sizze_85859)) &&
+                ((local_tid_128177 - squot32(local_tid_128177,
+                                             wave_sizze_128179) *
+                  wave_sizze_128179) & (2 * offset_128196 - 1)) == 0) {
+                // read array element
+                {
+                    x_128193 = ((volatile __local
+                                 double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                               offset_128196)];
+                }
+                // apply reduction operation
+                {
+                    double defunc_1_op_res_128194 = x_128192 + x_128193;
+
+                    x_128192 = defunc_1_op_res_128194;
+                }
+                // write result of operation
+                {
+                    ((volatile __local
+                      double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                        x_128192;
+                }
+            }
+            offset_128196 *= 2;
+        }
+        // Stage 2: combine the per-wave results.  Only the first work-item
+        // of a wave whose wave index is a multiple of 2 * skip_waves takes
+        // part, and a barrier precedes every step.
+        while (slt32(skip_waves_128197,
+                     squot32(sext_i64_i32(segred_group_sizze_85859) +
+                             wave_sizze_128179 - 1, wave_sizze_128179))) {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset_128196 = skip_waves_128197 * wave_sizze_128179;
+            if (slt32(local_tid_128177 + offset_128196,
+                      sext_i64_i32(segred_group_sizze_85859)) &&
+                ((local_tid_128177 - squot32(local_tid_128177,
+                                             wave_sizze_128179) *
+                  wave_sizze_128179) == 0 && (squot32(local_tid_128177,
+                                                      wave_sizze_128179) & (2 *
+                                                                            skip_waves_128197 -
+                                                                            1)) ==
+                 0)) {
+                // read array element
+                {
+                    x_128193 = ((__local
+                                 double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                               offset_128196)];
+                }
+                // apply reduction operation
+                {
+                    double defunc_1_op_res_128194 = x_128192 + x_128193;
+
+                    x_128192 = defunc_1_op_res_128194;
+                }
+                // write result of operation
+                {
+                    ((__local
+                      double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                        x_128192;
+                }
+            }
+            skip_waves_128197 *= 2;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        // first thread saves the result in accumulator
+        {
+            if (sext_i32_i64(local_tid_128177) == (int64_t) 0) {
+                x_acc_128190 = x_128192;
+            }
+        }
+        if (groups_per_segment_128167 == (int64_t) 1) {
+            // first thread in group saves final result to memory
+            {
+                if (local_tid_128177 == 0) {
+                    ((__global double *) mem_123913)[gtid_85777] = x_acc_128190;
+                }
+            }
+        } else {
+            // Several groups share this segment: publish the partial sum,
+            // bump the segment counter, and let the group that observes
+            // groups_per_segment - 1 in the counter finish the reduction.
+            int32_t old_counter_128198;
+
+            // first thread in group saves group result to global memory
+            {
+                if (local_tid_128177 == 0) {
+                    ((__global
+                      double *) group_res_arr_mem_128172)[sext_i32_i64(virt_group_id_128187) *
+                                                          segred_group_sizze_85859] =
+                        x_acc_128190;
+                    // The fence is issued after the partial-sum store and
+                    // before the counter update.
+                    mem_fence_global();
+                    // Counter slot is srem32(flat_segment_id, 10240);
+                    // presumably the value before the add is returned.
+                    old_counter_128198 =
+                        atomic_add_i32_global(&((volatile __global
+                                                 int *) mainDetailedzicounter_mem_128174)[sext_i32_i64(srem32(flat_segment_id_128188,
+                                                                                                              10240))],
+                                              (int) 1);
+                    ((__local bool *) sync_arr_mem_128183)[(int64_t) 0] =
+                        old_counter_128198 == groups_per_segment_128167 -
+                        (int64_t) 1;
+                }
+            }
+            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+
+            bool is_last_group_128199;
+
+            // Every work-item reads the flag written by work-item 0.
+            is_last_group_128199 = ((__local
+                                     bool *) sync_arr_mem_128183)[(int64_t) 0];
+            if (is_last_group_128199) {
+                if (local_tid_128177 == 0) {
+                    // Subtract groups_per_segment again, cancelling the
+                    // increments made for this segment.
+                    old_counter_128198 =
+                        atomic_add_i32_global(&((volatile __global
+                                                 int *) mainDetailedzicounter_mem_128174)[sext_i32_i64(srem32(flat_segment_id_128188,
+                                                                                                              10240))],
+                                              (int) ((int64_t) 0 -
+                                                     groups_per_segment_128167));
+                }
+                // read in the per-group-results
+                {
+                    int64_t read_per_thread_128200 =
+                            sdiv_up64(groups_per_segment_128167,
+                                      segred_group_sizze_85859);
+
+                    x_85863 = 0.0;
+                    for (int64_t i_128201 = 0; i_128201 <
+                         read_per_thread_128200; i_128201++) {
+                        int64_t group_res_id_128202 =
+                                sext_i32_i64(local_tid_128177) *
+                                read_per_thread_128200 + i_128201;
+                        int64_t index_of_group_res_128203 =
+                                sext_i32_i64(flat_segment_id_128188) *
+                                groups_per_segment_128167 + group_res_id_128202;
+
+                        // Skip indices past the last group of the segment.
+                        if (slt64(group_res_id_128202,
+                                  groups_per_segment_128167)) {
+                            x_85864 = ((__global
+                                        double *) group_res_arr_mem_128172)[index_of_group_res_128203 *
+                                                                            segred_group_sizze_85859];
+
+                            double defunc_1_op_res_85865;
+
+                            defunc_1_op_res_85865 = x_85863 + x_85864;
+                            x_85863 = defunc_1_op_res_85865;
+                        }
+                    }
+                }
+                ((__local
+                  double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                    x_85863;
+                barrier(CLK_LOCAL_MEM_FENCE);
+                // reduce the per-group results
+                {
+                    int32_t offset_128204;
+                    int32_t skip_waves_128205;
+
+                    skip_waves_128205 = 1;
+
+                    // These declarations shadow the outer x_128192 and
+                    // x_128193 for the duration of this second reduction.
+                    double x_128192;
+                    double x_128193;
+
+                    offset_128204 = 0;
+                    // participating threads read initial accumulator
+                    {
+                        if (slt32(local_tid_128177,
+                                  sext_i64_i32(segred_group_sizze_85859))) {
+                            x_128192 = ((__local
+                                         double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                                       offset_128204)];
+                        }
+                    }
+                    offset_128204 = 1;
+                    // Same two-stage scheme as above: first within a wave ...
+                    while (slt32(offset_128204, wave_sizze_128179)) {
+                        if (slt32(local_tid_128177 + offset_128204,
+                                  sext_i64_i32(segred_group_sizze_85859)) &&
+                            ((local_tid_128177 - squot32(local_tid_128177,
+                                                         wave_sizze_128179) *
+                              wave_sizze_128179) & (2 * offset_128204 - 1)) ==
+                            0) {
+                            // read array element
+                            {
+                                x_128193 = ((volatile __local
+                                             double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                                           offset_128204)];
+                            }
+                            // apply reduction operation
+                            {
+                                double defunc_1_op_res_128194 = x_128192 +
+                                       x_128193;
+
+                                x_128192 = defunc_1_op_res_128194;
+                            }
+                            // write result of operation
+                            {
+                                ((volatile __local
+                                  double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                                    x_128192;
+                            }
+                        }
+                        offset_128204 *= 2;
+                    }
+                    // ... then across waves, with a barrier before each step.
+                    while (slt32(skip_waves_128205,
+                                 squot32(sext_i64_i32(segred_group_sizze_85859) +
+                                         wave_sizze_128179 - 1,
+                                         wave_sizze_128179))) {
+                        barrier(CLK_LOCAL_MEM_FENCE);
+                        offset_128204 = skip_waves_128205 * wave_sizze_128179;
+                        if (slt32(local_tid_128177 + offset_128204,
+                                  sext_i64_i32(segred_group_sizze_85859)) &&
+                            ((local_tid_128177 - squot32(local_tid_128177,
+                                                         wave_sizze_128179) *
+                              wave_sizze_128179) == 0 &&
+                             (squot32(local_tid_128177, wave_sizze_128179) &
+                              (2 * skip_waves_128205 - 1)) == 0)) {
+                            // read array element
+                            {
+                                x_128193 = ((__local
+                                             double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177 +
+                                                                           offset_128204)];
+                            }
+                            // apply reduction operation
+                            {
+                                double defunc_1_op_res_128194 = x_128192 +
+                                       x_128193;
+
+                                x_128192 = defunc_1_op_res_128194;
+                            }
+                            // write result of operation
+                            {
+                                ((__local
+                                  double *) red_arr_mem_128181)[sext_i32_i64(local_tid_128177)] =
+                                    x_128192;
+                            }
+                        }
+                        skip_waves_128205 *= 2;
+                    }
+                    // and back to memory with the final result
+                    {
+                        if (local_tid_128177 == 0) {
+                            ((__global double *) mem_123913)[gtid_85777] =
+                                x_128192;
+                        }
+                    }
+                }
+            }
+        }
+        // Synchronise the group before it moves on to its next virtual group.
+        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+    }
+
+  error_1:
+    return;
+    #undef segred_group_sizze_85859
+}
+// mainDetailedzisegred_large_85813 -- segmented sum reduction with a fused
+// map-out result, in which several work-groups may cooperate on one segment.
+//
+// For every segment s (flat_segment_id_128122, also held in gtid_85804) and
+// inner index j (gtid_85812, bounded by k2p2zq_70876) the kernel computes
+//     t(s, j) = sum over i in [0, k2p2zq_70876) of
+//         mem_120246[i * (defunc_2_reduce_res_70985 * m_70861)
+//                    + s * defunc_2_reduce_res_70985 + index_primexp_72162]
+//       * mem_123901[i * (k2p2zq_70876 * m_70861) + s * k2p2zq_70876 + j]
+// and stores it in mem_123907[s * k2p2zq_70876 + j].  It then sums over j
+//     mem_120246[j * (defunc_2_reduce_res_70985 * m_70861)
+//                + s * defunc_2_reduce_res_70985 + index_primexp_72162]
+//       * t(s, j)
+// and stores that total in mem_123904[s].  All buffers are accessed as
+// arrays of double.
+//
+// Parameters:
+//   global_failure: the kernel returns at once when *global_failure >= 0.
+//   sync_arr_mem_128117_backing_aligned_0: local memory; element 0 is used
+//       as the bool flag that tells the group whether it finished last.
+//   red_arr_mem_128115_backing_aligned_1: local memory indexed by local id,
+//       one double per work-item, used by the in-group reduction.
+//   m_70861, k2p2zq_70876, defunc_2_reduce_res_70985, index_primexp_72162:
+//       sizes and offsets used in the index arithmetic shown above.
+//   num_groups_85832: stride between the virtual groups that one physical
+//       group processes.
+//   groups_per_segment_128101: number of virtual groups working on the same
+//       segment.
+//   elements_per_thread_128102: upper bound on the elements one work-item
+//       sums.
+//   virt_num_groups_128103: total number of virtual groups.
+//   threads_per_segment_128105: stride between successive elements summed
+//       by the same work-item.
+//   mem_120246, mem_123901: input buffers.
+//   mem_123907: output buffer for t(s, j).  mem_123904: output buffer with
+//       one double per segment.
+//   group_res_arr_mem_128106: global scratch for per-group partial sums.
+//   mainDetailedzicounter_mem_128108: global int counters used to detect
+//       the last group of a segment.
+//
+// NOTE(review): slt32, slt64, squot32, srem32, srem64, sdiv_up32, sdiv_up64,
+// smin64, sext_i32_i64, sext_i64_i32, mem_fence_global,
+// atomic_add_i32_global, LOCKSTEP_WIDTH and
+// mainDetailedzisegred_group_sizze_85807 are defined outside this kernel;
+// the comments below assume the meaning suggested by their names -- confirm.
+__kernel void mainDetailedzisegred_large_85813(__global int *global_failure,
+                                               __local volatile
+                                               int64_t *sync_arr_mem_128117_backing_aligned_0,
+                                               __local volatile
+                                               int64_t *red_arr_mem_128115_backing_aligned_1,
+                                               int64_t m_70861,
+                                               int64_t k2p2zq_70876,
+                                               int64_t defunc_2_reduce_res_70985,
+                                               int64_t index_primexp_72162,
+                                               int64_t num_groups_85832,
+                                               int64_t groups_per_segment_128101,
+                                               int64_t elements_per_thread_128102,
+                                               int64_t virt_num_groups_128103,
+                                               int64_t threads_per_segment_128105,
+                                               __global
+                                               unsigned char *mem_120246,
+                                               __global
+                                               unsigned char *mem_123901,
+                                               __global
+                                               unsigned char *mem_123904,
+                                               __global
+                                               unsigned char *mem_123907,
+                                               __global
+                                               unsigned char *group_res_arr_mem_128106,
+                                               __global
+                                               unsigned char *mainDetailedzicounter_mem_128108)
+{
+    #define segred_group_sizze_85831 (mainDetailedzisegred_group_sizze_85807)
+
+    const int block_dim0 = 0;
+    const int block_dim1 = 1;
+    const int block_dim2 = 2;
+    __local volatile char *restrict sync_arr_mem_128117_backing_1 =
+                          (__local volatile
+                           char *) sync_arr_mem_128117_backing_aligned_0;
+    __local volatile char *restrict red_arr_mem_128115_backing_0 =
+                          (__local volatile
+                           char *) red_arr_mem_128115_backing_aligned_1;
+
+    // Do nothing when global_failure already holds a non-negative value.
+    if (*global_failure >= 0)
+        return;
+
+    int32_t global_tid_128110;
+    int32_t local_tid_128111;
+    int64_t group_sizze_128114;
+    int32_t wave_sizze_128113;
+    int32_t group_tid_128112;
+
+    global_tid_128110 = get_global_id(0);
+    local_tid_128111 = get_local_id(0);
+    group_sizze_128114 = get_local_size(0);
+    wave_sizze_128113 = LOCKSTEP_WIDTH;
+    group_tid_128112 = get_group_id(0);
+
+    int32_t phys_tid_85813;
+
+    phys_tid_85813 = global_tid_128110;
+
+    __local char *red_arr_mem_128115;
+
+    red_arr_mem_128115 = (__local char *) red_arr_mem_128115_backing_0;
+
+    __local char *sync_arr_mem_128117;
+
+    sync_arr_mem_128117 = (__local char *) sync_arr_mem_128117_backing_1;
+
+    int32_t phys_group_id_128119;
+
+    phys_group_id_128119 = get_group_id(0);
+    // One physical group handles the virtual groups phys_group_id,
+    // phys_group_id + num_groups, ... below virt_num_groups.
+    for (int32_t i_128120 = 0; i_128120 <
+         sdiv_up32(sext_i64_i32(virt_num_groups_128103) - phys_group_id_128119,
+                   sext_i64_i32(num_groups_85832)); i_128120++) {
+        int32_t virt_group_id_128121 = phys_group_id_128119 + i_128120 *
+                sext_i64_i32(num_groups_85832);
+        // Segment served by this virtual group, and this work-item's index
+        // among the work-items of that segment.
+        int32_t flat_segment_id_128122 = squot32(virt_group_id_128121,
+                                                 sext_i64_i32(groups_per_segment_128101));
+        int64_t global_tid_128123 = srem64(sext_i32_i64(virt_group_id_128121) *
+                                           segred_group_sizze_85831 +
+                                           sext_i32_i64(local_tid_128111),
+                                           segred_group_sizze_85831 *
+                                           groups_per_segment_128101);
+        int64_t gtid_85804 = sext_i32_i64(flat_segment_id_128122);
+        int64_t gtid_85812;
+        double x_acc_128124;
+        int64_t chunk_sizze_128125;
+
+        // Elements this work-item sums: capped by elements_per_thread and by
+        // how many of the k2p2zq_70876 elements remain at this stride.
+        chunk_sizze_128125 = smin64(elements_per_thread_128102,
+                                    sdiv_up64(k2p2zq_70876 - global_tid_128123,
+                                              threads_per_segment_128105));
+
+        double x_85836;
+        double x_85837;
+
+        // neutral-initialise the accumulators
+        {
+            x_acc_128124 = 0.0;
+        }
+        for (int64_t i_128129 = 0; i_128129 < chunk_sizze_128125; i_128129++) {
+            gtid_85812 = global_tid_128123 + threads_per_segment_128105 *
+                i_128129;
+            // apply map function
+            {
+                double x_85842 = ((__global double *) mem_120246)[gtid_85812 *
+                                                                  (defunc_2_reduce_res_70985 *
+                                                                   m_70861) +
+                                                                  gtid_85804 *
+                                                                  defunc_2_reduce_res_70985 +
+                                                                  index_primexp_72162];
+                double defunc_0_f_res_85843;
+                double redout_119889 = 0.0;
+
+                // Sequential inner sum t(s, j) over i, computed entirely by
+                // this work-item.
+                for (int64_t i_119890 = 0; i_119890 < k2p2zq_70876;
+                     i_119890++) {
+                    double x_85847 = ((__global double *) mem_120246)[i_119890 *
+                                                                      (defunc_2_reduce_res_70985 *
+                                                                       m_70861) +
+                                                                      gtid_85804 *
+                                                                      defunc_2_reduce_res_70985 +
+                                                                      index_primexp_72162];
+                    double x_85848 = ((__global double *) mem_123901)[i_119890 *
+                                                                      (k2p2zq_70876 *
+                                                                       m_70861) +
+                                                                      gtid_85804 *
+                                                                      k2p2zq_70876 +
+                                                                      gtid_85812];
+                    double defunc_1_f_res_85849 = x_85847 * x_85848;
+                    double defunc_1_op_res_85846 = defunc_1_f_res_85849 +
+                           redout_119889;
+                    double redout_tmp_128130 = defunc_1_op_res_85846;
+
+                    redout_119889 = redout_tmp_128130;
+                }
+                defunc_0_f_res_85843 = redout_119889;
+
+                // Value fed into the segment-wide sum.
+                double defunc_1_f_res_85850 = x_85842 * defunc_0_f_res_85843;
+
+                // save map-out results
+                {
+                    ((__global double *) mem_123907)[gtid_85804 * k2p2zq_70876 +
+                                                     gtid_85812] =
+                        defunc_0_f_res_85843;
+                }
+                // load accumulator
+                {
+                    x_85836 = x_acc_128124;
+                }
+                // load new values
+                {
+                    x_85837 = defunc_1_f_res_85850;
+                }
+                // apply reduction operator
+                {
+                    double defunc_1_op_res_85838 = x_85836 + x_85837;
+
+                    // store in accumulator
+                    {
+                        x_acc_128124 = defunc_1_op_res_85838;
+                    }
+                }
+            }
+        }
+        // to reduce current chunk, first store our result in memory
+        {
+            x_85836 = x_acc_128124;
+            ((__local
+              double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                x_85836;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int32_t offset_128131;
+        int32_t skip_waves_128132;
+
+        skip_waves_128132 = 1;
+
+        double x_128126;
+        double x_128127;
+
+        offset_128131 = 0;
+        // participating threads read initial accumulator
+        {
+            if (slt32(local_tid_128111,
+                      sext_i64_i32(segred_group_sizze_85831))) {
+                x_128126 = ((__local
+                             double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                           offset_128131)];
+            }
+        }
+        offset_128131 = 1;
+        // Stage 1: pairwise reduction among the work-items of one wave, with
+        // the distance doubling each step.  No barrier is issued inside this
+        // loop; the local array is accessed through volatile pointers.
+        while (slt32(offset_128131, wave_sizze_128113)) {
+            if (slt32(local_tid_128111 + offset_128131,
+                      sext_i64_i32(segred_group_sizze_85831)) &&
+                ((local_tid_128111 - squot32(local_tid_128111,
+                                             wave_sizze_128113) *
+                  wave_sizze_128113) & (2 * offset_128131 - 1)) == 0) {
+                // read array element
+                {
+                    x_128127 = ((volatile __local
+                                 double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                               offset_128131)];
+                }
+                // apply reduction operation
+                {
+                    double defunc_1_op_res_128128 = x_128126 + x_128127;
+
+                    x_128126 = defunc_1_op_res_128128;
+                }
+                // write result of operation
+                {
+                    ((volatile __local
+                      double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                        x_128126;
+                }
+            }
+            offset_128131 *= 2;
+        }
+        // Stage 2: combine the per-wave results.  Only the first work-item
+        // of a wave whose wave index is a multiple of 2 * skip_waves takes
+        // part, and a barrier precedes every step.
+        while (slt32(skip_waves_128132,
+                     squot32(sext_i64_i32(segred_group_sizze_85831) +
+                             wave_sizze_128113 - 1, wave_sizze_128113))) {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            offset_128131 = skip_waves_128132 * wave_sizze_128113;
+            if (slt32(local_tid_128111 + offset_128131,
+                      sext_i64_i32(segred_group_sizze_85831)) &&
+                ((local_tid_128111 - squot32(local_tid_128111,
+                                             wave_sizze_128113) *
+                  wave_sizze_128113) == 0 && (squot32(local_tid_128111,
+                                                      wave_sizze_128113) & (2 *
+                                                                            skip_waves_128132 -
+                                                                            1)) ==
+                 0)) {
+                // read array element
+                {
+                    x_128127 = ((__local
+                                 double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                               offset_128131)];
+                }
+                // apply reduction operation
+                {
+                    double defunc_1_op_res_128128 = x_128126 + x_128127;
+
+                    x_128126 = defunc_1_op_res_128128;
+                }
+                // write result of operation
+                {
+                    ((__local
+                      double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                        x_128126;
+                }
+            }
+            skip_waves_128132 *= 2;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+        // first thread saves the result in accumulator
+        {
+            if (sext_i32_i64(local_tid_128111) == (int64_t) 0) {
+                x_acc_128124 = x_128126;
+            }
+        }
+        if (groups_per_segment_128101 == (int64_t) 1) {
+            // first thread in group saves final result to memory
+            {
+                if (local_tid_128111 == 0) {
+                    ((__global double *) mem_123904)[gtid_85804] = x_acc_128124;
+                }
+            }
+        } else {
+            // Several groups share this segment: publish the partial sum,
+            // bump the segment counter, and let the group that observes
+            // groups_per_segment - 1 in the counter finish the reduction.
+            int32_t old_counter_128133;
+
+            // first thread in group saves group result to global memory
+            {
+                if (local_tid_128111 == 0) {
+                    ((__global
+                      double *) group_res_arr_mem_128106)[sext_i32_i64(virt_group_id_128121) *
+                                                          segred_group_sizze_85831] =
+                        x_acc_128124;
+                    // The fence is issued after the partial-sum store and
+                    // before the counter update.
+                    mem_fence_global();
+                    // Counter slot is srem32(flat_segment_id, 10240);
+                    // presumably the value before the add is returned.
+                    old_counter_128133 =
+                        atomic_add_i32_global(&((volatile __global
+                                                 int *) mainDetailedzicounter_mem_128108)[sext_i32_i64(srem32(flat_segment_id_128122,
+                                                                                                              10240))],
+                                              (int) 1);
+                    ((__local bool *) sync_arr_mem_128117)[(int64_t) 0] =
+                        old_counter_128133 == groups_per_segment_128101 -
+                        (int64_t) 1;
+                }
+            }
+            barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+
+            bool is_last_group_128134;
+
+            // Every work-item reads the flag written by work-item 0.
+            is_last_group_128134 = ((__local
+                                     bool *) sync_arr_mem_128117)[(int64_t) 0];
+            if (is_last_group_128134) {
+                if (local_tid_128111 == 0) {
+                    // Subtract groups_per_segment again, cancelling the
+                    // increments made for this segment.
+                    old_counter_128133 =
+                        atomic_add_i32_global(&((volatile __global
+                                                 int *) mainDetailedzicounter_mem_128108)[sext_i32_i64(srem32(flat_segment_id_128122,
+                                                                                                              10240))],
+                                              (int) ((int64_t) 0 -
+                                                     groups_per_segment_128101));
+                }
+                // read in the per-group-results
+                {
+                    int64_t read_per_thread_128135 =
+                            sdiv_up64(groups_per_segment_128101,
+                                      segred_group_sizze_85831);
+
+                    x_85836 = 0.0;
+                    for (int64_t i_128136 = 0; i_128136 <
+                         read_per_thread_128135; i_128136++) {
+                        int64_t group_res_id_128137 =
+                                sext_i32_i64(local_tid_128111) *
+                                read_per_thread_128135 + i_128136;
+                        int64_t index_of_group_res_128138 =
+                                sext_i32_i64(flat_segment_id_128122) *
+                                groups_per_segment_128101 + group_res_id_128137;
+
+                        // Skip indices past the last group of the segment.
+                        if (slt64(group_res_id_128137,
+                                  groups_per_segment_128101)) {
+                            x_85837 = ((__global
+                                        double *) group_res_arr_mem_128106)[index_of_group_res_128138 *
+                                                                            segred_group_sizze_85831];
+
+                            double defunc_1_op_res_85838;
+
+                            defunc_1_op_res_85838 = x_85836 + x_85837;
+                            x_85836 = defunc_1_op_res_85838;
+                        }
+                    }
+                }
+                ((__local
+                  double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                    x_85836;
+                barrier(CLK_LOCAL_MEM_FENCE);
+                // reduce the per-group results
+                {
+                    int32_t offset_128139;
+                    int32_t skip_waves_128140;
+
+                    skip_waves_128140 = 1;
+
+                    // These declarations shadow the outer x_128126 and
+                    // x_128127 for the duration of this second reduction.
+                    double x_128126;
+                    double x_128127;
+
+                    offset_128139 = 0;
+                    // participating threads read initial accumulator
+                    {
+                        if (slt32(local_tid_128111,
+                                  sext_i64_i32(segred_group_sizze_85831))) {
+                            x_128126 = ((__local
+                                         double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                                       offset_128139)];
+                        }
+                    }
+                    offset_128139 = 1;
+                    // Same two-stage scheme as above: first within a wave ...
+                    while (slt32(offset_128139, wave_sizze_128113)) {
+                        if (slt32(local_tid_128111 + offset_128139,
+                                  sext_i64_i32(segred_group_sizze_85831)) &&
+                            ((local_tid_128111 - squot32(local_tid_128111,
+                                                         wave_sizze_128113) *
+                              wave_sizze_128113) & (2 * offset_128139 - 1)) ==
+                            0) {
+                            // read array element
+                            {
+                                x_128127 = ((volatile __local
+                                             double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                                           offset_128139)];
+                            }
+                            // apply reduction operation
+                            {
+                                double defunc_1_op_res_128128 = x_128126 +
+                                       x_128127;
+
+                                x_128126 = defunc_1_op_res_128128;
+                            }
+                            // write result of operation
+                            {
+                                ((volatile __local
+                                  double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                                    x_128126;
+                            }
+                        }
+                        offset_128139 *= 2;
+                    }
+                    // ... then across waves, with a barrier before each step.
+                    while (slt32(skip_waves_128140,
+                                 squot32(sext_i64_i32(segred_group_sizze_85831) +
+                                         wave_sizze_128113 - 1,
+                                         wave_sizze_128113))) {
+                        barrier(CLK_LOCAL_MEM_FENCE);
+                        offset_128139 = skip_waves_128140 * wave_sizze_128113;
+                        if (slt32(local_tid_128111 + offset_128139,
+                                  sext_i64_i32(segred_group_sizze_85831)) &&
+                            ((local_tid_128111 - squot32(local_tid_128111,
+                                                         wave_sizze_128113) *
+                              wave_sizze_128113) == 0 &&
+                             (squot32(local_tid_128111, wave_sizze_128113) &
+                              (2 * skip_waves_128140 - 1)) == 0)) {
+                            // read array element
+                            {
+                                x_128127 = ((__local
+                                             double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111 +
+                                                                           offset_128139)];
+                            }
+                            // apply reduction operation
+                            {
+                                double defunc_1_op_res_128128 = x_128126 +
+                                       x_128127;
+
+                                x_128126 = defunc_1_op_res_128128;
+                            }
+                            // write result of operation
+                            {
+                                ((__local
+                                  double *) red_arr_mem_128115)[sext_i32_i64(local_tid_128111)] =
+                                    x_128126;
+                            }
+                        }
+                        skip_waves_128140 *= 2;
+                    }
+                    // and back to memory with the final result
+                    {
+                        if (local_tid_128111 == 0) {
+                            ((__global double *) mem_123904)[gtid_85804] =
+                                x_128126;
+                        }
+                    }
+                }
+            }
+        }
+        // Synchronise the group before it moves on to its next virtual group.
+        barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
+    }
+
+  error_1:
+    return;
+    #undef segred_group_sizze_85831
+}
+__kernel void mainDetailedzisegred_large_86170(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128451_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128449_backing_aligned_1, + int64_t num_recresids_padded_71534, + int64_t num_groups_86252, + int64_t groups_per_segment_128435, + int64_t elements_per_thread_128436, + int64_t virt_num_groups_128437, +
int64_t threads_per_segment_128439, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124051, + __global + unsigned char *mem_124054, + __global + unsigned char *group_res_arr_mem_128440, + __global + unsigned char *mainDetailedzicounter_mem_128442) +{ + #define segred_group_sizze_86251 (mainDetailedzisegred_group_sizze_86164) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128451_backing_1 = + (__local volatile + char *) sync_arr_mem_128451_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128449_backing_0 = + (__local volatile + char *) red_arr_mem_128449_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128444; + int32_t local_tid_128445; + int64_t group_sizze_128448; + int32_t wave_sizze_128447; + int32_t group_tid_128446; + + global_tid_128444 = get_global_id(0); + local_tid_128445 = get_local_id(0); + group_sizze_128448 = get_local_size(0); + wave_sizze_128447 = LOCKSTEP_WIDTH; + group_tid_128446 = get_group_id(0); + + int32_t phys_tid_86170; + + phys_tid_86170 = global_tid_128444; + + __local char *red_arr_mem_128449; + + red_arr_mem_128449 = (__local char *) red_arr_mem_128449_backing_0; + + __local char *sync_arr_mem_128451; + + sync_arr_mem_128451 = (__local char *) sync_arr_mem_128451_backing_1; + + int32_t phys_group_id_128453; + + phys_group_id_128453 = get_group_id(0); + for (int32_t i_128454 = 0; i_128454 < + sdiv_up32(sext_i64_i32(virt_num_groups_128437) - phys_group_id_128453, + sext_i64_i32(num_groups_86252)); i_128454++) { + int32_t virt_group_id_128455 = phys_group_id_128453 + i_128454 * + sext_i64_i32(num_groups_86252); + int32_t flat_segment_id_128456 = squot32(virt_group_id_128455, + sext_i64_i32(groups_per_segment_128435)); + int64_t global_tid_128457 = srem64(sext_i32_i64(virt_group_id_128455) * + segred_group_sizze_86251 + + sext_i32_i64(local_tid_128445), + segred_group_sizze_86251 * + 
groups_per_segment_128435); + int64_t gtid_86161 = sext_i32_i64(flat_segment_id_128456); + int64_t gtid_86169; + double x_acc_128458; + int64_t chunk_sizze_128459; + + chunk_sizze_128459 = smin64(elements_per_thread_128436, + sdiv_up64(num_recresids_padded_71534 - + global_tid_128457, + threads_per_segment_128439)); + + double x_86255; + double x_86256; + + // neutral-initialise the accumulators + { + x_acc_128458 = 0.0; + } + for (int64_t i_128463 = 0; i_128463 < chunk_sizze_128459; i_128463++) { + gtid_86169 = global_tid_128457 + threads_per_segment_128439 * + i_128463; + // apply map function + { + double x_86260 = ((__global double *) mem_124045)[gtid_86161 * + num_recresids_padded_71534 + + gtid_86169]; + bool isnan_res_86261; + + isnan_res_86261 = futrts_isnan64(x_86260); + + double defunc_0_f_res_86262; + + if (isnan_res_86261) { + defunc_0_f_res_86262 = 0.0; + } else { + double x_mean_86259 = ((__global + double *) mem_124051)[gtid_86161]; + double x_86263 = x_86260 - x_mean_86259; + double defunc_0_f_res_f_res_86264 = fpow64(x_86263, 2.0); + + defunc_0_f_res_86262 = defunc_0_f_res_f_res_86264; + } + // save map-out results + { } + // load accumulator + { + x_86255 = x_acc_128458; + } + // load new values + { + x_86256 = defunc_0_f_res_86262; + } + // apply reduction operator + { + double defunc_1_op_res_86257 = x_86255 + x_86256; + + // store in accumulator + { + x_acc_128458 = defunc_1_op_res_86257; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_86255 = x_acc_128458; + ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_86255; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128464; + int32_t skip_waves_128465; + + skip_waves_128465 = 1; + + double x_128460; + double x_128461; + + offset_128464 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128445, + sext_i64_i32(segred_group_sizze_86251))) { + x_128460 = ((__local + double *) 
red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128464)]; + } + } + offset_128464 = 1; + while (slt32(offset_128464, wave_sizze_128447)) { + if (slt32(local_tid_128445 + offset_128464, + sext_i64_i32(segred_group_sizze_86251)) && + ((local_tid_128445 - squot32(local_tid_128445, + wave_sizze_128447) * + wave_sizze_128447) & (2 * offset_128464 - 1)) == 0) { + // read array element + { + x_128461 = ((volatile __local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128464)]; + } + // apply reduction operation + { + double defunc_1_op_res_128462 = x_128460 + x_128461; + + x_128460 = defunc_1_op_res_128462; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_128460; + } + } + offset_128464 *= 2; + } + while (slt32(skip_waves_128465, + squot32(sext_i64_i32(segred_group_sizze_86251) + + wave_sizze_128447 - 1, wave_sizze_128447))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128464 = skip_waves_128465 * wave_sizze_128447; + if (slt32(local_tid_128445 + offset_128464, + sext_i64_i32(segred_group_sizze_86251)) && + ((local_tid_128445 - squot32(local_tid_128445, + wave_sizze_128447) * + wave_sizze_128447) == 0 && (squot32(local_tid_128445, + wave_sizze_128447) & (2 * + skip_waves_128465 - + 1)) == + 0)) { + // read array element + { + x_128461 = ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128464)]; + } + // apply reduction operation + { + double defunc_1_op_res_128462 = x_128460 + x_128461; + + x_128460 = defunc_1_op_res_128462; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_128460; + } + } + skip_waves_128465 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128445) == (int64_t) 0) { + x_acc_128458 = x_128460; + } + } + if (groups_per_segment_128435 == (int64_t) 1) { + // first 
thread in group saves final result to memory + { + if (local_tid_128445 == 0) { + ((__global double *) mem_124054)[gtid_86161] = x_acc_128458; + } + } + } else { + int32_t old_counter_128466; + + // first thread in group saves group result to global memory + { + if (local_tid_128445 == 0) { + ((__global + double *) group_res_arr_mem_128440)[sext_i32_i64(virt_group_id_128455) * + segred_group_sizze_86251] = + x_acc_128458; + mem_fence_global(); + old_counter_128466 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128442)[sext_i32_i64(srem32(flat_segment_id_128456, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128451)[(int64_t) 0] = + old_counter_128466 == groups_per_segment_128435 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128467; + + is_last_group_128467 = ((__local + bool *) sync_arr_mem_128451)[(int64_t) 0]; + if (is_last_group_128467) { + if (local_tid_128445 == 0) { + old_counter_128466 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128442)[sext_i32_i64(srem32(flat_segment_id_128456, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128435)); + } + // read in the per-group-results + { + int64_t read_per_thread_128468 = + sdiv_up64(groups_per_segment_128435, + segred_group_sizze_86251); + + x_86255 = 0.0; + for (int64_t i_128469 = 0; i_128469 < + read_per_thread_128468; i_128469++) { + int64_t group_res_id_128470 = + sext_i32_i64(local_tid_128445) * + read_per_thread_128468 + i_128469; + int64_t index_of_group_res_128471 = + sext_i32_i64(flat_segment_id_128456) * + groups_per_segment_128435 + group_res_id_128470; + + if (slt64(group_res_id_128470, + groups_per_segment_128435)) { + x_86256 = ((__global + double *) group_res_arr_mem_128440)[index_of_group_res_128471 * + segred_group_sizze_86251]; + + double defunc_1_op_res_86257; + + defunc_1_op_res_86257 = x_86255 + x_86256; + x_86255 = defunc_1_op_res_86257; + } + } + } 
+ ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_86255; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128472; + int32_t skip_waves_128473; + + skip_waves_128473 = 1; + + double x_128460; + double x_128461; + + offset_128472 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128445, + sext_i64_i32(segred_group_sizze_86251))) { + x_128460 = ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128472)]; + } + } + offset_128472 = 1; + while (slt32(offset_128472, wave_sizze_128447)) { + if (slt32(local_tid_128445 + offset_128472, + sext_i64_i32(segred_group_sizze_86251)) && + ((local_tid_128445 - squot32(local_tid_128445, + wave_sizze_128447) * + wave_sizze_128447) & (2 * offset_128472 - 1)) == + 0) { + // read array element + { + x_128461 = ((volatile __local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128472)]; + } + // apply reduction operation + { + double defunc_1_op_res_128462 = x_128460 + + x_128461; + + x_128460 = defunc_1_op_res_128462; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_128460; + } + } + offset_128472 *= 2; + } + while (slt32(skip_waves_128473, + squot32(sext_i64_i32(segred_group_sizze_86251) + + wave_sizze_128447 - 1, + wave_sizze_128447))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128472 = skip_waves_128473 * wave_sizze_128447; + if (slt32(local_tid_128445 + offset_128472, + sext_i64_i32(segred_group_sizze_86251)) && + ((local_tid_128445 - squot32(local_tid_128445, + wave_sizze_128447) * + wave_sizze_128447) == 0 && + (squot32(local_tid_128445, wave_sizze_128447) & + (2 * skip_waves_128473 - 1)) == 0)) { + // read array element + { + x_128461 = ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445 + + offset_128472)]; + } + // apply reduction operation + { + double 
defunc_1_op_res_128462 = x_128460 + + x_128461; + + x_128460 = defunc_1_op_res_128462; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128449)[sext_i32_i64(local_tid_128445)] = + x_128460; + } + } + skip_waves_128473 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128445 == 0) { + ((__global double *) mem_124054)[gtid_86161] = + x_128460; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86251 +} +__kernel void mainDetailedzisegred_large_86200(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128380_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128378_backing_aligned_1, + int64_t num_recresids_padded_71534, + int64_t num_groups_86230, + int64_t groups_per_segment_128364, + int64_t elements_per_thread_128365, + int64_t virt_num_groups_128366, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124048, + __global + unsigned char *group_res_arr_mem_128369, + __global + unsigned char *mainDetailedzicounter_mem_128371) +{ + #define segred_group_sizze_86229 (mainDetailedzisegred_group_sizze_86194) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128380_backing_1 = + (__local volatile + char *) sync_arr_mem_128380_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128378_backing_0 = + (__local volatile + char *) red_arr_mem_128378_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128373; + int32_t local_tid_128374; + int64_t group_sizze_128377; + int32_t wave_sizze_128376; + int32_t group_tid_128375; + + global_tid_128373 = get_global_id(0); + local_tid_128374 = get_local_id(0); + group_sizze_128377 = get_local_size(0); + wave_sizze_128376 = LOCKSTEP_WIDTH; + group_tid_128375 = get_group_id(0); + + int32_t phys_tid_86200; + + phys_tid_86200 = 
global_tid_128373; + + __local char *red_arr_mem_128378; + + red_arr_mem_128378 = (__local char *) red_arr_mem_128378_backing_0; + + __local char *sync_arr_mem_128380; + + sync_arr_mem_128380 = (__local char *) sync_arr_mem_128380_backing_1; + + int32_t phys_group_id_128382; + + phys_group_id_128382 = get_group_id(0); + for (int32_t i_128383 = 0; i_128383 < + sdiv_up32(sext_i64_i32(virt_num_groups_128366) - phys_group_id_128382, + sext_i64_i32(num_groups_86230)); i_128383++) { + int32_t virt_group_id_128384 = phys_group_id_128382 + i_128383 * + sext_i64_i32(num_groups_86230); + int32_t flat_segment_id_128385 = squot32(virt_group_id_128384, + sext_i64_i32(groups_per_segment_128364)); + int64_t global_tid_128386 = srem64(sext_i32_i64(virt_group_id_128384) * + segred_group_sizze_86229 + + sext_i32_i64(local_tid_128374), + segred_group_sizze_86229 * + groups_per_segment_128364); + int64_t gtid_86191 = sext_i32_i64(flat_segment_id_128385); + int64_t gtid_86199; + double x_acc_128387; + int64_t chunk_sizze_128388; + int64_t starting_point_128389; + + starting_point_128389 = global_tid_128386 * elements_per_thread_128365; + + int64_t remaining_elements_128390; + + remaining_elements_128390 = num_recresids_padded_71534 - + starting_point_128389; + if (sle64(remaining_elements_128390, (int64_t) 0) || + sle64(num_recresids_padded_71534, starting_point_128389)) { + chunk_sizze_128388 = (int64_t) 0; + } else { + if (slt64(num_recresids_padded_71534, (global_tid_128386 + + (int64_t) 1) * + elements_per_thread_128365)) { + chunk_sizze_128388 = num_recresids_padded_71534 - + global_tid_128386 * elements_per_thread_128365; + } else { + chunk_sizze_128388 = elements_per_thread_128365; + } + } + + double x_86233; + double x_86234; + + // neutral-initialise the accumulators + { + x_acc_128387 = 0.0; + } + for (int64_t i_128398 = 0; i_128398 < elements_per_thread_128365; + i_128398++) { + gtid_86199 = sext_i32_i64(local_tid_128374) + + (squot64(global_tid_128386, 
segred_group_sizze_86229) * + elements_per_thread_128365 + i_128398) * + segred_group_sizze_86229; + if (slt64(gtid_86199, num_recresids_padded_71534)) { + // apply map function + { + double x_86241 = ((__global + double *) mem_124045)[gtid_86191 * + num_recresids_padded_71534 + + gtid_86199]; + + // save map-out results + { } + // load accumulator + { + x_86233 = x_acc_128387; + } + // load new values + { + x_86234 = x_86241; + } + // apply reduction operator + { + bool isnan_res_86235; + + isnan_res_86235 = futrts_isnan64(x_86233); + + double defunc_1_op_res_86236; + + if (isnan_res_86235) { + defunc_1_op_res_86236 = x_86234; + } else { + bool isnan_res_86237; + + isnan_res_86237 = futrts_isnan64(x_86234); + + double defunc_1_op_res_f_res_86238; + + if (isnan_res_86237) { + defunc_1_op_res_f_res_86238 = x_86233; + } else { + double defunc_1_op_res_f_res_f_res_86239 = + x_86233 + x_86234; + + defunc_1_op_res_f_res_86238 = + defunc_1_op_res_f_res_f_res_86239; + } + defunc_1_op_res_86236 = defunc_1_op_res_f_res_86238; + } + // store in accumulator + { + x_acc_128387 = defunc_1_op_res_86236; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_86233 = x_acc_128387; + ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_86233; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128399; + int32_t skip_waves_128400; + + skip_waves_128400 = 1; + + double x_128391; + double x_128392; + + offset_128399 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128374, + sext_i64_i32(segred_group_sizze_86229))) { + x_128391 = ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128399)]; + } + } + offset_128399 = 1; + while (slt32(offset_128399, wave_sizze_128376)) { + if (slt32(local_tid_128374 + offset_128399, + sext_i64_i32(segred_group_sizze_86229)) && + ((local_tid_128374 - squot32(local_tid_128374, + wave_sizze_128376) * + wave_sizze_128376) & (2 * 
offset_128399 - 1)) == 0) { + // read array element + { + x_128392 = ((volatile __local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128399)]; + } + // apply reduction operation + { + bool isnan_res_128393; + + isnan_res_128393 = futrts_isnan64(x_128391); + + double defunc_1_op_res_128394; + + if (isnan_res_128393) { + defunc_1_op_res_128394 = x_128392; + } else { + bool isnan_res_128395; + + isnan_res_128395 = futrts_isnan64(x_128392); + + double defunc_1_op_res_f_res_128396; + + if (isnan_res_128395) { + defunc_1_op_res_f_res_128396 = x_128391; + } else { + double defunc_1_op_res_f_res_f_res_128397 = + x_128391 + x_128392; + + defunc_1_op_res_f_res_128396 = + defunc_1_op_res_f_res_f_res_128397; + } + defunc_1_op_res_128394 = + defunc_1_op_res_f_res_128396; + } + x_128391 = defunc_1_op_res_128394; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_128391; + } + } + offset_128399 *= 2; + } + while (slt32(skip_waves_128400, + squot32(sext_i64_i32(segred_group_sizze_86229) + + wave_sizze_128376 - 1, wave_sizze_128376))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128399 = skip_waves_128400 * wave_sizze_128376; + if (slt32(local_tid_128374 + offset_128399, + sext_i64_i32(segred_group_sizze_86229)) && + ((local_tid_128374 - squot32(local_tid_128374, + wave_sizze_128376) * + wave_sizze_128376) == 0 && (squot32(local_tid_128374, + wave_sizze_128376) & + (2 * skip_waves_128400 - + 1)) == 0)) { + // read array element + { + x_128392 = ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128399)]; + } + // apply reduction operation + { + bool isnan_res_128393; + + isnan_res_128393 = futrts_isnan64(x_128391); + + double defunc_1_op_res_128394; + + if (isnan_res_128393) { + defunc_1_op_res_128394 = x_128392; + } else { + bool isnan_res_128395; + + isnan_res_128395 = futrts_isnan64(x_128392); + + double defunc_1_op_res_f_res_128396; + + if 
(isnan_res_128395) { + defunc_1_op_res_f_res_128396 = x_128391; + } else { + double defunc_1_op_res_f_res_f_res_128397 = + x_128391 + x_128392; + + defunc_1_op_res_f_res_128396 = + defunc_1_op_res_f_res_f_res_128397; + } + defunc_1_op_res_128394 = + defunc_1_op_res_f_res_128396; + } + x_128391 = defunc_1_op_res_128394; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_128391; + } + } + skip_waves_128400 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128374) == (int64_t) 0) { + x_acc_128387 = x_128391; + } + } + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_128374) == (int64_t) 0)) { + x_acc_128387 = 0.0; + } + } + } + x_86233 = x_acc_128387; + if (groups_per_segment_128364 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128374 == 0) { + ((__global double *) mem_124048)[gtid_86191] = x_acc_128387; + } + } + } else { + int32_t old_counter_128401; + + // first thread in group saves group result to global memory + { + if (local_tid_128374 == 0) { + ((__global + double *) group_res_arr_mem_128369)[sext_i32_i64(virt_group_id_128384) * + segred_group_sizze_86229] = + x_acc_128387; + mem_fence_global(); + old_counter_128401 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128371)[sext_i32_i64(srem32(flat_segment_id_128385, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128380)[(int64_t) 0] = + old_counter_128401 == groups_per_segment_128364 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128402; + + is_last_group_128402 = ((__local + bool *) sync_arr_mem_128380)[(int64_t) 0]; + if (is_last_group_128402) { + if (local_tid_128374 == 0) { + old_counter_128401 = + atomic_add_i32_global(&((volatile __global + int *) 
mainDetailedzicounter_mem_128371)[sext_i32_i64(srem32(flat_segment_id_128385, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128364)); + } + // read in the per-group-results + { + int64_t read_per_thread_128403 = + sdiv_up64(groups_per_segment_128364, + segred_group_sizze_86229); + + x_86233 = 0.0; + for (int64_t i_128404 = 0; i_128404 < + read_per_thread_128403; i_128404++) { + int64_t group_res_id_128405 = + sext_i32_i64(local_tid_128374) * + read_per_thread_128403 + i_128404; + int64_t index_of_group_res_128406 = + sext_i32_i64(flat_segment_id_128385) * + groups_per_segment_128364 + group_res_id_128405; + + if (slt64(group_res_id_128405, + groups_per_segment_128364)) { + x_86234 = ((__global + double *) group_res_arr_mem_128369)[index_of_group_res_128406 * + segred_group_sizze_86229]; + + bool isnan_res_86235; + + isnan_res_86235 = futrts_isnan64(x_86233); + + double defunc_1_op_res_86236; + + if (isnan_res_86235) { + defunc_1_op_res_86236 = x_86234; + } else { + bool isnan_res_86237; + + isnan_res_86237 = futrts_isnan64(x_86234); + + double defunc_1_op_res_f_res_86238; + + if (isnan_res_86237) { + defunc_1_op_res_f_res_86238 = x_86233; + } else { + double defunc_1_op_res_f_res_f_res_86239 = + x_86233 + x_86234; + + defunc_1_op_res_f_res_86238 = + defunc_1_op_res_f_res_f_res_86239; + } + defunc_1_op_res_86236 = + defunc_1_op_res_f_res_86238; + } + x_86233 = defunc_1_op_res_86236; + } + } + } + ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_86233; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128407; + int32_t skip_waves_128408; + + skip_waves_128408 = 1; + + double x_128391; + double x_128392; + + offset_128407 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128374, + sext_i64_i32(segred_group_sizze_86229))) { + x_128391 = ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128407)]; + } + } + offset_128407 = 1; + 
while (slt32(offset_128407, wave_sizze_128376)) { + if (slt32(local_tid_128374 + offset_128407, + sext_i64_i32(segred_group_sizze_86229)) && + ((local_tid_128374 - squot32(local_tid_128374, + wave_sizze_128376) * + wave_sizze_128376) & (2 * offset_128407 - 1)) == + 0) { + // read array element + { + x_128392 = ((volatile __local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128407)]; + } + // apply reduction operation + { + bool isnan_res_128393; + + isnan_res_128393 = futrts_isnan64(x_128391); + + double defunc_1_op_res_128394; + + if (isnan_res_128393) { + defunc_1_op_res_128394 = x_128392; + } else { + bool isnan_res_128395; + + isnan_res_128395 = futrts_isnan64(x_128392); + + double defunc_1_op_res_f_res_128396; + + if (isnan_res_128395) { + defunc_1_op_res_f_res_128396 = x_128391; + } else { + double + defunc_1_op_res_f_res_f_res_128397 = + x_128391 + x_128392; + + defunc_1_op_res_f_res_128396 = + defunc_1_op_res_f_res_f_res_128397; + } + defunc_1_op_res_128394 = + defunc_1_op_res_f_res_128396; + } + x_128391 = defunc_1_op_res_128394; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_128391; + } + } + offset_128407 *= 2; + } + while (slt32(skip_waves_128408, + squot32(sext_i64_i32(segred_group_sizze_86229) + + wave_sizze_128376 - 1, + wave_sizze_128376))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128407 = skip_waves_128408 * wave_sizze_128376; + if (slt32(local_tid_128374 + offset_128407, + sext_i64_i32(segred_group_sizze_86229)) && + ((local_tid_128374 - squot32(local_tid_128374, + wave_sizze_128376) * + wave_sizze_128376) == 0 && + (squot32(local_tid_128374, wave_sizze_128376) & + (2 * skip_waves_128408 - 1)) == 0)) { + // read array element + { + x_128392 = ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374 + + offset_128407)]; + } + // apply reduction operation + { + bool isnan_res_128393; + + isnan_res_128393 = 
futrts_isnan64(x_128391); + + double defunc_1_op_res_128394; + + if (isnan_res_128393) { + defunc_1_op_res_128394 = x_128392; + } else { + bool isnan_res_128395; + + isnan_res_128395 = futrts_isnan64(x_128392); + + double defunc_1_op_res_f_res_128396; + + if (isnan_res_128395) { + defunc_1_op_res_f_res_128396 = x_128391; + } else { + double + defunc_1_op_res_f_res_f_res_128397 = + x_128391 + x_128392; + + defunc_1_op_res_f_res_128396 = + defunc_1_op_res_f_res_f_res_128397; + } + defunc_1_op_res_128394 = + defunc_1_op_res_f_res_128396; + } + x_128391 = defunc_1_op_res_128394; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128378)[sext_i32_i64(local_tid_128374)] = + x_128391; + } + } + skip_waves_128408 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128374 == 0) { + ((__global double *) mem_124048)[gtid_86191] = + x_128391; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86229 +} +__kernel void mainDetailedzisegred_large_86835(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128681_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128679_backing_aligned_1, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t num_groups_87112, + int64_t groups_per_segment_128665, + int64_t elements_per_thread_128666, + int64_t virt_num_groups_128667, + int64_t threads_per_segment_128669, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124130, + __global + unsigned char *group_res_arr_mem_128670, + __global + unsigned char *mainDetailedzicounter_mem_128672) +{ + #define segred_group_sizze_87111 (mainDetailedzisegred_group_sizze_86829) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128681_backing_1 = + (__local volatile + char *) 
sync_arr_mem_128681_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128679_backing_0 = + (__local volatile + char *) red_arr_mem_128679_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128674; + int32_t local_tid_128675; + int64_t group_sizze_128678; + int32_t wave_sizze_128677; + int32_t group_tid_128676; + + global_tid_128674 = get_global_id(0); + local_tid_128675 = get_local_id(0); + group_sizze_128678 = get_local_size(0); + wave_sizze_128677 = LOCKSTEP_WIDTH; + group_tid_128676 = get_group_id(0); + + int32_t phys_tid_86835; + + phys_tid_86835 = global_tid_128674; + + __local char *red_arr_mem_128679; + + red_arr_mem_128679 = (__local char *) red_arr_mem_128679_backing_0; + + __local char *sync_arr_mem_128681; + + sync_arr_mem_128681 = (__local char *) sync_arr_mem_128681_backing_1; + + int32_t phys_group_id_128683; + + phys_group_id_128683 = get_group_id(0); + for (int32_t i_128684 = 0; i_128684 < + sdiv_up32(sext_i64_i32(virt_num_groups_128667) - phys_group_id_128683, + sext_i64_i32(num_groups_87112)); i_128684++) { + int32_t virt_group_id_128685 = phys_group_id_128683 + i_128684 * + sext_i64_i32(num_groups_87112); + int32_t flat_segment_id_128686 = squot32(virt_group_id_128685, + sext_i64_i32(groups_per_segment_128665)); + int64_t global_tid_128687 = srem64(sext_i32_i64(virt_group_id_128685) * + segred_group_sizze_87111 + + sext_i32_i64(local_tid_128675), + segred_group_sizze_87111 * + groups_per_segment_128665); + int64_t gtid_86826 = sext_i32_i64(flat_segment_id_128686); + int64_t gtid_86834; + int64_t x_acc_128688; + int64_t chunk_sizze_128689; + + chunk_sizze_128689 = smin64(elements_per_thread_128666, + sdiv_up64(num_recresids_padded_71534 - + global_tid_128687, + threads_per_segment_128669)); + + int64_t x_87115; + int64_t x_87116; + + // neutral-initialise the accumulators + { + x_acc_128688 = (int64_t) 9223372036854775807; + } + for (int64_t i_128693 = 0; i_128693 < chunk_sizze_128689; i_128693++) { + 
gtid_86834 = global_tid_128687 + threads_per_segment_128669 * + i_128693; + // apply map function + { + int64_t slice_115288 = (int64_t) 1 + gtid_86834; + double x_87121 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86826 * + Nmk_72261 + + slice_115288]; + double x_87122 = ((__global double *) mem_124078)[gtid_86826 * + Nmk_72261 + + slice_115288]; + double abs_res_87123 = fabs(x_87121); + bool cond_87124 = x_87122 < abs_res_87123; + int64_t defunc_2_f_res_87125; + + if (cond_87124) { + defunc_2_f_res_87125 = gtid_86834; + } else { + defunc_2_f_res_87125 = (int64_t) 9223372036854775807; + } + // save map-out results + { } + // load accumulator + { + x_87115 = x_acc_128688; + } + // load new values + { + x_87116 = defunc_2_f_res_87125; + } + // apply reduction operator + { + int64_t defunc_1_op_res_87117 = smin64(x_87115, x_87116); + + // store in accumulator + { + x_acc_128688 = defunc_1_op_res_87117; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_87115 = x_acc_128688; + ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_87115; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128694; + int32_t skip_waves_128695; + + skip_waves_128695 = 1; + + int64_t x_128690; + int64_t x_128691; + + offset_128694 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128675, + sext_i64_i32(segred_group_sizze_87111))) { + x_128690 = ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128694)]; + } + } + offset_128694 = 1; + while (slt32(offset_128694, wave_sizze_128677)) { + if (slt32(local_tid_128675 + offset_128694, + sext_i64_i32(segred_group_sizze_87111)) && + ((local_tid_128675 - squot32(local_tid_128675, + wave_sizze_128677) * + wave_sizze_128677) & (2 * offset_128694 - 1)) == 0) { + // read array element + { + x_128691 = ((volatile __local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128694)]; + } + 
// apply reduction operation + { + int64_t defunc_1_op_res_128692 = smin64(x_128690, x_128691); + + x_128690 = defunc_1_op_res_128692; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_128690; + } + } + offset_128694 *= 2; + } + while (slt32(skip_waves_128695, + squot32(sext_i64_i32(segred_group_sizze_87111) + + wave_sizze_128677 - 1, wave_sizze_128677))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128694 = skip_waves_128695 * wave_sizze_128677; + if (slt32(local_tid_128675 + offset_128694, + sext_i64_i32(segred_group_sizze_87111)) && + ((local_tid_128675 - squot32(local_tid_128675, + wave_sizze_128677) * + wave_sizze_128677) == 0 && (squot32(local_tid_128675, + wave_sizze_128677) & (2 * + skip_waves_128695 - + 1)) == + 0)) { + // read array element + { + x_128691 = ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128694)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128692 = smin64(x_128690, x_128691); + + x_128690 = defunc_1_op_res_128692; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_128690; + } + } + skip_waves_128695 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128675) == (int64_t) 0) { + x_acc_128688 = x_128690; + } + } + if (groups_per_segment_128665 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128675 == 0) { + ((__global int64_t *) mem_124130)[gtid_86826] = + x_acc_128688; + } + } + } else { + int32_t old_counter_128696; + + // first thread in group saves group result to global memory + { + if (local_tid_128675 == 0) { + ((__global + int64_t *) group_res_arr_mem_128670)[sext_i32_i64(virt_group_id_128685) * + segred_group_sizze_87111] = + x_acc_128688; + mem_fence_global(); + old_counter_128696 = + 
atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128672)[sext_i32_i64(srem32(flat_segment_id_128686, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128681)[(int64_t) 0] = + old_counter_128696 == groups_per_segment_128665 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128697; + + is_last_group_128697 = ((__local + bool *) sync_arr_mem_128681)[(int64_t) 0]; + if (is_last_group_128697) { + if (local_tid_128675 == 0) { + old_counter_128696 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128672)[sext_i32_i64(srem32(flat_segment_id_128686, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128665)); + } + // read in the per-group-results + { + int64_t read_per_thread_128698 = + sdiv_up64(groups_per_segment_128665, + segred_group_sizze_87111); + + x_87115 = (int64_t) 9223372036854775807; + for (int64_t i_128699 = 0; i_128699 < + read_per_thread_128698; i_128699++) { + int64_t group_res_id_128700 = + sext_i32_i64(local_tid_128675) * + read_per_thread_128698 + i_128699; + int64_t index_of_group_res_128701 = + sext_i32_i64(flat_segment_id_128686) * + groups_per_segment_128665 + group_res_id_128700; + + if (slt64(group_res_id_128700, + groups_per_segment_128665)) { + x_87116 = ((__global + int64_t *) group_res_arr_mem_128670)[index_of_group_res_128701 * + segred_group_sizze_87111]; + + int64_t defunc_1_op_res_87117; + + defunc_1_op_res_87117 = smin64(x_87115, x_87116); + x_87115 = defunc_1_op_res_87117; + } + } + } + ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_87115; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128702; + int32_t skip_waves_128703; + + skip_waves_128703 = 1; + + int64_t x_128690; + int64_t x_128691; + + offset_128702 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128675, + sext_i64_i32(segred_group_sizze_87111))) { 
+ x_128690 = ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128702)]; + } + } + offset_128702 = 1; + while (slt32(offset_128702, wave_sizze_128677)) { + if (slt32(local_tid_128675 + offset_128702, + sext_i64_i32(segred_group_sizze_87111)) && + ((local_tid_128675 - squot32(local_tid_128675, + wave_sizze_128677) * + wave_sizze_128677) & (2 * offset_128702 - 1)) == + 0) { + // read array element + { + x_128691 = ((volatile __local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128702)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128692 = + smin64(x_128690, x_128691); + + x_128690 = defunc_1_op_res_128692; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_128690; + } + } + offset_128702 *= 2; + } + while (slt32(skip_waves_128703, + squot32(sext_i64_i32(segred_group_sizze_87111) + + wave_sizze_128677 - 1, + wave_sizze_128677))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128702 = skip_waves_128703 * wave_sizze_128677; + if (slt32(local_tid_128675 + offset_128702, + sext_i64_i32(segred_group_sizze_87111)) && + ((local_tid_128675 - squot32(local_tid_128675, + wave_sizze_128677) * + wave_sizze_128677) == 0 && + (squot32(local_tid_128675, wave_sizze_128677) & + (2 * skip_waves_128703 - 1)) == 0)) { + // read array element + { + x_128691 = ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675 + + offset_128702)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128692 = + smin64(x_128690, x_128691); + + x_128690 = defunc_1_op_res_128692; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128679)[sext_i32_i64(local_tid_128675)] = + x_128690; + } + } + skip_waves_128703 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128675 == 0) { + ((__global int64_t *) mem_124130)[gtid_86826] = + x_128690; + } + } + } + } + } + 
barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_87111 +} +/* Segmented maximum reduction over double values. For segment s (gtid_86951) and element j (gtid_86959) the mapped value is fabs(defunc_3_map_res_mem_124068[s * Nmk_72261 + 1 + j] / (1.0 + (double) (2 + 2 * j) / mem_124121[s])); values are combined with fmax64 starting from -INFINITY and the per-segment result is written to mem_124124[s]. Each thread handles at most elements_per_thread_128601 elements, strided by threads_per_segment_128604 and bounded by num_recresids_padded_71534. When groups_per_segment_128600 is 1 the group writes the result directly; otherwise each group stores a partial result in group_res_arr_mem_128605 and increments a counter in mainDetailedzicounter_mem_128607, and the group that sees the final count combines the partial results. The kernel returns at once if *global_failure >= 0. */ __kernel void mainDetailedzisegred_large_86960(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128616_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128614_backing_aligned_1, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t num_groups_86992, + int64_t groups_per_segment_128600, + int64_t elements_per_thread_128601, + int64_t virt_num_groups_128602, + int64_t threads_per_segment_128604, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124121, + __global + unsigned char *mem_124124, + __global + unsigned char *group_res_arr_mem_128605, + __global + unsigned char *mainDetailedzicounter_mem_128607) +{ + #define segred_group_sizze_86991 (mainDetailedzisegred_group_sizze_86954) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128616_backing_1 = + (__local volatile + char *) sync_arr_mem_128616_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128614_backing_0 = + (__local volatile + char *) red_arr_mem_128614_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128609; + int32_t local_tid_128610; + int64_t group_sizze_128613; + int32_t wave_sizze_128612; + int32_t group_tid_128611; + + global_tid_128609 = get_global_id(0); + local_tid_128610 = get_local_id(0); + group_sizze_128613 = get_local_size(0); + wave_sizze_128612 = LOCKSTEP_WIDTH; + group_tid_128611 = get_group_id(0); + + int32_t phys_tid_86960; + + phys_tid_86960 = global_tid_128609; + + __local char *red_arr_mem_128614; + + red_arr_mem_128614 = (__local char *) red_arr_mem_128614_backing_0; + + __local char *sync_arr_mem_128616; + + sync_arr_mem_128616 = (__local char *) sync_arr_mem_128616_backing_1; + + int32_t phys_group_id_128618; + + phys_group_id_128618 = 
get_group_id(0); + /* Physical group phys_group_id_128618 handles virtual groups phys_group_id_128618 + i * num_groups_86992 until virt_num_groups_128602 is covered. */ for (int32_t i_128619 = 0; i_128619 < + sdiv_up32(sext_i64_i32(virt_num_groups_128602) - phys_group_id_128618, + sext_i64_i32(num_groups_86992)); i_128619++) { + int32_t virt_group_id_128620 = phys_group_id_128618 + i_128619 * + sext_i64_i32(num_groups_86992); + int32_t flat_segment_id_128621 = squot32(virt_group_id_128620, + sext_i64_i32(groups_per_segment_128600)); + int64_t global_tid_128622 = srem64(sext_i32_i64(virt_group_id_128620) * + segred_group_sizze_86991 + + sext_i32_i64(local_tid_128610), + segred_group_sizze_86991 * + groups_per_segment_128600); + int64_t gtid_86951 = sext_i32_i64(flat_segment_id_128621); + int64_t gtid_86959; + double x_acc_128623; + int64_t chunk_sizze_128624; + + chunk_sizze_128624 = smin64(elements_per_thread_128601, + sdiv_up64(num_recresids_padded_71534 - + global_tid_128622, + threads_per_segment_128604)); + + double x_86995; + double x_86996; + + // neutral-initialise the accumulators + { + x_acc_128623 = -INFINITY; + } + for (int64_t i_128628 = 0; i_128628 < chunk_sizze_128624; i_128628++) { + gtid_86959 = global_tid_128622 + threads_per_segment_128604 * + i_128628; + // apply map function + { + double i64_res_86999 = ((__global + double *) mem_124121)[gtid_86951]; + int64_t slice_115286 = (int64_t) 1 + gtid_86959; + double x_87000 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86951 * + Nmk_72261 + + slice_115286]; + int64_t x_87002 = mul64((int64_t) 2, gtid_86959); + int64_t i64_arg_87003 = add64((int64_t) 2, x_87002); + double i64_res_87004 = sitofp_i64_f64(i64_arg_87003); + double y_87005 = i64_res_87004 / i64_res_86999; + double lifted_div_res_87006 = 1.0 + y_87005; + double abs_arg_87007 = x_87000 / lifted_div_res_87006; + double abs_res_87008 = fabs(abs_arg_87007); + + // save map-out results + { } + // load accumulator + { + x_86995 = x_acc_128623; + } + // load new values + { + x_86996 = abs_res_87008; + } + // apply reduction operator + { + double defunc_1_op_res_86997 = 
fmax64(x_86995, x_86996); + + // store in accumulator + { + x_acc_128623 = defunc_1_op_res_86997; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_86995 = x_acc_128623; + ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_86995; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128629; + int32_t skip_waves_128630; + + skip_waves_128630 = 1; + + double x_128625; + double x_128626; + + offset_128629 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128610, + sext_i64_i32(segred_group_sizze_86991))) { + x_128625 = ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128629)]; + } + } + offset_128629 = 1; + /* Stage 1: tree reduction among threads of the same wave (wave size = LOCKSTEP_WIDTH), using volatile local-memory accesses and no barrier inside the loop. */ while (slt32(offset_128629, wave_sizze_128612)) { + if (slt32(local_tid_128610 + offset_128629, + sext_i64_i32(segred_group_sizze_86991)) && + ((local_tid_128610 - squot32(local_tid_128610, + wave_sizze_128612) * + wave_sizze_128612) & (2 * offset_128629 - 1)) == 0) { + // read array element + { + x_128626 = ((volatile __local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128629)]; + } + // apply reduction operation + { + double defunc_1_op_res_128627 = fmax64(x_128625, x_128626); + + x_128625 = defunc_1_op_res_128627; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_128625; + } + } + offset_128629 *= 2; + } + /* Stage 2: tree reduction across waves; only the first thread of a participating wave does work, with a local barrier before every step. */ while (slt32(skip_waves_128630, + squot32(sext_i64_i32(segred_group_sizze_86991) + + wave_sizze_128612 - 1, wave_sizze_128612))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128629 = skip_waves_128630 * wave_sizze_128612; + if (slt32(local_tid_128610 + offset_128629, + sext_i64_i32(segred_group_sizze_86991)) && + ((local_tid_128610 - squot32(local_tid_128610, + wave_sizze_128612) * + wave_sizze_128612) == 0 && (squot32(local_tid_128610, + wave_sizze_128612) & (2 * + skip_waves_128630 - + 1)) == + 0)) { + // read array 
element + { + x_128626 = ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128629)]; + } + // apply reduction operation + { + double defunc_1_op_res_128627 = fmax64(x_128625, x_128626); + + x_128625 = defunc_1_op_res_128627; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_128625; + } + } + skip_waves_128630 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128610) == (int64_t) 0) { + x_acc_128623 = x_128625; + } + } + if (groups_per_segment_128600 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128610 == 0) { + ((__global double *) mem_124124)[gtid_86951] = x_acc_128623; + } + } + } else { + int32_t old_counter_128631; + + // first thread in group saves group result to global memory + { + if (local_tid_128610 == 0) { + ((__global + double *) group_res_arr_mem_128605)[sext_i32_i64(virt_group_id_128620) * + segred_group_sizze_86991] = + x_acc_128623; + mem_fence_global(); + old_counter_128631 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128607)[sext_i32_i64(srem32(flat_segment_id_128621, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128616)[(int64_t) 0] = + old_counter_128631 == groups_per_segment_128600 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128632; + + is_last_group_128632 = ((__local + bool *) sync_arr_mem_128616)[(int64_t) 0]; + /* Slot 0 of sync_arr_mem_128616 was set by thread 0 to (old counter == groups_per_segment_128600 - 1), so only the group that arrived last for this segment takes this branch; it resets the counter and combines all partial results. NOTE(review): presumes atomic_add_i32_global returns the value held before the addition - confirm. */ if (is_last_group_128632) { + if (local_tid_128610 == 0) { + old_counter_128631 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128607)[sext_i32_i64(srem32(flat_segment_id_128621, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128600)); + } + // read in the per-group-results + { + int64_t read_per_thread_128633 = + sdiv_up64(groups_per_segment_128600, + 
segred_group_sizze_86991); + + x_86995 = -INFINITY; + for (int64_t i_128634 = 0; i_128634 < + read_per_thread_128633; i_128634++) { + int64_t group_res_id_128635 = + sext_i32_i64(local_tid_128610) * + read_per_thread_128633 + i_128634; + int64_t index_of_group_res_128636 = + sext_i32_i64(flat_segment_id_128621) * + groups_per_segment_128600 + group_res_id_128635; + + if (slt64(group_res_id_128635, + groups_per_segment_128600)) { + x_86996 = ((__global + double *) group_res_arr_mem_128605)[index_of_group_res_128636 * + segred_group_sizze_86991]; + + double defunc_1_op_res_86997; + + defunc_1_op_res_86997 = fmax64(x_86995, x_86996); + x_86995 = defunc_1_op_res_86997; + } + } + } + ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_86995; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128637; + int32_t skip_waves_128638; + + skip_waves_128638 = 1; + + double x_128625; + double x_128626; + + offset_128637 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128610, + sext_i64_i32(segred_group_sizze_86991))) { + x_128625 = ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128637)]; + } + } + offset_128637 = 1; + while (slt32(offset_128637, wave_sizze_128612)) { + if (slt32(local_tid_128610 + offset_128637, + sext_i64_i32(segred_group_sizze_86991)) && + ((local_tid_128610 - squot32(local_tid_128610, + wave_sizze_128612) * + wave_sizze_128612) & (2 * offset_128637 - 1)) == + 0) { + // read array element + { + x_128626 = ((volatile __local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128637)]; + } + // apply reduction operation + { + double defunc_1_op_res_128627 = fmax64(x_128625, + x_128626); + + x_128625 = defunc_1_op_res_128627; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_128625; + } + } + offset_128637 *= 2; + } + while 
(slt32(skip_waves_128638, + squot32(sext_i64_i32(segred_group_sizze_86991) + + wave_sizze_128612 - 1, + wave_sizze_128612))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128637 = skip_waves_128638 * wave_sizze_128612; + if (slt32(local_tid_128610 + offset_128637, + sext_i64_i32(segred_group_sizze_86991)) && + ((local_tid_128610 - squot32(local_tid_128610, + wave_sizze_128612) * + wave_sizze_128612) == 0 && + (squot32(local_tid_128610, wave_sizze_128612) & + (2 * skip_waves_128638 - 1)) == 0)) { + // read array element + { + x_128626 = ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610 + + offset_128637)]; + } + // apply reduction operation + { + double defunc_1_op_res_128627 = fmax64(x_128625, + x_128626); + + x_128625 = defunc_1_op_res_128627; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128614)[sext_i32_i64(local_tid_128610)] = + x_128625; + } + } + skip_waves_128638 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128610 == 0) { + ((__global double *) mem_124124)[gtid_86951] = + x_128625; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86991 +} +/* Segmented sum reduction over double values. The flat segment id is split into three indices (gtid_87295, gtid_87296, gtid_87297), with k2p2zq_70876 as the extent of the two inner ones. For element j (gtid_87307) the mapped value is binop_p_mem_120117[gtid_87296 * N_70860 + j] * mem_124276[gtid_87297 * N_70860 + j], multiplied by 0.0 when mem_124142[gtid_87295 * N_70860 + j] is NaN and by 1.0 otherwise; values are added starting from 0.0, each thread taking at most elements_per_thread_128800 elements strided by threads_per_segment_128803 and bounded by n_70864. The result is written to mem_124281[gtid_87295 * k2p2zq_70876 * k2p2zq_70876 + gtid_87296 * k2p2zq_70876 + gtid_87297]. When groups_per_segment_128799 is 1 the group writes the result directly; otherwise each group stores a partial sum in group_res_arr_mem_128804 and increments a counter in mainDetailedzicounter_mem_128806, and the group that sees the final count adds up the partial sums. The kernel returns at once if *global_failure >= 0. */ __kernel void mainDetailedzisegred_large_87308(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128815_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128813_backing_aligned_1, + int64_t N_70860, int64_t n_70864, + int64_t k2p2zq_70876, + int64_t num_groups_87447, + int64_t groups_per_segment_128799, + int64_t elements_per_thread_128800, + int64_t virt_num_groups_128801, + int64_t threads_per_segment_128803, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124276, + __global + unsigned char *mem_124281, + __global + unsigned char *group_res_arr_mem_128804, + __global + unsigned char *mainDetailedzicounter_mem_128806) +{ + #define segred_group_sizze_87446 
(mainDetailedzisegred_group_sizze_87302) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128815_backing_1 = + (__local volatile + char *) sync_arr_mem_128815_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128813_backing_0 = + (__local volatile + char *) red_arr_mem_128813_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128808; + int32_t local_tid_128809; + int64_t group_sizze_128812; + int32_t wave_sizze_128811; + int32_t group_tid_128810; + + global_tid_128808 = get_global_id(0); + local_tid_128809 = get_local_id(0); + group_sizze_128812 = get_local_size(0); + wave_sizze_128811 = LOCKSTEP_WIDTH; + group_tid_128810 = get_group_id(0); + + int32_t phys_tid_87308; + + phys_tid_87308 = global_tid_128808; + + __local char *red_arr_mem_128813; + + red_arr_mem_128813 = (__local char *) red_arr_mem_128813_backing_0; + + __local char *sync_arr_mem_128815; + + sync_arr_mem_128815 = (__local char *) sync_arr_mem_128815_backing_1; + + int32_t phys_group_id_128817; + + phys_group_id_128817 = get_group_id(0); + /* Physical group phys_group_id_128817 handles virtual groups phys_group_id_128817 + i * num_groups_87447 until virt_num_groups_128801 is covered. */ for (int32_t i_128818 = 0; i_128818 < + sdiv_up32(sext_i64_i32(virt_num_groups_128801) - phys_group_id_128817, + sext_i64_i32(num_groups_87447)); i_128818++) { + int32_t virt_group_id_128819 = phys_group_id_128817 + i_128818 * + sext_i64_i32(num_groups_87447); + int32_t flat_segment_id_128820 = squot32(virt_group_id_128819, + sext_i64_i32(groups_per_segment_128799)); + int64_t global_tid_128821 = srem64(sext_i32_i64(virt_group_id_128819) * + segred_group_sizze_87446 + + sext_i32_i64(local_tid_128809), + segred_group_sizze_87446 * + groups_per_segment_128799); + /* Unflatten the segment id: gtid_87295 = squot64(id, k * k); gtid_87296 = squot64(id - gtid_87295 * k * k, k); gtid_87297 is what remains after removing both, with k = k2p2zq_70876. */ int64_t gtid_87295 = squot64(sext_i32_i64(flat_segment_id_128820), + k2p2zq_70876 * k2p2zq_70876); + int64_t gtid_87296 = squot64(sext_i32_i64(flat_segment_id_128820) - + squot64(sext_i32_i64(flat_segment_id_128820), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * 
k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_87297 = sext_i32_i64(flat_segment_id_128820) - + squot64(sext_i32_i64(flat_segment_id_128820), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(flat_segment_id_128820) - + squot64(sext_i32_i64(flat_segment_id_128820), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_87307; + double x_acc_128822; + int64_t chunk_sizze_128823; + + chunk_sizze_128823 = smin64(elements_per_thread_128800, + sdiv_up64(n_70864 - global_tid_128821, + threads_per_segment_128803)); + + double x_87450; + double x_87451; + + // neutral-initialise the accumulators + { + x_acc_128822 = 0.0; + } + for (int64_t i_128827 = 0; i_128827 < chunk_sizze_128823; i_128827++) { + gtid_87307 = global_tid_128821 + threads_per_segment_128803 * + i_128827; + // apply map function + { + double x_87456 = ((__global double *) mem_124142)[gtid_87295 * + N_70860 + + gtid_87307]; + double x_87457 = ((__global + double *) binop_p_mem_120117)[gtid_87296 * + N_70860 + + gtid_87307]; + double x_87458 = ((__global double *) mem_124276)[gtid_87297 * + N_70860 + + gtid_87307]; + double x_87459 = x_87457 * x_87458; + bool isnan_res_87460; + + isnan_res_87460 = futrts_isnan64(x_87456); + + double y_87461; + + if (isnan_res_87460) { + y_87461 = 0.0; + } else { + y_87461 = 1.0; + } + + double defunc_2_f_res_87462 = x_87459 * y_87461; + + // save map-out results + { } + // load accumulator + { + x_87450 = x_acc_128822; + } + // load new values + { + x_87451 = defunc_2_f_res_87462; + } + // apply reduction operator + { + double defunc_1_op_res_87452 = x_87450 + x_87451; + + // store in accumulator + { + x_acc_128822 = defunc_1_op_res_87452; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_87450 = x_acc_128822; + ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_87450; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t 
offset_128828; + int32_t skip_waves_128829; + + skip_waves_128829 = 1; + + double x_128824; + double x_128825; + + offset_128828 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128809, + sext_i64_i32(segred_group_sizze_87446))) { + x_128824 = ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128828)]; + } + } + offset_128828 = 1; + /* Stage 1: tree reduction among threads of the same wave (wave size = LOCKSTEP_WIDTH), using volatile local-memory accesses and no barrier inside the loop. */ while (slt32(offset_128828, wave_sizze_128811)) { + if (slt32(local_tid_128809 + offset_128828, + sext_i64_i32(segred_group_sizze_87446)) && + ((local_tid_128809 - squot32(local_tid_128809, + wave_sizze_128811) * + wave_sizze_128811) & (2 * offset_128828 - 1)) == 0) { + // read array element + { + x_128825 = ((volatile __local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128828)]; + } + // apply reduction operation + { + double defunc_1_op_res_128826 = x_128824 + x_128825; + + x_128824 = defunc_1_op_res_128826; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_128824; + } + } + offset_128828 *= 2; + } + /* Stage 2: tree reduction across waves; only the first thread of a participating wave does work, with a local barrier before every step. */ while (slt32(skip_waves_128829, + squot32(sext_i64_i32(segred_group_sizze_87446) + + wave_sizze_128811 - 1, wave_sizze_128811))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128828 = skip_waves_128829 * wave_sizze_128811; + if (slt32(local_tid_128809 + offset_128828, + sext_i64_i32(segred_group_sizze_87446)) && + ((local_tid_128809 - squot32(local_tid_128809, + wave_sizze_128811) * + wave_sizze_128811) == 0 && (squot32(local_tid_128809, + wave_sizze_128811) & (2 * + skip_waves_128829 - + 1)) == + 0)) { + // read array element + { + x_128825 = ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128828)]; + } + // apply reduction operation + { + double defunc_1_op_res_128826 = x_128824 + x_128825; + + x_128824 = defunc_1_op_res_128826; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_128824; + } + } + skip_waves_128829 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128809) == (int64_t) 0) { + x_acc_128822 = x_128824; + } + } + if (groups_per_segment_128799 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128809 == 0) { + ((__global double *) mem_124281)[gtid_87295 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_87296 * k2p2zq_70876 + + gtid_87297] = x_acc_128822; + } + } + } else { + int32_t old_counter_128830; + + // first thread in group saves group result to global memory + { + if (local_tid_128809 == 0) { + ((__global + double *) group_res_arr_mem_128804)[sext_i32_i64(virt_group_id_128819) * + segred_group_sizze_87446] = + x_acc_128822; + mem_fence_global(); + old_counter_128830 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128806)[sext_i32_i64(srem32(flat_segment_id_128820, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128815)[(int64_t) 0] = + old_counter_128830 == groups_per_segment_128799 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128831; + + is_last_group_128831 = ((__local + bool *) sync_arr_mem_128815)[(int64_t) 0]; + /* Slot 0 of sync_arr_mem_128815 was set by thread 0 to (old counter == groups_per_segment_128799 - 1), so only the group that arrived last for this segment takes this branch; it resets the counter and adds up all partial sums. NOTE(review): presumes atomic_add_i32_global returns the value held before the addition - confirm. */ if (is_last_group_128831) { + if (local_tid_128809 == 0) { + old_counter_128830 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128806)[sext_i32_i64(srem32(flat_segment_id_128820, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128799)); + } + // read in the per-group-results + { + int64_t read_per_thread_128832 = + sdiv_up64(groups_per_segment_128799, + segred_group_sizze_87446); + + x_87450 = 0.0; + for (int64_t i_128833 = 0; i_128833 < + read_per_thread_128832; i_128833++) { + int64_t group_res_id_128834 = + sext_i32_i64(local_tid_128809) * + read_per_thread_128832 + i_128833; + int64_t 
index_of_group_res_128835 = + sext_i32_i64(flat_segment_id_128820) * + groups_per_segment_128799 + group_res_id_128834; + + if (slt64(group_res_id_128834, + groups_per_segment_128799)) { + x_87451 = ((__global + double *) group_res_arr_mem_128804)[index_of_group_res_128835 * + segred_group_sizze_87446]; + + double defunc_1_op_res_87452; + + defunc_1_op_res_87452 = x_87450 + x_87451; + x_87450 = defunc_1_op_res_87452; + } + } + } + ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_87450; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128836; + int32_t skip_waves_128837; + + skip_waves_128837 = 1; + + double x_128824; + double x_128825; + + offset_128836 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128809, + sext_i64_i32(segred_group_sizze_87446))) { + x_128824 = ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128836)]; + } + } + offset_128836 = 1; + while (slt32(offset_128836, wave_sizze_128811)) { + if (slt32(local_tid_128809 + offset_128836, + sext_i64_i32(segred_group_sizze_87446)) && + ((local_tid_128809 - squot32(local_tid_128809, + wave_sizze_128811) * + wave_sizze_128811) & (2 * offset_128836 - 1)) == + 0) { + // read array element + { + x_128825 = ((volatile __local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128836)]; + } + // apply reduction operation + { + double defunc_1_op_res_128826 = x_128824 + + x_128825; + + x_128824 = defunc_1_op_res_128826; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_128824; + } + } + offset_128836 *= 2; + } + while (slt32(skip_waves_128837, + squot32(sext_i64_i32(segred_group_sizze_87446) + + wave_sizze_128811 - 1, + wave_sizze_128811))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128836 = skip_waves_128837 * wave_sizze_128811; + if (slt32(local_tid_128809 + offset_128836, + 
sext_i64_i32(segred_group_sizze_87446)) && + ((local_tid_128809 - squot32(local_tid_128809, + wave_sizze_128811) * + wave_sizze_128811) == 0 && + (squot32(local_tid_128809, wave_sizze_128811) & + (2 * skip_waves_128837 - 1)) == 0)) { + // read array element + { + x_128825 = ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809 + + offset_128836)]; + } + // apply reduction operation + { + double defunc_1_op_res_128826 = x_128824 + + x_128825; + + x_128824 = defunc_1_op_res_128826; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128813)[sext_i32_i64(local_tid_128809)] = + x_128824; + } + } + skip_waves_128837 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128809 == 0) { + ((__global double *) mem_124281)[gtid_87295 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_87296 * + k2p2zq_70876 + + gtid_87297] = + x_128824; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_87446 +} +/* Segmented sum reduction over double values. The flat segment id is split into gtid_88181 = squot64(id, k2p2zq_70876) and gtid_88182 = id - gtid_88181 * k2p2zq_70876. For element j (gtid_88191) the mapped value is 0.0 when y = mem_124142[gtid_88181 * N_70860 + j] is NaN, and binop_p_mem_120117[gtid_88182 * N_70860 + j] * y otherwise (the binop_p value is only loaded in the non-NaN branch); values are added starting from 0.0, each thread taking at most elements_per_thread_128990 elements strided by threads_per_segment_128993 and bounded by n_70864. The result is written to mem_124587[gtid_88181 * k2p2zq_70876 + gtid_88182]. When groups_per_segment_128989 is 1 the group writes the result directly; otherwise each group stores a partial sum in group_res_arr_mem_128994 and increments a counter in mainDetailedzicounter_mem_128996, and the group that sees the final count adds up the partial sums. The kernel returns at once if *global_failure >= 0. */ __kernel void mainDetailedzisegred_large_88192(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129005_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129003_backing_aligned_1, + int64_t N_70860, int64_t n_70864, + int64_t k2p2zq_70876, + int64_t num_groups_88245, + int64_t groups_per_segment_128989, + int64_t elements_per_thread_128990, + int64_t virt_num_groups_128991, + int64_t threads_per_segment_128993, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124587, + __global + unsigned char *group_res_arr_mem_128994, + __global + unsigned char *mainDetailedzicounter_mem_128996) +{ + #define segred_group_sizze_88244 (mainDetailedzisegred_group_sizze_88186) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129005_backing_1 = + (__local volatile + char *) 
sync_arr_mem_129005_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129003_backing_0 = + (__local volatile + char *) red_arr_mem_129003_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128998; + int32_t local_tid_128999; + int64_t group_sizze_129002; + int32_t wave_sizze_129001; + int32_t group_tid_129000; + + global_tid_128998 = get_global_id(0); + local_tid_128999 = get_local_id(0); + group_sizze_129002 = get_local_size(0); + wave_sizze_129001 = LOCKSTEP_WIDTH; + group_tid_129000 = get_group_id(0); + + int32_t phys_tid_88192; + + phys_tid_88192 = global_tid_128998; + + __local char *red_arr_mem_129003; + + red_arr_mem_129003 = (__local char *) red_arr_mem_129003_backing_0; + + __local char *sync_arr_mem_129005; + + sync_arr_mem_129005 = (__local char *) sync_arr_mem_129005_backing_1; + + int32_t phys_group_id_129007; + + phys_group_id_129007 = get_group_id(0); + /* Physical group phys_group_id_129007 handles virtual groups phys_group_id_129007 + i * num_groups_88245 until virt_num_groups_128991 is covered. */ for (int32_t i_129008 = 0; i_129008 < + sdiv_up32(sext_i64_i32(virt_num_groups_128991) - phys_group_id_129007, + sext_i64_i32(num_groups_88245)); i_129008++) { + int32_t virt_group_id_129009 = phys_group_id_129007 + i_129008 * + sext_i64_i32(num_groups_88245); + int32_t flat_segment_id_129010 = squot32(virt_group_id_129009, + sext_i64_i32(groups_per_segment_128989)); + int64_t global_tid_129011 = srem64(sext_i32_i64(virt_group_id_129009) * + segred_group_sizze_88244 + + sext_i32_i64(local_tid_128999), + segred_group_sizze_88244 * + groups_per_segment_128989); + int64_t gtid_88181 = squot64(sext_i32_i64(flat_segment_id_129010), + k2p2zq_70876); + int64_t gtid_88182 = sext_i32_i64(flat_segment_id_129010) - + squot64(sext_i32_i64(flat_segment_id_129010), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_88191; + double x_acc_129012; + int64_t chunk_sizze_129013; + + chunk_sizze_129013 = smin64(elements_per_thread_128990, + sdiv_up64(n_70864 - global_tid_129011, + threads_per_segment_128993)); + + double x_88248; + double x_88249; + + // neutral-initialise the 
accumulators + { + x_acc_129012 = 0.0; + } + for (int64_t i_129017 = 0; i_129017 < chunk_sizze_129013; i_129017++) { + gtid_88191 = global_tid_129011 + threads_per_segment_128993 * + i_129017; + // apply map function + { + double x_88254 = ((__global double *) mem_124142)[gtid_88181 * + N_70860 + + gtid_88191]; + bool isnan_res_88255; + + isnan_res_88255 = futrts_isnan64(x_88254); + + double defunc_1_f_res_88256; + + if (isnan_res_88255) { + defunc_1_f_res_88256 = 0.0; + } else { + double x_88253 = ((__global + double *) binop_p_mem_120117)[gtid_88182 * + N_70860 + + gtid_88191]; + double defunc_1_f_res_f_res_88257 = x_88253 * x_88254; + + defunc_1_f_res_88256 = defunc_1_f_res_f_res_88257; + } + // save map-out results + { } + // load accumulator + { + x_88248 = x_acc_129012; + } + // load new values + { + x_88249 = defunc_1_f_res_88256; + } + // apply reduction operator + { + double defunc_1_op_res_88250 = x_88248 + x_88249; + + // store in accumulator + { + x_acc_129012 = defunc_1_op_res_88250; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_88248 = x_acc_129012; + ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_88248; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129018; + int32_t skip_waves_129019; + + skip_waves_129019 = 1; + + double x_129014; + double x_129015; + + offset_129018 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128999, + sext_i64_i32(segred_group_sizze_88244))) { + x_129014 = ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129018)]; + } + } + offset_129018 = 1; + /* Stage 1: tree reduction among threads of the same wave (wave size = LOCKSTEP_WIDTH), using volatile local-memory accesses and no barrier inside the loop. */ while (slt32(offset_129018, wave_sizze_129001)) { + if (slt32(local_tid_128999 + offset_129018, + sext_i64_i32(segred_group_sizze_88244)) && + ((local_tid_128999 - squot32(local_tid_128999, + wave_sizze_129001) * + wave_sizze_129001) & (2 * offset_129018 - 1)) == 0) { + // read array element + { + x_129015 = ((volatile __local + double 
*) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129018)]; + } + // apply reduction operation + { + double defunc_1_op_res_129016 = x_129014 + x_129015; + + x_129014 = defunc_1_op_res_129016; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_129014; + } + } + offset_129018 *= 2; + } + /* Stage 2: tree reduction across waves; only the first thread of a participating wave does work, with a local barrier before every step. */ while (slt32(skip_waves_129019, + squot32(sext_i64_i32(segred_group_sizze_88244) + + wave_sizze_129001 - 1, wave_sizze_129001))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129018 = skip_waves_129019 * wave_sizze_129001; + if (slt32(local_tid_128999 + offset_129018, + sext_i64_i32(segred_group_sizze_88244)) && + ((local_tid_128999 - squot32(local_tid_128999, + wave_sizze_129001) * + wave_sizze_129001) == 0 && (squot32(local_tid_128999, + wave_sizze_129001) & (2 * + skip_waves_129019 - + 1)) == + 0)) { + // read array element + { + x_129015 = ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129018)]; + } + // apply reduction operation + { + double defunc_1_op_res_129016 = x_129014 + x_129015; + + x_129014 = defunc_1_op_res_129016; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_129014; + } + } + skip_waves_129019 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128999) == (int64_t) 0) { + x_acc_129012 = x_129014; + } + } + if (groups_per_segment_128989 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128999 == 0) { + ((__global double *) mem_124587)[gtid_88181 * k2p2zq_70876 + + gtid_88182] = x_acc_129012; + } + } + } else { + int32_t old_counter_129020; + + // first thread in group saves group result to global memory + { + if (local_tid_128999 == 0) { + ((__global + double *) group_res_arr_mem_128994)[sext_i32_i64(virt_group_id_129009) * + segred_group_sizze_88244] = + 
x_acc_129012; + mem_fence_global(); + old_counter_129020 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128996)[sext_i32_i64(srem32(flat_segment_id_129010, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129005)[(int64_t) 0] = + old_counter_129020 == groups_per_segment_128989 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129021; + + is_last_group_129021 = ((__local + bool *) sync_arr_mem_129005)[(int64_t) 0]; + /* Slot 0 of sync_arr_mem_129005 was set by thread 0 to (old counter == groups_per_segment_128989 - 1), so only the group that arrived last for this segment takes this branch; it resets the counter and adds up all partial sums. NOTE(review): presumes atomic_add_i32_global returns the value held before the addition - confirm. */ if (is_last_group_129021) { + if (local_tid_128999 == 0) { + old_counter_129020 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_128996)[sext_i32_i64(srem32(flat_segment_id_129010, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128989)); + } + // read in the per-group-results + { + int64_t read_per_thread_129022 = + sdiv_up64(groups_per_segment_128989, + segred_group_sizze_88244); + + x_88248 = 0.0; + for (int64_t i_129023 = 0; i_129023 < + read_per_thread_129022; i_129023++) { + int64_t group_res_id_129024 = + sext_i32_i64(local_tid_128999) * + read_per_thread_129022 + i_129023; + int64_t index_of_group_res_129025 = + sext_i32_i64(flat_segment_id_129010) * + groups_per_segment_128989 + group_res_id_129024; + + if (slt64(group_res_id_129024, + groups_per_segment_128989)) { + x_88249 = ((__global + double *) group_res_arr_mem_128994)[index_of_group_res_129025 * + segred_group_sizze_88244]; + + double defunc_1_op_res_88250; + + defunc_1_op_res_88250 = x_88248 + x_88249; + x_88248 = defunc_1_op_res_88250; + } + } + } + ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_88248; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129026; + int32_t skip_waves_129027; + + skip_waves_129027 = 1; + + double x_129014; + double x_129015; + + offset_129026 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128999, + 
sext_i64_i32(segred_group_sizze_88244))) { + x_129014 = ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129026)]; + } + } + offset_129026 = 1; + while (slt32(offset_129026, wave_sizze_129001)) { + if (slt32(local_tid_128999 + offset_129026, + sext_i64_i32(segred_group_sizze_88244)) && + ((local_tid_128999 - squot32(local_tid_128999, + wave_sizze_129001) * + wave_sizze_129001) & (2 * offset_129026 - 1)) == + 0) { + // read array element + { + x_129015 = ((volatile __local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129026)]; + } + // apply reduction operation + { + double defunc_1_op_res_129016 = x_129014 + + x_129015; + + x_129014 = defunc_1_op_res_129016; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_129014; + } + } + offset_129026 *= 2; + } + while (slt32(skip_waves_129027, + squot32(sext_i64_i32(segred_group_sizze_88244) + + wave_sizze_129001 - 1, + wave_sizze_129001))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129026 = skip_waves_129027 * wave_sizze_129001; + if (slt32(local_tid_128999 + offset_129026, + sext_i64_i32(segred_group_sizze_88244)) && + ((local_tid_128999 - squot32(local_tid_128999, + wave_sizze_129001) * + wave_sizze_129001) == 0 && + (squot32(local_tid_128999, wave_sizze_129001) & + (2 * skip_waves_129027 - 1)) == 0)) { + // read array element + { + x_129015 = ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999 + + offset_129026)]; + } + // apply reduction operation + { + double defunc_1_op_res_129016 = x_129014 + + x_129015; + + x_129014 = defunc_1_op_res_129016; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129003)[sext_i32_i64(local_tid_128999)] = + x_129014; + } + } + skip_waves_129027 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128999 == 0) { + ((__global double *) mem_124587)[gtid_88181 * + k2p2zq_70876 + + 
gtid_88182] = + x_129014; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88244 +} +__kernel void mainDetailedzisegred_large_88329(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129093_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129091_backing_aligned_1, + int64_t k2p2zq_70876, + int64_t num_groups_88378, + int64_t groups_per_segment_129077, + int64_t elements_per_thread_129078, + int64_t virt_num_groups_129079, + int64_t threads_per_segment_129081, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124653, + __global + unsigned char *group_res_arr_mem_129082, + __global + unsigned char *mainDetailedzicounter_mem_129084) +{ + #define segred_group_sizze_88377 (mainDetailedzisegred_group_sizze_88323) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129093_backing_1 = + (__local volatile + char *) sync_arr_mem_129093_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129091_backing_0 = + (__local volatile + char *) red_arr_mem_129091_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129086; + int32_t local_tid_129087; + int64_t group_sizze_129090; + int32_t wave_sizze_129089; + int32_t group_tid_129088; + + global_tid_129086 = get_global_id(0); + local_tid_129087 = get_local_id(0); + group_sizze_129090 = get_local_size(0); + wave_sizze_129089 = LOCKSTEP_WIDTH; + group_tid_129088 = get_group_id(0); + + int32_t phys_tid_88329; + + phys_tid_88329 = global_tid_129086; + + __local char *red_arr_mem_129091; + + red_arr_mem_129091 = (__local char *) red_arr_mem_129091_backing_0; + + __local char *sync_arr_mem_129093; + + sync_arr_mem_129093 = (__local char *) sync_arr_mem_129093_backing_1; + + int32_t phys_group_id_129095; + + 
phys_group_id_129095 = get_group_id(0); + for (int32_t i_129096 = 0; i_129096 < + sdiv_up32(sext_i64_i32(virt_num_groups_129079) - phys_group_id_129095, + sext_i64_i32(num_groups_88378)); i_129096++) { + int32_t virt_group_id_129097 = phys_group_id_129095 + i_129096 * + sext_i64_i32(num_groups_88378); + int32_t flat_segment_id_129098 = squot32(virt_group_id_129097, + sext_i64_i32(groups_per_segment_129077)); + int64_t global_tid_129099 = srem64(sext_i32_i64(virt_group_id_129097) * + segred_group_sizze_88377 + + sext_i32_i64(local_tid_129087), + segred_group_sizze_88377 * + groups_per_segment_129077); + int64_t gtid_88318 = squot64(sext_i32_i64(flat_segment_id_129098), + k2p2zq_70876); + int64_t gtid_88319 = sext_i32_i64(flat_segment_id_129098) - + squot64(sext_i32_i64(flat_segment_id_129098), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_88328; + double x_acc_129100; + int64_t chunk_sizze_129101; + + chunk_sizze_129101 = smin64(elements_per_thread_129078, + sdiv_up64(k2p2zq_70876 - global_tid_129099, + threads_per_segment_129081)); + + double x_88381; + double x_88382; + + // neutral-initialise the accumulators + { + x_acc_129100 = 0.0; + } + for (int64_t i_129105 = 0; i_129105 < chunk_sizze_129101; i_129105++) { + gtid_88328 = global_tid_129099 + threads_per_segment_129081 * + i_129105; + // apply map function + { + double x_88387 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_88318 * + k2p2zq_70876 + + gtid_88328]; + double x_88388 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_88318 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_88319 * + k2p2zq_70876 + + gtid_88328]; + double defunc_1_f_res_88389 = x_88387 * x_88388; + + // save map-out results + { } + // load accumulator + { + x_88381 = x_acc_129100; + } + // load new values + { + x_88382 = defunc_1_f_res_88389; + } + // apply reduction operator + { + double defunc_1_op_res_88383 = x_88381 + x_88382; + + // store in accumulator + { + x_acc_129100 = defunc_1_op_res_88383; + } + } + } + } 
+ // to reduce current chunk, first store our result in memory + { + x_88381 = x_acc_129100; + ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_88381; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129106; + int32_t skip_waves_129107; + + skip_waves_129107 = 1; + + double x_129102; + double x_129103; + + offset_129106 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129087, + sext_i64_i32(segred_group_sizze_88377))) { + x_129102 = ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129106)]; + } + } + offset_129106 = 1; + while (slt32(offset_129106, wave_sizze_129089)) { + if (slt32(local_tid_129087 + offset_129106, + sext_i64_i32(segred_group_sizze_88377)) && + ((local_tid_129087 - squot32(local_tid_129087, + wave_sizze_129089) * + wave_sizze_129089) & (2 * offset_129106 - 1)) == 0) { + // read array element + { + x_129103 = ((volatile __local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129106)]; + } + // apply reduction operation + { + double defunc_1_op_res_129104 = x_129102 + x_129103; + + x_129102 = defunc_1_op_res_129104; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_129102; + } + } + offset_129106 *= 2; + } + while (slt32(skip_waves_129107, + squot32(sext_i64_i32(segred_group_sizze_88377) + + wave_sizze_129089 - 1, wave_sizze_129089))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129106 = skip_waves_129107 * wave_sizze_129089; + if (slt32(local_tid_129087 + offset_129106, + sext_i64_i32(segred_group_sizze_88377)) && + ((local_tid_129087 - squot32(local_tid_129087, + wave_sizze_129089) * + wave_sizze_129089) == 0 && (squot32(local_tid_129087, + wave_sizze_129089) & (2 * + skip_waves_129107 - + 1)) == + 0)) { + // read array element + { + x_129103 = ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129106)]; + } + // 
apply reduction operation + { + double defunc_1_op_res_129104 = x_129102 + x_129103; + + x_129102 = defunc_1_op_res_129104; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_129102; + } + } + skip_waves_129107 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129087) == (int64_t) 0) { + x_acc_129100 = x_129102; + } + } + if (groups_per_segment_129077 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129087 == 0) { + ((__global double *) mem_124653)[gtid_88318 * k2p2zq_70876 + + gtid_88319] = x_acc_129100; + } + } + } else { + int32_t old_counter_129108; + + // first thread in group saves group result to global memory + { + if (local_tid_129087 == 0) { + ((__global + double *) group_res_arr_mem_129082)[sext_i32_i64(virt_group_id_129097) * + segred_group_sizze_88377] = + x_acc_129100; + mem_fence_global(); + old_counter_129108 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129084)[sext_i32_i64(srem32(flat_segment_id_129098, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129093)[(int64_t) 0] = + old_counter_129108 == groups_per_segment_129077 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129109; + + is_last_group_129109 = ((__local + bool *) sync_arr_mem_129093)[(int64_t) 0]; + if (is_last_group_129109) { + if (local_tid_129087 == 0) { + old_counter_129108 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129084)[sext_i32_i64(srem32(flat_segment_id_129098, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129077)); + } + // read in the per-group-results + { + int64_t read_per_thread_129110 = + sdiv_up64(groups_per_segment_129077, + segred_group_sizze_88377); + + x_88381 = 0.0; + for (int64_t i_129111 = 0; i_129111 < + read_per_thread_129110; 
i_129111++) { + int64_t group_res_id_129112 = + sext_i32_i64(local_tid_129087) * + read_per_thread_129110 + i_129111; + int64_t index_of_group_res_129113 = + sext_i32_i64(flat_segment_id_129098) * + groups_per_segment_129077 + group_res_id_129112; + + if (slt64(group_res_id_129112, + groups_per_segment_129077)) { + x_88382 = ((__global + double *) group_res_arr_mem_129082)[index_of_group_res_129113 * + segred_group_sizze_88377]; + + double defunc_1_op_res_88383; + + defunc_1_op_res_88383 = x_88381 + x_88382; + x_88381 = defunc_1_op_res_88383; + } + } + } + ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_88381; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129114; + int32_t skip_waves_129115; + + skip_waves_129115 = 1; + + double x_129102; + double x_129103; + + offset_129114 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129087, + sext_i64_i32(segred_group_sizze_88377))) { + x_129102 = ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129114)]; + } + } + offset_129114 = 1; + while (slt32(offset_129114, wave_sizze_129089)) { + if (slt32(local_tid_129087 + offset_129114, + sext_i64_i32(segred_group_sizze_88377)) && + ((local_tid_129087 - squot32(local_tid_129087, + wave_sizze_129089) * + wave_sizze_129089) & (2 * offset_129114 - 1)) == + 0) { + // read array element + { + x_129103 = ((volatile __local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129114)]; + } + // apply reduction operation + { + double defunc_1_op_res_129104 = x_129102 + + x_129103; + + x_129102 = defunc_1_op_res_129104; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_129102; + } + } + offset_129114 *= 2; + } + while (slt32(skip_waves_129115, + squot32(sext_i64_i32(segred_group_sizze_88377) + + wave_sizze_129089 - 1, + wave_sizze_129089))) { + 
barrier(CLK_LOCAL_MEM_FENCE); + offset_129114 = skip_waves_129115 * wave_sizze_129089; + if (slt32(local_tid_129087 + offset_129114, + sext_i64_i32(segred_group_sizze_88377)) && + ((local_tid_129087 - squot32(local_tid_129087, + wave_sizze_129089) * + wave_sizze_129089) == 0 && + (squot32(local_tid_129087, wave_sizze_129089) & + (2 * skip_waves_129115 - 1)) == 0)) { + // read array element + { + x_129103 = ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087 + + offset_129114)]; + } + // apply reduction operation + { + double defunc_1_op_res_129104 = x_129102 + + x_129103; + + x_129102 = defunc_1_op_res_129104; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129091)[sext_i32_i64(local_tid_129087)] = + x_129102; + } + } + skip_waves_129115 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129087 == 0) { + ((__global double *) mem_124653)[gtid_88318 * + k2p2zq_70876 + + gtid_88319] = + x_129102; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88377 +} +/* mainDetailedzisegred_large_88459: segmented sum reduction over doubles (Futhark-generated code; prefer regenerating over hand-editing). For each flat segment, split into (gtid_88448, gtid_88449), it sums defunc_4_map_res_mem_124659[gtid_88448 * k2p2zq_70876 + j] * mem_120124[gtid_88449 * k2p2zq_70876 + j] over the reduction index j = gtid_88458 and stores the total in mem_124877[gtid_88448 * N_70860 + gtid_88449]. When groups_per_segment_129209 is 1 the group writes the result directly; otherwise each group writes a partial sum to group_res_arr_mem_129214 and the group that observes the final counter value in mainDetailedzicounter_mem_129216 combines the partials. NOTE(review): this looks like one dot product per output element - confirm against the Futhark source. */ __kernel void mainDetailedzisegred_large_88459(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129225_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129223_backing_aligned_1, + int64_t N_70860, + int64_t k2p2zq_70876, + int64_t num_groups_88506, + int64_t groups_per_segment_129209, + int64_t elements_per_thread_129210, + int64_t virt_num_groups_129211, + int64_t threads_per_segment_129213, + __global + unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124877, + __global + unsigned char *group_res_arr_mem_129214, + __global + unsigned char *mainDetailedzicounter_mem_129216) +{ + #define segred_group_sizze_88505 (mainDetailedzisegred_group_sizze_88453) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char
*restrict sync_arr_mem_129225_backing_1 = + (__local volatile + char *) sync_arr_mem_129225_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129223_backing_0 = + (__local volatile + char *) red_arr_mem_129223_backing_aligned_1; + + /* do no work if *global_failure already holds a non-negative value */ if (*global_failure >= 0) + return; + + int32_t global_tid_129218; + int32_t local_tid_129219; + int64_t group_sizze_129222; + int32_t wave_sizze_129221; + int32_t group_tid_129220; + + global_tid_129218 = get_global_id(0); + local_tid_129219 = get_local_id(0); + group_sizze_129222 = get_local_size(0); + wave_sizze_129221 = LOCKSTEP_WIDTH; + group_tid_129220 = get_group_id(0); + + int32_t phys_tid_88459; + + phys_tid_88459 = global_tid_129218; + + __local char *red_arr_mem_129223; + + red_arr_mem_129223 = (__local char *) red_arr_mem_129223_backing_0; + + __local char *sync_arr_mem_129225; + + sync_arr_mem_129225 = (__local char *) sync_arr_mem_129225_backing_1; + + int32_t phys_group_id_129227; + + phys_group_id_129227 = get_group_id(0); + for (int32_t i_129228 = 0; i_129228 < + sdiv_up32(sext_i64_i32(virt_num_groups_129211) - phys_group_id_129227, + sext_i64_i32(num_groups_88506)); i_129228++) { + int32_t virt_group_id_129229 = phys_group_id_129227 + i_129228 * + sext_i64_i32(num_groups_88506); + int32_t flat_segment_id_129230 = squot32(virt_group_id_129229, + sext_i64_i32(groups_per_segment_129209)); + int64_t global_tid_129231 = srem64(sext_i32_i64(virt_group_id_129229) * + segred_group_sizze_88505 + + sext_i32_i64(local_tid_129219), + segred_group_sizze_88505 * + groups_per_segment_129209); + /* split the flat segment id into (quotient, remainder) with respect to N_70860 */ int64_t gtid_88448 = squot64(sext_i32_i64(flat_segment_id_129230), + N_70860); + int64_t gtid_88449 = sext_i32_i64(flat_segment_id_129230) - + squot64(sext_i32_i64(flat_segment_id_129230), N_70860) * + N_70860; + int64_t gtid_88458; + double x_acc_129232; + int64_t chunk_sizze_129233; + + chunk_sizze_129233 = smin64(elements_per_thread_129210, + sdiv_up64(k2p2zq_70876 - global_tid_129231, + threads_per_segment_129213)); + +
double x_88509; + double x_88510; + + // neutral-initialise the accumulators + { + x_acc_129232 = 0.0; + } + /* each thread visits indices global_tid_129231 + threads_per_segment_129213 * i for i < chunk_sizze_129233 */ for (int64_t i_129237 = 0; i_129237 < chunk_sizze_129233; i_129237++) { + gtid_88458 = global_tid_129231 + threads_per_segment_129213 * + i_129237; + // apply map function + { + double x_88514 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_88448 * + k2p2zq_70876 + + gtid_88458]; + double x_88515 = ((__global double *) mem_120124)[gtid_88449 * + k2p2zq_70876 + + gtid_88458]; + double defunc_1_f_res_88516 = x_88514 * x_88515; + + // save map-out results + { } + // load accumulator + { + x_88509 = x_acc_129232; + } + // load new values + { + x_88510 = defunc_1_f_res_88516; + } + // apply reduction operator + { + double defunc_1_op_res_88511 = x_88509 + x_88510; + + // store in accumulator + { + x_acc_129232 = defunc_1_op_res_88511; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_88509 = x_acc_129232; + ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_88509; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129238; + int32_t skip_waves_129239; + + skip_waves_129239 = 1; + + double x_129234; + double x_129235; + + offset_129238 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129219, + sext_i64_i32(segred_group_sizze_88505))) { + x_129234 = ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + + offset_129238)]; + } + } + offset_129238 = 1; + while (slt32(offset_129238, wave_sizze_129221)) { + if (slt32(local_tid_129219 + offset_129238, + sext_i64_i32(segred_group_sizze_88505)) && + ((local_tid_129219 - squot32(local_tid_129219, + wave_sizze_129221) * + wave_sizze_129221) & (2 * offset_129238 - 1)) == 0) { + // read array element + { + x_129235 = ((volatile __local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + + offset_129238)]; + } + // apply reduction operation + { + double defunc_1_op_res_129236 =
x_129234 + x_129235; + + x_129234 = defunc_1_op_res_129236; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_129234; + } + } + offset_129238 *= 2; + } + while (slt32(skip_waves_129239, + squot32(sext_i64_i32(segred_group_sizze_88505) + + wave_sizze_129221 - 1, wave_sizze_129221))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129238 = skip_waves_129239 * wave_sizze_129221; + if (slt32(local_tid_129219 + offset_129238, + sext_i64_i32(segred_group_sizze_88505)) && + ((local_tid_129219 - squot32(local_tid_129219, + wave_sizze_129221) * + wave_sizze_129221) == 0 && (squot32(local_tid_129219, + wave_sizze_129221) & (2 * + skip_waves_129239 - + 1)) == + 0)) { + // read array element + { + x_129235 = ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + + offset_129238)]; + } + // apply reduction operation + { + double defunc_1_op_res_129236 = x_129234 + x_129235; + + x_129234 = defunc_1_op_res_129236; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_129234; + } + } + skip_waves_129239 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129219) == (int64_t) 0) { + x_acc_129232 = x_129234; + } + } + if (groups_per_segment_129209 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129219 == 0) { + ((__global double *) mem_124877)[gtid_88448 * N_70860 + + gtid_88449] = x_acc_129232; + } + } + } else { + int32_t old_counter_129240; + + // first thread in group saves group result to global memory + { + if (local_tid_129219 == 0) { + ((__global + double *) group_res_arr_mem_129214)[sext_i32_i64(virt_group_id_129229) * + segred_group_sizze_88505] = + x_acc_129232; + mem_fence_global(); + old_counter_129240 = + atomic_add_i32_global(&((volatile __global + int *)
mainDetailedzicounter_mem_129216)[sext_i32_i64(srem32(flat_segment_id_129230, + 10240))], + (int) 1); + /* the flag is true only when old_counter (presumably the pre-increment value - confirm atomic_add_i32_global) equals groups_per_segment - 1 */ ((__local bool *) sync_arr_mem_129225)[(int64_t) 0] = + old_counter_129240 == groups_per_segment_129209 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129241; + + is_last_group_129241 = ((__local + bool *) sync_arr_mem_129225)[(int64_t) 0]; + if (is_last_group_129241) { + /* thread 0 adds -groups_per_segment to the same counter slot */ if (local_tid_129219 == 0) { + old_counter_129240 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129216)[sext_i32_i64(srem32(flat_segment_id_129230, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129209)); + } + // read in the per-group-results + { + int64_t read_per_thread_129242 = + sdiv_up64(groups_per_segment_129209, + segred_group_sizze_88505); + + x_88509 = 0.0; + for (int64_t i_129243 = 0; i_129243 < + read_per_thread_129242; i_129243++) { + int64_t group_res_id_129244 = + sext_i32_i64(local_tid_129219) * + read_per_thread_129242 + i_129243; + int64_t index_of_group_res_129245 = + sext_i32_i64(flat_segment_id_129230) * + groups_per_segment_129209 + group_res_id_129244; + + if (slt64(group_res_id_129244, + groups_per_segment_129209)) { + x_88510 = ((__global + double *) group_res_arr_mem_129214)[index_of_group_res_129245 * + segred_group_sizze_88505]; + + double defunc_1_op_res_88511; + + defunc_1_op_res_88511 = x_88509 + x_88510; + x_88509 = defunc_1_op_res_88511; + } + } + } + ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_88509; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129246; + int32_t skip_waves_129247; + + skip_waves_129247 = 1; + + double x_129234; + double x_129235; + + offset_129246 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129219, + sext_i64_i32(segred_group_sizze_88505))) { + x_129234 = ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + +
offset_129246)]; + } + } + offset_129246 = 1; + while (slt32(offset_129246, wave_sizze_129221)) { + if (slt32(local_tid_129219 + offset_129246, + sext_i64_i32(segred_group_sizze_88505)) && + ((local_tid_129219 - squot32(local_tid_129219, + wave_sizze_129221) * + wave_sizze_129221) & (2 * offset_129246 - 1)) == + 0) { + // read array element + { + x_129235 = ((volatile __local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + + offset_129246)]; + } + // apply reduction operation + { + double defunc_1_op_res_129236 = x_129234 + + x_129235; + + x_129234 = defunc_1_op_res_129236; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_129234; + } + } + offset_129246 *= 2; + } + while (slt32(skip_waves_129247, + squot32(sext_i64_i32(segred_group_sizze_88505) + + wave_sizze_129221 - 1, + wave_sizze_129221))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129246 = skip_waves_129247 * wave_sizze_129221; + if (slt32(local_tid_129219 + offset_129246, + sext_i64_i32(segred_group_sizze_88505)) && + ((local_tid_129219 - squot32(local_tid_129219, + wave_sizze_129221) * + wave_sizze_129221) == 0 && + (squot32(local_tid_129219, wave_sizze_129221) & + (2 * skip_waves_129247 - 1)) == 0)) { + // read array element + { + x_129235 = ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219 + + offset_129246)]; + } + // apply reduction operation + { + double defunc_1_op_res_129236 = x_129234 + + x_129235; + + x_129234 = defunc_1_op_res_129236; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129223)[sext_i32_i64(local_tid_129219)] = + x_129234; + } + } + skip_waves_129247 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129219 == 0) { + ((__global double *) mem_124877)[gtid_88448 * + N_70860 + + gtid_88449] = + x_129234; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef
segred_group_sizze_88505 +} +/* mainDetailedzisegred_large_88880: segmented sum of squares over doubles (Futhark-generated code; prefer regenerating over hand-editing). For segment gtid_88871 and reduction index gtid_88879 (chunked against n_70864), the mapped value v is mem_124924[gtid_88871 * N_70860 + gtid_88879] when gtid_88879 < mem_124949[gtid_88871] (read as int64_t), else 0.0; the kernel sums v * v and stores the total in mem_124952[gtid_88871]. If a selected index is not below N_70860 the kernel records failure code 407 with arguments (gtid_88879, N_70860) and the group returns early. Multi-group segments are combined through group_res_arr_mem_129444 and the counters in mainDetailedzicounter_mem_129446. */ __kernel void mainDetailedzisegred_large_88880(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129455_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129453_backing_aligned_1, + int64_t N_70860, int64_t n_70864, + int64_t num_groups_88930, + int64_t groups_per_segment_129439, + int64_t elements_per_thread_129440, + int64_t virt_num_groups_129441, + int64_t threads_per_segment_129443, + __global + unsigned char *mem_124924, + __global + unsigned char *mem_124949, + __global + unsigned char *mem_124952, + __global + unsigned char *group_res_arr_mem_129444, + __global + unsigned char *mainDetailedzicounter_mem_129446) +{ + #define segred_group_sizze_88929 (mainDetailedzisegred_group_sizze_88874) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129455_backing_1 = + (__local volatile + char *) sync_arr_mem_129455_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129453_backing_0 = + (__local volatile + char *) red_arr_mem_129453_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129448; + int32_t local_tid_129449; + int64_t group_sizze_129452; + int32_t wave_sizze_129451; + int32_t group_tid_129450; + + global_tid_129448 = get_global_id(0); + local_tid_129449 = get_local_id(0); + group_sizze_129452 = get_local_size(0); + wave_sizze_129451 = LOCKSTEP_WIDTH; + group_tid_129450 = get_group_id(0); + + int32_t phys_tid_88880; + + phys_tid_88880 = global_tid_129448; + + __local char *red_arr_mem_129453; + + red_arr_mem_129453 = (__local char *) red_arr_mem_129453_backing_0; + + __local char *sync_arr_mem_129455; + + sync_arr_mem_129455 = (__local char *)
sync_arr_mem_129455_backing_1; + + int32_t phys_group_id_129457; + + phys_group_id_129457 = get_group_id(0); + for (int32_t i_129458 = 0; i_129458 < + sdiv_up32(sext_i64_i32(virt_num_groups_129441) - phys_group_id_129457, + sext_i64_i32(num_groups_88930)); i_129458++) { + int32_t virt_group_id_129459 = phys_group_id_129457 + i_129458 * + sext_i64_i32(num_groups_88930); + int32_t flat_segment_id_129460 = squot32(virt_group_id_129459, + sext_i64_i32(groups_per_segment_129439)); + int64_t global_tid_129461 = srem64(sext_i32_i64(virt_group_id_129459) * + segred_group_sizze_88929 + + sext_i32_i64(local_tid_129449), + segred_group_sizze_88929 * + groups_per_segment_129439); + int64_t gtid_88871 = sext_i32_i64(flat_segment_id_129460); + int64_t gtid_88879; + double x_acc_129462; + int64_t chunk_sizze_129463; + + chunk_sizze_129463 = smin64(elements_per_thread_129440, + sdiv_up64(n_70864 - global_tid_129461, + threads_per_segment_129443)); + + double x_88933; + double x_88934; + + // neutral-initialise the accumulators + { + x_acc_129462 = 0.0; + } + for (int64_t i_129467 = 0; i_129467 < chunk_sizze_129463; i_129467++) { + gtid_88879 = global_tid_129461 + threads_per_segment_129443 * + i_129467; + // apply map function + { + int64_t defunc_0_f_res_88937 = ((__global + int64_t *) mem_124949)[gtid_88871]; + bool cond_88939 = slt64(gtid_88879, defunc_0_f_res_88937); + double defunc_0_f_res_88940; + + if (cond_88939) { + bool y_88942 = slt64(gtid_88879, N_70860); + bool index_certs_88944; + + if (!y_88942) { + { + /* the failure arguments are written only when the compare-exchange on global_failure returns -1 (presumably: no earlier failure was recorded - confirm atomic_cmpxchg_i32_global) */ if (atomic_cmpxchg_i32_global(global_failure, -1, + 407) == -1) { + global_failure_args[0] = gtid_88879; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_88945 = ((__global + double *) mem_124924)[gtid_88871 * + N_70860 + + gtid_88879]; + + defunc_0_f_res_88940 = defunc_0_f_res_t_res_88945; + } else { + defunc_0_f_res_88940 = 0.0; + } + + double defunc_0_f_res_88946 = defunc_0_f_res_88940 * +
defunc_0_f_res_88940; + + // save map-out results + { } + // load accumulator + { + x_88933 = x_acc_129462; + } + // load new values + { + x_88934 = defunc_0_f_res_88946; + } + // apply reduction operator + { + double defunc_1_op_res_88935 = x_88933 + x_88934; + + // store in accumulator + { + x_acc_129462 = defunc_1_op_res_88935; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_88933 = x_acc_129462; + ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_88933; + } + + error_0: + /* both the normal path and the failing goto reach this barrier; every thread then returns if local_failure was set */ barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129468; + int32_t skip_waves_129469; + + skip_waves_129469 = 1; + + double x_129464; + double x_129465; + + offset_129468 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129449, + sext_i64_i32(segred_group_sizze_88929))) { + x_129464 = ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129468)]; + } + } + offset_129468 = 1; + while (slt32(offset_129468, wave_sizze_129451)) { + if (slt32(local_tid_129449 + offset_129468, + sext_i64_i32(segred_group_sizze_88929)) && + ((local_tid_129449 - squot32(local_tid_129449, + wave_sizze_129451) * + wave_sizze_129451) & (2 * offset_129468 - 1)) == 0) { + // read array element + { + x_129465 = ((volatile __local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129468)]; + } + // apply reduction operation + { + double defunc_1_op_res_129466 = x_129464 + x_129465; + + x_129464 = defunc_1_op_res_129466; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_129464; + } + } + offset_129468 *= 2; + } + while (slt32(skip_waves_129469, + squot32(sext_i64_i32(segred_group_sizze_88929) + + wave_sizze_129451 - 1, wave_sizze_129451))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129468 = skip_waves_129469 * wave_sizze_129451; +
if (slt32(local_tid_129449 + offset_129468, + sext_i64_i32(segred_group_sizze_88929)) && + ((local_tid_129449 - squot32(local_tid_129449, + wave_sizze_129451) * + wave_sizze_129451) == 0 && (squot32(local_tid_129449, + wave_sizze_129451) & (2 * + skip_waves_129469 - + 1)) == + 0)) { + // read array element + { + x_129465 = ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129468)]; + } + // apply reduction operation + { + double defunc_1_op_res_129466 = x_129464 + x_129465; + + x_129464 = defunc_1_op_res_129466; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_129464; + } + } + skip_waves_129469 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129449) == (int64_t) 0) { + x_acc_129462 = x_129464; + } + } + if (groups_per_segment_129439 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129449 == 0) { + ((__global double *) mem_124952)[gtid_88871] = x_acc_129462; + } + } + } else { + int32_t old_counter_129470; + + // first thread in group saves group result to global memory + { + if (local_tid_129449 == 0) { + ((__global + double *) group_res_arr_mem_129444)[sext_i32_i64(virt_group_id_129459) * + segred_group_sizze_88929] = + x_acc_129462; + mem_fence_global(); + old_counter_129470 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129446)[sext_i32_i64(srem32(flat_segment_id_129460, + 10240))], + (int) 1); + /* the flag is true only when old_counter (presumably the pre-increment value - confirm atomic_add_i32_global) equals groups_per_segment - 1 */ ((__local bool *) sync_arr_mem_129455)[(int64_t) 0] = + old_counter_129470 == groups_per_segment_129439 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129471; + + is_last_group_129471 = ((__local + bool *) sync_arr_mem_129455)[(int64_t) 0]; + if (is_last_group_129471) { + /* thread 0 adds -groups_per_segment to the same counter slot */ if (local_tid_129449 == 0) { + old_counter_129470 = +
atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129446)[sext_i32_i64(srem32(flat_segment_id_129460, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129439)); + } + // read in the per-group-results + { + int64_t read_per_thread_129472 = + sdiv_up64(groups_per_segment_129439, + segred_group_sizze_88929); + + x_88933 = 0.0; + for (int64_t i_129473 = 0; i_129473 < + read_per_thread_129472; i_129473++) { + int64_t group_res_id_129474 = + sext_i32_i64(local_tid_129449) * + read_per_thread_129472 + i_129473; + int64_t index_of_group_res_129475 = + sext_i32_i64(flat_segment_id_129460) * + groups_per_segment_129439 + group_res_id_129474; + + if (slt64(group_res_id_129474, + groups_per_segment_129439)) { + x_88934 = ((__global + double *) group_res_arr_mem_129444)[index_of_group_res_129475 * + segred_group_sizze_88929]; + + double defunc_1_op_res_88935; + + defunc_1_op_res_88935 = x_88933 + x_88934; + x_88933 = defunc_1_op_res_88935; + } + } + } + ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_88933; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129476; + int32_t skip_waves_129477; + + skip_waves_129477 = 1; + + double x_129464; + double x_129465; + + offset_129476 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129449, + sext_i64_i32(segred_group_sizze_88929))) { + x_129464 = ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129476)]; + } + } + offset_129476 = 1; + while (slt32(offset_129476, wave_sizze_129451)) { + if (slt32(local_tid_129449 + offset_129476, + sext_i64_i32(segred_group_sizze_88929)) && + ((local_tid_129449 - squot32(local_tid_129449, + wave_sizze_129451) * + wave_sizze_129451) & (2 * offset_129476 - 1)) == + 0) { + // read array element + { + x_129465 = ((volatile __local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129476)]; + } + // apply reduction
operation + { + double defunc_1_op_res_129466 = x_129464 + + x_129465; + + x_129464 = defunc_1_op_res_129466; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_129464; + } + } + offset_129476 *= 2; + } + while (slt32(skip_waves_129477, + squot32(sext_i64_i32(segred_group_sizze_88929) + + wave_sizze_129451 - 1, + wave_sizze_129451))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129476 = skip_waves_129477 * wave_sizze_129451; + if (slt32(local_tid_129449 + offset_129476, + sext_i64_i32(segred_group_sizze_88929)) && + ((local_tid_129449 - squot32(local_tid_129449, + wave_sizze_129451) * + wave_sizze_129451) == 0 && + (squot32(local_tid_129449, wave_sizze_129451) & + (2 * skip_waves_129477 - 1)) == 0)) { + // read array element + { + x_129465 = ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449 + + offset_129476)]; + } + // apply reduction operation + { + double defunc_1_op_res_129466 = x_129464 + + x_129465; + + x_129464 = defunc_1_op_res_129466; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129453)[sext_i32_i64(local_tid_129449)] = + x_129464; + } + } + skip_waves_129477 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129449 == 0) { + ((__global double *) mem_124952)[gtid_88871] = + x_129464; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88929 +} +__kernel void mainDetailedzisegred_large_88904(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129395_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129393_backing_aligned_1, + int64_t N_70860, int64_t n_70864, + int64_t num_groups_88916, + int64_t groups_per_segment_129379, + int64_t elements_per_thread_129380, + int64_t virt_num_groups_129381, + int64_t threads_per_segment_129383, + __global + unsigned char *mem_124142, + __global + unsigned char
*mem_124949, + __global + unsigned char *group_res_arr_mem_129384, + __global + unsigned char *mainDetailedzicounter_mem_129386) +{ + #define segred_group_sizze_88915 (mainDetailedzisegred_group_sizze_88898) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129395_backing_1 = + (__local volatile + char *) sync_arr_mem_129395_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129393_backing_0 = + (__local volatile + char *) red_arr_mem_129393_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129388; + int32_t local_tid_129389; + int64_t group_sizze_129392; + int32_t wave_sizze_129391; + int32_t group_tid_129390; + + global_tid_129388 = get_global_id(0); + local_tid_129389 = get_local_id(0); + group_sizze_129392 = get_local_size(0); + wave_sizze_129391 = LOCKSTEP_WIDTH; + group_tid_129390 = get_group_id(0); + + int32_t phys_tid_88904; + + phys_tid_88904 = global_tid_129388; + + __local char *red_arr_mem_129393; + + red_arr_mem_129393 = (__local char *) red_arr_mem_129393_backing_0; + + __local char *sync_arr_mem_129395; + + sync_arr_mem_129395 = (__local char *) sync_arr_mem_129395_backing_1; + + int32_t phys_group_id_129397; + + phys_group_id_129397 = get_group_id(0); + for (int32_t i_129398 = 0; i_129398 < + sdiv_up32(sext_i64_i32(virt_num_groups_129381) - phys_group_id_129397, + sext_i64_i32(num_groups_88916)); i_129398++) { + int32_t virt_group_id_129399 = phys_group_id_129397 + i_129398 * + sext_i64_i32(num_groups_88916); + int32_t flat_segment_id_129400 = squot32(virt_group_id_129399, + sext_i64_i32(groups_per_segment_129379)); + int64_t global_tid_129401 = srem64(sext_i32_i64(virt_group_id_129399) * + segred_group_sizze_88915 + + sext_i32_i64(local_tid_129389), + segred_group_sizze_88915 * + groups_per_segment_129379); + int64_t gtid_88895 = sext_i32_i64(flat_segment_id_129400); + int64_t gtid_88903; + int64_t x_acc_129402; + int64_t 
chunk_sizze_129403; + + chunk_sizze_129403 = smin64(elements_per_thread_129380, + sdiv_up64(n_70864 - global_tid_129401, + threads_per_segment_129383)); + + int64_t x_88919; + int64_t x_88920; + + // neutral-initialise the accumulators + { + x_acc_129402 = (int64_t) 0; + } + for (int64_t i_129407 = 0; i_129407 < chunk_sizze_129403; i_129407++) { + gtid_88903 = global_tid_129401 + threads_per_segment_129383 * + i_129407; + // apply map function + { + double x_88923 = ((__global double *) mem_124142)[gtid_88895 * + N_70860 + + gtid_88903]; + bool isnan_res_88924; + + isnan_res_88924 = futrts_isnan64(x_88923); + + bool cond_88925 = !isnan_res_88924; + int64_t defunc_0_f_res_88926 = btoi_bool_i64(cond_88925); + + // save map-out results + { } + // load accumulator + { + x_88919 = x_acc_129402; + } + // load new values + { + x_88920 = defunc_0_f_res_88926; + } + // apply reduction operator + { + int64_t defunc_1_op_res_88921 = add64(x_88919, x_88920); + + // store in accumulator + { + x_acc_129402 = defunc_1_op_res_88921; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_88919 = x_acc_129402; + ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_88919; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129408; + int32_t skip_waves_129409; + + skip_waves_129409 = 1; + + int64_t x_129404; + int64_t x_129405; + + offset_129408 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129389, + sext_i64_i32(segred_group_sizze_88915))) { + x_129404 = ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129408)]; + } + } + offset_129408 = 1; + while (slt32(offset_129408, wave_sizze_129391)) { + if (slt32(local_tid_129389 + offset_129408, + sext_i64_i32(segred_group_sizze_88915)) && + ((local_tid_129389 - squot32(local_tid_129389, + wave_sizze_129391) * + wave_sizze_129391) & (2 * offset_129408 - 1)) == 0) { + // read array element + { + x_129405 = 
((volatile __local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129408)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129406 = add64(x_129404, x_129405); + + x_129404 = defunc_1_op_res_129406; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_129404; + } + } + offset_129408 *= 2; + } + while (slt32(skip_waves_129409, + squot32(sext_i64_i32(segred_group_sizze_88915) + + wave_sizze_129391 - 1, wave_sizze_129391))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129408 = skip_waves_129409 * wave_sizze_129391; + if (slt32(local_tid_129389 + offset_129408, + sext_i64_i32(segred_group_sizze_88915)) && + ((local_tid_129389 - squot32(local_tid_129389, + wave_sizze_129391) * + wave_sizze_129391) == 0 && (squot32(local_tid_129389, + wave_sizze_129391) & (2 * + skip_waves_129409 - + 1)) == + 0)) { + // read array element + { + x_129405 = ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129408)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129406 = add64(x_129404, x_129405); + + x_129404 = defunc_1_op_res_129406; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_129404; + } + } + skip_waves_129409 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129389) == (int64_t) 0) { + x_acc_129402 = x_129404; + } + } + if (groups_per_segment_129379 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129389 == 0) { + ((__global int64_t *) mem_124949)[gtid_88895] = + x_acc_129402; + } + } + } else { + int32_t old_counter_129410; + + // first thread in group saves group result to global memory + { + if (local_tid_129389 == 0) { + ((__global + int64_t *) group_res_arr_mem_129384)[sext_i32_i64(virt_group_id_129399) * + 
segred_group_sizze_88915] = + x_acc_129402; + mem_fence_global(); + old_counter_129410 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129386)[sext_i32_i64(srem32(flat_segment_id_129400, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129395)[(int64_t) 0] = + old_counter_129410 == groups_per_segment_129379 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129411; + + is_last_group_129411 = ((__local + bool *) sync_arr_mem_129395)[(int64_t) 0]; + if (is_last_group_129411) { + if (local_tid_129389 == 0) { + old_counter_129410 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129386)[sext_i32_i64(srem32(flat_segment_id_129400, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129379)); + } + // read in the per-group-results + { + int64_t read_per_thread_129412 = + sdiv_up64(groups_per_segment_129379, + segred_group_sizze_88915); + + x_88919 = (int64_t) 0; + for (int64_t i_129413 = 0; i_129413 < + read_per_thread_129412; i_129413++) { + int64_t group_res_id_129414 = + sext_i32_i64(local_tid_129389) * + read_per_thread_129412 + i_129413; + int64_t index_of_group_res_129415 = + sext_i32_i64(flat_segment_id_129400) * + groups_per_segment_129379 + group_res_id_129414; + + if (slt64(group_res_id_129414, + groups_per_segment_129379)) { + x_88920 = ((__global + int64_t *) group_res_arr_mem_129384)[index_of_group_res_129415 * + segred_group_sizze_88915]; + + int64_t defunc_1_op_res_88921; + + defunc_1_op_res_88921 = add64(x_88919, x_88920); + x_88919 = defunc_1_op_res_88921; + } + } + } + ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_88919; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129416; + int32_t skip_waves_129417; + + skip_waves_129417 = 1; + + int64_t x_129404; + int64_t x_129405; + + offset_129416 = 0; + // participating threads read initial accumulator + { + 
if (slt32(local_tid_129389, + sext_i64_i32(segred_group_sizze_88915))) { + x_129404 = ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129416)]; + } + } + offset_129416 = 1; + while (slt32(offset_129416, wave_sizze_129391)) { + if (slt32(local_tid_129389 + offset_129416, + sext_i64_i32(segred_group_sizze_88915)) && + ((local_tid_129389 - squot32(local_tid_129389, + wave_sizze_129391) * + wave_sizze_129391) & (2 * offset_129416 - 1)) == + 0) { + // read array element + { + x_129405 = ((volatile __local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129416)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129406 = add64(x_129404, + x_129405); + + x_129404 = defunc_1_op_res_129406; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_129404; + } + } + offset_129416 *= 2; + } + while (slt32(skip_waves_129417, + squot32(sext_i64_i32(segred_group_sizze_88915) + + wave_sizze_129391 - 1, + wave_sizze_129391))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129416 = skip_waves_129417 * wave_sizze_129391; + if (slt32(local_tid_129389 + offset_129416, + sext_i64_i32(segred_group_sizze_88915)) && + ((local_tid_129389 - squot32(local_tid_129389, + wave_sizze_129391) * + wave_sizze_129391) == 0 && + (squot32(local_tid_129389, wave_sizze_129391) & + (2 * skip_waves_129417 - 1)) == 0)) { + // read array element + { + x_129405 = ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389 + + offset_129416)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129406 = add64(x_129404, + x_129405); + + x_129404 = defunc_1_op_res_129406; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129393)[sext_i32_i64(local_tid_129389)] = + x_129404; + } + } + skip_waves_129417 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129389 == 0) { + ((__global int64_t *) 
mem_124949)[gtid_88895] = + x_129404; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88915 +} +__kernel void mainDetailedzisegred_large_89034(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129560_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129558_backing_aligned_1, + int64_t N_70860, + int64_t defunc_2_reduce_comm_res_72722, + int64_t num_groups_89055, + int64_t groups_per_segment_129544, + int64_t elements_per_thread_129545, + int64_t virt_num_groups_129546, + int64_t threads_per_segment_129548, + __global + unsigned char *mem_124924, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *mem_124972, + __global + unsigned char *group_res_arr_mem_129549, + __global + unsigned char *mainDetailedzicounter_mem_129551) +{ + #define segred_group_sizze_89054 (mainDetailedzisegred_group_sizze_89028) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129560_backing_1 = + (__local volatile + char *) sync_arr_mem_129560_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129558_backing_0 = + (__local volatile + char *) red_arr_mem_129558_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129553; + int32_t local_tid_129554; + int64_t group_sizze_129557; + int32_t wave_sizze_129556; + int32_t group_tid_129555; + + global_tid_129553 = get_global_id(0); + local_tid_129554 = get_local_id(0); + group_sizze_129557 = get_local_size(0); + wave_sizze_129556 = LOCKSTEP_WIDTH; + group_tid_129555 = get_group_id(0); + + int32_t 
phys_tid_89034; + + phys_tid_89034 = global_tid_129553; + + __local char *red_arr_mem_129558; + + red_arr_mem_129558 = (__local char *) red_arr_mem_129558_backing_0; + + __local char *sync_arr_mem_129560; + + sync_arr_mem_129560 = (__local char *) sync_arr_mem_129560_backing_1; + + int32_t phys_group_id_129562; + + phys_group_id_129562 = get_group_id(0); + for (int32_t i_129563 = 0; i_129563 < + sdiv_up32(sext_i64_i32(virt_num_groups_129546) - phys_group_id_129562, + sext_i64_i32(num_groups_89055)); i_129563++) { + int32_t virt_group_id_129564 = phys_group_id_129562 + i_129563 * + sext_i64_i32(num_groups_89055); + int32_t flat_segment_id_129565 = squot32(virt_group_id_129564, + sext_i64_i32(groups_per_segment_129544)); + int64_t global_tid_129566 = srem64(sext_i32_i64(virt_group_id_129564) * + segred_group_sizze_89054 + + sext_i32_i64(local_tid_129554), + segred_group_sizze_89054 * + groups_per_segment_129544); + int64_t gtid_89025 = sext_i32_i64(flat_segment_id_129565); + int64_t gtid_89033; + double x_acc_129567; + int64_t chunk_sizze_129568; + + chunk_sizze_129568 = smin64(elements_per_thread_129545, + sdiv_up64(defunc_2_reduce_comm_res_72722 - + global_tid_129566, + threads_per_segment_129548)); + + double x_89058; + double x_89059; + + // neutral-initialise the accumulators + { + x_acc_129567 = 0.0; + } + for (int64_t i_129572 = 0; i_129572 < chunk_sizze_129568; i_129572++) { + gtid_89033 = global_tid_129566 + threads_per_segment_129548 * + i_129572; + // apply map function + { + int64_t x_89063 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_89025]; + bool cond_89065 = slt64(gtid_89033, x_89063); + double defunc_0_f_res_89066; + + if (cond_89065) { + int64_t x_89062 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89025]; + int64_t x_89067 = add64(gtid_89033, x_89062); + int64_t x_89068 = sub64(x_89067, x_89063); + int64_t i_89069 = add64((int64_t) 1, x_89068); + bool x_89070 = sle64((int64_t) 0, i_89069); + bool y_89071 = 
slt64(i_89069, N_70860); + bool bounds_check_89072 = x_89070 && y_89071; + bool index_certs_89073; + + if (!bounds_check_89072) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 410) == -1) { + global_failure_args[0] = i_89069; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_89074 = ((__global + double *) mem_124924)[gtid_89025 * + N_70860 + + i_89069]; + + defunc_0_f_res_89066 = defunc_0_f_res_t_res_89074; + } else { + defunc_0_f_res_89066 = 0.0; + } + // save map-out results + { } + // load accumulator + { + x_89058 = x_acc_129567; + } + // load new values + { + x_89059 = defunc_0_f_res_89066; + } + // apply reduction operator + { + double defunc_1_op_res_89060 = x_89058 + x_89059; + + // store in accumulator + { + x_acc_129567 = defunc_1_op_res_89060; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_89058 = x_acc_129567; + ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_89058; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129573; + int32_t skip_waves_129574; + + skip_waves_129574 = 1; + + double x_129569; + double x_129570; + + offset_129573 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129554, + sext_i64_i32(segred_group_sizze_89054))) { + x_129569 = ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + offset_129573)]; + } + } + offset_129573 = 1; + while (slt32(offset_129573, wave_sizze_129556)) { + if (slt32(local_tid_129554 + offset_129573, + sext_i64_i32(segred_group_sizze_89054)) && + ((local_tid_129554 - squot32(local_tid_129554, + wave_sizze_129556) * + wave_sizze_129556) & (2 * offset_129573 - 1)) == 0) { + // read array element + { + x_129570 = ((volatile __local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + offset_129573)]; + } + // 
apply reduction operation + { + double defunc_1_op_res_129571 = x_129569 + x_129570; + + x_129569 = defunc_1_op_res_129571; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_129569; + } + } + offset_129573 *= 2; + } + while (slt32(skip_waves_129574, + squot32(sext_i64_i32(segred_group_sizze_89054) + + wave_sizze_129556 - 1, wave_sizze_129556))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129573 = skip_waves_129574 * wave_sizze_129556; + if (slt32(local_tid_129554 + offset_129573, + sext_i64_i32(segred_group_sizze_89054)) && + ((local_tid_129554 - squot32(local_tid_129554, + wave_sizze_129556) * + wave_sizze_129556) == 0 && (squot32(local_tid_129554, + wave_sizze_129556) & (2 * + skip_waves_129574 - + 1)) == + 0)) { + // read array element + { + x_129570 = ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + offset_129573)]; + } + // apply reduction operation + { + double defunc_1_op_res_129571 = x_129569 + x_129570; + + x_129569 = defunc_1_op_res_129571; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_129569; + } + } + skip_waves_129574 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129554) == (int64_t) 0) { + x_acc_129567 = x_129569; + } + } + if (groups_per_segment_129544 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129554 == 0) { + ((__global double *) mem_124972)[gtid_89025] = x_acc_129567; + } + } + } else { + int32_t old_counter_129575; + + // first thread in group saves group result to global memory + { + if (local_tid_129554 == 0) { + ((__global + double *) group_res_arr_mem_129549)[sext_i32_i64(virt_group_id_129564) * + segred_group_sizze_89054] = + x_acc_129567; + mem_fence_global(); + old_counter_129575 = + atomic_add_i32_global(&((volatile __global + int *) 
mainDetailedzicounter_mem_129551)[sext_i32_i64(srem32(flat_segment_id_129565, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129560)[(int64_t) 0] = + old_counter_129575 == groups_per_segment_129544 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129576; + + is_last_group_129576 = ((__local + bool *) sync_arr_mem_129560)[(int64_t) 0]; + if (is_last_group_129576) { + if (local_tid_129554 == 0) { + old_counter_129575 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129551)[sext_i32_i64(srem32(flat_segment_id_129565, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129544)); + } + // read in the per-group-results + { + int64_t read_per_thread_129577 = + sdiv_up64(groups_per_segment_129544, + segred_group_sizze_89054); + + x_89058 = 0.0; + for (int64_t i_129578 = 0; i_129578 < + read_per_thread_129577; i_129578++) { + int64_t group_res_id_129579 = + sext_i32_i64(local_tid_129554) * + read_per_thread_129577 + i_129578; + int64_t index_of_group_res_129580 = + sext_i32_i64(flat_segment_id_129565) * + groups_per_segment_129544 + group_res_id_129579; + + if (slt64(group_res_id_129579, + groups_per_segment_129544)) { + x_89059 = ((__global + double *) group_res_arr_mem_129549)[index_of_group_res_129580 * + segred_group_sizze_89054]; + + double defunc_1_op_res_89060; + + defunc_1_op_res_89060 = x_89058 + x_89059; + x_89058 = defunc_1_op_res_89060; + } + } + } + ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_89058; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129581; + int32_t skip_waves_129582; + + skip_waves_129582 = 1; + + double x_129569; + double x_129570; + + offset_129581 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129554, + sext_i64_i32(segred_group_sizze_89054))) { + x_129569 = ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + 
offset_129581)]; + } + } + offset_129581 = 1; + while (slt32(offset_129581, wave_sizze_129556)) { + if (slt32(local_tid_129554 + offset_129581, + sext_i64_i32(segred_group_sizze_89054)) && + ((local_tid_129554 - squot32(local_tid_129554, + wave_sizze_129556) * + wave_sizze_129556) & (2 * offset_129581 - 1)) == + 0) { + // read array element + { + x_129570 = ((volatile __local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + offset_129581)]; + } + // apply reduction operation + { + double defunc_1_op_res_129571 = x_129569 + + x_129570; + + x_129569 = defunc_1_op_res_129571; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_129569; + } + } + offset_129581 *= 2; + } + while (slt32(skip_waves_129582, + squot32(sext_i64_i32(segred_group_sizze_89054) + + wave_sizze_129556 - 1, + wave_sizze_129556))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129581 = skip_waves_129582 * wave_sizze_129556; + if (slt32(local_tid_129554 + offset_129581, + sext_i64_i32(segred_group_sizze_89054)) && + ((local_tid_129554 - squot32(local_tid_129554, + wave_sizze_129556) * + wave_sizze_129556) == 0 && + (squot32(local_tid_129554, wave_sizze_129556) & + (2 * skip_waves_129582 - 1)) == 0)) { + // read array element + { + x_129570 = ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554 + + offset_129581)]; + } + // apply reduction operation + { + double defunc_1_op_res_129571 = x_129569 + + x_129570; + + x_129569 = defunc_1_op_res_129571; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129558)[sext_i32_i64(local_tid_129554)] = + x_129569; + } + } + skip_waves_129582 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129554 == 0) { + ((__global double *) mem_124972)[gtid_89025] = + x_129569; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_89054 +} 
+__kernel void mainDetailedzisegred_large_89828(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129787_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129785_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129783_backing_aligned_2, + __local volatile + int64_t *red_arr_mem_129781_backing_aligned_3, + int64_t iota_arg_72752, + int64_t iota_arg_72776, + int64_t num_groups_89969, + int64_t groups_per_segment_129763, + int64_t elements_per_thread_129764, + int64_t virt_num_groups_129765, + __global + unsigned char *mem_124976, + __global + unsigned char *mem_125093, + __global + unsigned char *mem_125097, + __global + unsigned char *mem_125100, + __global + unsigned char *mem_125103, + __global + unsigned char *mem_125105, + __global + unsigned char *mem_125107, + __global + unsigned char *mem_125110, + __global + unsigned char *group_res_arr_mem_129768, + __global + unsigned char *group_res_arr_mem_129770, + __global + unsigned char *group_res_arr_mem_129772, + __global + unsigned char *mainDetailedzicounter_mem_129774) +{ + #define segred_group_sizze_89968 (mainDetailedzisegred_group_sizze_89822) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129787_backing_3 = + (__local volatile + char *) sync_arr_mem_129787_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129785_backing_2 = + (__local volatile + char *) red_arr_mem_129785_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129783_backing_1 = + (__local volatile + char *) red_arr_mem_129783_backing_aligned_2; + __local volatile char *restrict red_arr_mem_129781_backing_0 = + (__local volatile + char *) red_arr_mem_129781_backing_aligned_3; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129776; + int32_t local_tid_129777; + int64_t group_sizze_129780; + int32_t wave_sizze_129779; + int32_t group_tid_129778; + + global_tid_129776 = 
get_global_id(0); + local_tid_129777 = get_local_id(0); + group_sizze_129780 = get_local_size(0); + wave_sizze_129779 = LOCKSTEP_WIDTH; + group_tid_129778 = get_group_id(0); + + int32_t phys_tid_89828; + + phys_tid_89828 = global_tid_129776; + + __local char *red_arr_mem_129781; + + red_arr_mem_129781 = (__local char *) red_arr_mem_129781_backing_0; + + __local char *red_arr_mem_129783; + + red_arr_mem_129783 = (__local char *) red_arr_mem_129783_backing_1; + + __local char *red_arr_mem_129785; + + red_arr_mem_129785 = (__local char *) red_arr_mem_129785_backing_2; + + __local char *sync_arr_mem_129787; + + sync_arr_mem_129787 = (__local char *) sync_arr_mem_129787_backing_3; + + int32_t phys_group_id_129789; + + phys_group_id_129789 = get_group_id(0); + for (int32_t i_129790 = 0; i_129790 < + sdiv_up32(sext_i64_i32(virt_num_groups_129765) - phys_group_id_129789, + sext_i64_i32(num_groups_89969)); i_129790++) { + int32_t virt_group_id_129791 = phys_group_id_129789 + i_129790 * + sext_i64_i32(num_groups_89969); + int32_t flat_segment_id_129792 = squot32(virt_group_id_129791, + sext_i64_i32(groups_per_segment_129763)); + int64_t global_tid_129793 = srem64(sext_i32_i64(virt_group_id_129791) * + segred_group_sizze_89968 + + sext_i32_i64(local_tid_129777), + segred_group_sizze_89968 * + groups_per_segment_129763); + int64_t gtid_89819 = sext_i32_i64(flat_segment_id_129792); + int64_t gtid_89827; + bool x_acc_129794; + int64_t x_acc_129795; + double x_acc_129796; + int64_t chunk_sizze_129797; + int64_t starting_point_129798; + + starting_point_129798 = global_tid_129793 * elements_per_thread_129764; + + int64_t remaining_elements_129799; + + remaining_elements_129799 = iota_arg_72752 - starting_point_129798; + if (sle64(remaining_elements_129799, (int64_t) 0) || + sle64(iota_arg_72752, starting_point_129798)) { + chunk_sizze_129797 = (int64_t) 0; + } else { + if (slt64(iota_arg_72752, (global_tid_129793 + (int64_t) 1) * + elements_per_thread_129764)) { + 
chunk_sizze_129797 = iota_arg_72752 - global_tid_129793 * + elements_per_thread_129764; + } else { + chunk_sizze_129797 = elements_per_thread_129764; + } + } + + bool x_89975; + int64_t x_89976; + double x_89977; + bool x_89978; + int64_t x_89979; + double x_89980; + + // neutral-initialise the accumulators + { + x_acc_129794 = 0; + x_acc_129795 = (int64_t) -1; + x_acc_129796 = 0.0; + } + for (int64_t i_129814 = 0; i_129814 < elements_per_thread_129764; + i_129814++) { + gtid_89827 = sext_i32_i64(local_tid_129777) + + (squot64(global_tid_129793, segred_group_sizze_89968) * + elements_per_thread_129764 + i_129814) * + segred_group_sizze_89968; + if (slt64(gtid_89827, iota_arg_72752)) { + // apply map function + { + int64_t y_89989 = ((__global + int64_t *) mem_125093)[gtid_89819]; + double y_89990 = ((__global + double *) mem_125100)[gtid_89819]; + int64_t binop_x_115392 = iota_arg_72752 * gtid_89819; + int64_t binop_x_115393 = gtid_89827 + binop_x_115392; + int64_t new_index_115394 = squot64(binop_x_115393, + iota_arg_72776); + int64_t binop_y_115400 = iota_arg_72776 * new_index_115394; + int64_t new_index_115401 = binop_x_115393 - binop_y_115400; + double x_89992 = ((__global + double *) mem_125097)[new_index_115394 * + iota_arg_72776 + + new_index_115401]; + double x_89993 = ((__global + double *) mem_124976)[gtid_89827]; + double defunc_0_f_res_89995 = x_89992 / y_89990; + bool cond_89996 = slt64(gtid_89827, y_89989); + bool isnan_res_89997; + + isnan_res_89997 = futrts_isnan64(defunc_0_f_res_89995); + + bool cond_t_res_89998 = !isnan_res_89997; + bool x_89999 = cond_89996 && cond_t_res_89998; + double abs_res_90000 = fabs(defunc_0_f_res_89995); + bool defunc_2_f_res_t_res_90001 = x_89993 < abs_res_90000; + bool x_90002 = x_89999 && defunc_2_f_res_t_res_90001; + double defunc_1_f_res_90003; + + if (cond_89996) { + defunc_1_f_res_90003 = defunc_0_f_res_89995; + } else { + defunc_1_f_res_90003 = 0.0; + } + // save map-out results + { + ((__global double *) 
mem_125110)[gtid_89819 * + iota_arg_72752 + + gtid_89827] = + defunc_0_f_res_89995; + } + // load accumulator + { + x_89975 = x_acc_129794; + x_89976 = x_acc_129795; + x_89977 = x_acc_129796; + } + // load new values + { + x_89978 = x_90002; + x_89979 = gtid_89827; + x_89980 = defunc_1_f_res_90003; + } + // apply reduction operator + { + bool defunc_1_op_res_89981; + int64_t defunc_1_op_res_89982; + + if (x_89975) { + defunc_1_op_res_89981 = x_89975; + defunc_1_op_res_89982 = x_89976; + } else { + bool x_89983 = x_89978 && x_89978; + bool x_89984 = !x_89978; + bool y_89985 = x_89975 && x_89984; + bool defunc_1_op_res_f_res_89986 = x_89983 || + y_89985; + int64_t defunc_1_op_res_f_res_89987; + + if (x_89978) { + defunc_1_op_res_f_res_89987 = x_89979; + } else { + defunc_1_op_res_f_res_89987 = x_89976; + } + defunc_1_op_res_89981 = defunc_1_op_res_f_res_89986; + defunc_1_op_res_89982 = defunc_1_op_res_f_res_89987; + } + + double defunc_1_op_res_89988 = x_89977 + x_89980; + + // store in accumulator + { + x_acc_129794 = defunc_1_op_res_89981; + x_acc_129795 = defunc_1_op_res_89982; + x_acc_129796 = defunc_1_op_res_89988; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_89975 = x_acc_129794; + x_89976 = x_acc_129795; + x_89977 = x_acc_129796; + ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + x_89975; + ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_89976; + ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_89977; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129815; + int32_t skip_waves_129816; + + skip_waves_129816 = 1; + + bool x_129800; + int64_t x_129801; + double x_129802; + bool x_129803; + int64_t x_129804; + double x_129805; + + offset_129815 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129777, + sext_i64_i32(segred_group_sizze_89968))) { + x_129800 = ((__local + bool *) 
red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129801 = ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129802 = ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + } + } + offset_129815 = 1; + while (slt32(offset_129815, wave_sizze_129779)) { + if (slt32(local_tid_129777 + offset_129815, + sext_i64_i32(segred_group_sizze_89968)) && + ((local_tid_129777 - squot32(local_tid_129777, + wave_sizze_129779) * + wave_sizze_129779) & (2 * offset_129815 - 1)) == 0) { + // read array element + { + x_129803 = ((volatile __local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129804 = ((volatile __local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129805 = ((volatile __local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129806; + int64_t defunc_1_op_res_129807; + + if (x_129800) { + defunc_1_op_res_129806 = x_129800; + defunc_1_op_res_129807 = x_129801; + } else { + bool x_129808 = x_129803 && x_129803; + bool x_129809 = !x_129803; + bool y_129810 = x_129800 && x_129809; + bool defunc_1_op_res_f_res_129811 = x_129808 || + y_129810; + int64_t defunc_1_op_res_f_res_129812; + + if (x_129803) { + defunc_1_op_res_f_res_129812 = x_129804; + } else { + defunc_1_op_res_f_res_129812 = x_129801; + } + defunc_1_op_res_129806 = + defunc_1_op_res_f_res_129811; + defunc_1_op_res_129807 = + defunc_1_op_res_f_res_129812; + } + + double defunc_1_op_res_129813 = x_129802 + x_129805; + + x_129800 = defunc_1_op_res_129806; + x_129801 = defunc_1_op_res_129807; + x_129802 = defunc_1_op_res_129813; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + x_129800; + ((volatile __local + int64_t *) 
red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_129801; + ((volatile __local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_129802; + } + } + offset_129815 *= 2; + } + while (slt32(skip_waves_129816, + squot32(sext_i64_i32(segred_group_sizze_89968) + + wave_sizze_129779 - 1, wave_sizze_129779))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129815 = skip_waves_129816 * wave_sizze_129779; + if (slt32(local_tid_129777 + offset_129815, + sext_i64_i32(segred_group_sizze_89968)) && + ((local_tid_129777 - squot32(local_tid_129777, + wave_sizze_129779) * + wave_sizze_129779) == 0 && (squot32(local_tid_129777, + wave_sizze_129779) & + (2 * skip_waves_129816 - + 1)) == 0)) { + // read array element + { + x_129803 = ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129804 = ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + x_129805 = ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129815)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129806; + int64_t defunc_1_op_res_129807; + + if (x_129800) { + defunc_1_op_res_129806 = x_129800; + defunc_1_op_res_129807 = x_129801; + } else { + bool x_129808 = x_129803 && x_129803; + bool x_129809 = !x_129803; + bool y_129810 = x_129800 && x_129809; + bool defunc_1_op_res_f_res_129811 = x_129808 || + y_129810; + int64_t defunc_1_op_res_f_res_129812; + + if (x_129803) { + defunc_1_op_res_f_res_129812 = x_129804; + } else { + defunc_1_op_res_f_res_129812 = x_129801; + } + defunc_1_op_res_129806 = + defunc_1_op_res_f_res_129811; + defunc_1_op_res_129807 = + defunc_1_op_res_f_res_129812; + } + + double defunc_1_op_res_129813 = x_129802 + x_129805; + + x_129800 = defunc_1_op_res_129806; + x_129801 = defunc_1_op_res_129807; + x_129802 = defunc_1_op_res_129813; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + 
x_129800; + ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_129801; + ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_129802; + } + } + skip_waves_129816 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129777) == (int64_t) 0) { + x_acc_129794 = x_129800; + x_acc_129795 = x_129801; + x_acc_129796 = x_129802; + } + } + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_129777) == (int64_t) 0)) { + x_acc_129794 = 0; + x_acc_129795 = (int64_t) -1; + x_acc_129796 = 0.0; + } + } + } + x_89975 = x_acc_129794; + x_89976 = x_acc_129795; + x_89977 = x_acc_129796; + if (groups_per_segment_129763 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129777 == 0) { + ((__global bool *) mem_125103)[gtid_89819] = x_acc_129794; + ((__global int64_t *) mem_125105)[gtid_89819] = + x_acc_129795; + ((__global double *) mem_125107)[gtid_89819] = x_acc_129796; + } + } + } else { + int32_t old_counter_129817; + + // first thread in group saves group result to global memory + { + if (local_tid_129777 == 0) { + ((__global + bool *) group_res_arr_mem_129768)[sext_i32_i64(virt_group_id_129791) * + segred_group_sizze_89968] = + x_acc_129794; + ((__global + int64_t *) group_res_arr_mem_129770)[sext_i32_i64(virt_group_id_129791) * + segred_group_sizze_89968] = + x_acc_129795; + ((__global + double *) group_res_arr_mem_129772)[sext_i32_i64(virt_group_id_129791) * + segred_group_sizze_89968] = + x_acc_129796; + mem_fence_global(); + old_counter_129817 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129774)[sext_i32_i64(srem32(flat_segment_id_129792, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129787)[(int64_t) 0] = + old_counter_129817 == groups_per_segment_129763 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | 
CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129818; + + is_last_group_129818 = ((__local + bool *) sync_arr_mem_129787)[(int64_t) 0]; + if (is_last_group_129818) { + if (local_tid_129777 == 0) { + old_counter_129817 = + atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129774)[sext_i32_i64(srem32(flat_segment_id_129792, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129763)); + } + // read in the per-group-results + { + int64_t read_per_thread_129819 = + sdiv_up64(groups_per_segment_129763, + segred_group_sizze_89968); + + x_89975 = 0; + x_89976 = (int64_t) -1; + x_89977 = 0.0; + for (int64_t i_129820 = 0; i_129820 < + read_per_thread_129819; i_129820++) { + int64_t group_res_id_129821 = + sext_i32_i64(local_tid_129777) * + read_per_thread_129819 + i_129820; + int64_t index_of_group_res_129822 = + sext_i32_i64(flat_segment_id_129792) * + groups_per_segment_129763 + group_res_id_129821; + + if (slt64(group_res_id_129821, + groups_per_segment_129763)) { + x_89978 = ((__global + bool *) group_res_arr_mem_129768)[index_of_group_res_129822 * + segred_group_sizze_89968]; + x_89979 = ((__global + int64_t *) group_res_arr_mem_129770)[index_of_group_res_129822 * + segred_group_sizze_89968]; + x_89980 = ((__global + double *) group_res_arr_mem_129772)[index_of_group_res_129822 * + segred_group_sizze_89968]; + + bool defunc_1_op_res_89981; + int64_t defunc_1_op_res_89982; + + if (x_89975) { + defunc_1_op_res_89981 = x_89975; + defunc_1_op_res_89982 = x_89976; + } else { + bool x_89983 = x_89978 && x_89978; + bool x_89984 = !x_89978; + bool y_89985 = x_89975 && x_89984; + bool defunc_1_op_res_f_res_89986 = x_89983 || + y_89985; + int64_t defunc_1_op_res_f_res_89987; + + if (x_89978) { + defunc_1_op_res_f_res_89987 = x_89979; + } else { + defunc_1_op_res_f_res_89987 = x_89976; + } + defunc_1_op_res_89981 = + defunc_1_op_res_f_res_89986; + defunc_1_op_res_89982 = + defunc_1_op_res_f_res_89987; + } + + double defunc_1_op_res_89988 = x_89977 + 
x_89980; + + x_89975 = defunc_1_op_res_89981; + x_89976 = defunc_1_op_res_89982; + x_89977 = defunc_1_op_res_89988; + } + } + } + ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + x_89975; + ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_89976; + ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_89977; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129823; + int32_t skip_waves_129824; + + skip_waves_129824 = 1; + + bool x_129800; + int64_t x_129801; + double x_129802; + bool x_129803; + int64_t x_129804; + double x_129805; + + offset_129823 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129777, + sext_i64_i32(segred_group_sizze_89968))) { + x_129800 = ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129801 = ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129802 = ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + } + } + offset_129823 = 1; + while (slt32(offset_129823, wave_sizze_129779)) { + if (slt32(local_tid_129777 + offset_129823, + sext_i64_i32(segred_group_sizze_89968)) && + ((local_tid_129777 - squot32(local_tid_129777, + wave_sizze_129779) * + wave_sizze_129779) & (2 * offset_129823 - 1)) == + 0) { + // read array element + { + x_129803 = ((volatile __local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129804 = ((volatile __local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129805 = ((volatile __local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129806; + int64_t defunc_1_op_res_129807; + + if (x_129800) { + defunc_1_op_res_129806 = x_129800; + defunc_1_op_res_129807 = 
x_129801; + } else { + bool x_129808 = x_129803 && x_129803; + bool x_129809 = !x_129803; + bool y_129810 = x_129800 && x_129809; + bool defunc_1_op_res_f_res_129811 = + x_129808 || y_129810; + int64_t defunc_1_op_res_f_res_129812; + + if (x_129803) { + defunc_1_op_res_f_res_129812 = x_129804; + } else { + defunc_1_op_res_f_res_129812 = x_129801; + } + defunc_1_op_res_129806 = + defunc_1_op_res_f_res_129811; + defunc_1_op_res_129807 = + defunc_1_op_res_f_res_129812; + } + + double defunc_1_op_res_129813 = x_129802 + + x_129805; + + x_129800 = defunc_1_op_res_129806; + x_129801 = defunc_1_op_res_129807; + x_129802 = defunc_1_op_res_129813; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + x_129800; + ((volatile __local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_129801; + ((volatile __local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_129802; + } + } + offset_129823 *= 2; + } + while (slt32(skip_waves_129824, + squot32(sext_i64_i32(segred_group_sizze_89968) + + wave_sizze_129779 - 1, + wave_sizze_129779))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129823 = skip_waves_129824 * wave_sizze_129779; + if (slt32(local_tid_129777 + offset_129823, + sext_i64_i32(segred_group_sizze_89968)) && + ((local_tid_129777 - squot32(local_tid_129777, + wave_sizze_129779) * + wave_sizze_129779) == 0 && + (squot32(local_tid_129777, wave_sizze_129779) & + (2 * skip_waves_129824 - 1)) == 0)) { + // read array element + { + x_129803 = ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129804 = ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + x_129805 = ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777 + + offset_129823)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129806; + int64_t defunc_1_op_res_129807; + + if (x_129800) { + 
defunc_1_op_res_129806 = x_129800; + defunc_1_op_res_129807 = x_129801; + } else { + bool x_129808 = x_129803 && x_129803; + bool x_129809 = !x_129803; + bool y_129810 = x_129800 && x_129809; + bool defunc_1_op_res_f_res_129811 = + x_129808 || y_129810; + int64_t defunc_1_op_res_f_res_129812; + + if (x_129803) { + defunc_1_op_res_f_res_129812 = x_129804; + } else { + defunc_1_op_res_f_res_129812 = x_129801; + } + defunc_1_op_res_129806 = + defunc_1_op_res_f_res_129811; + defunc_1_op_res_129807 = + defunc_1_op_res_f_res_129812; + } + + double defunc_1_op_res_129813 = x_129802 + + x_129805; + + x_129800 = defunc_1_op_res_129806; + x_129801 = defunc_1_op_res_129807; + x_129802 = defunc_1_op_res_129813; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129781)[sext_i32_i64(local_tid_129777)] = + x_129800; + ((__local + int64_t *) red_arr_mem_129783)[sext_i32_i64(local_tid_129777)] = + x_129801; + ((__local + double *) red_arr_mem_129785)[sext_i32_i64(local_tid_129777)] = + x_129802; + } + } + skip_waves_129824 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129777 == 0) { + ((__global bool *) mem_125103)[gtid_89819] = + x_129800; + ((__global int64_t *) mem_125105)[gtid_89819] = + x_129801; + ((__global double *) mem_125107)[gtid_89819] = + x_129802; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_89968 +} +__kernel void mainDetailedzisegred_nonseg_77489(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126368_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126366_backing_aligned_1, + int64_t m_70861, + int64_t n_70864, + int64_t m_70956, + int64_t num_groups_77492, + int64_t num_threads_125631, + int64_t num_threads_126360, + __global + unsigned char *mem_120127, + __global + unsigned char *mem_120130, + __global + unsigned char *mem_120144, + __global + unsigned char *mem_120146, + __global + unsigned 
char *mem_120172, + __global + unsigned char *mem_120174, + __global + unsigned char *mem_120177, + __global + unsigned char *mem_120180, + __global + unsigned char *mainDetailedzicounter_mem_126356, + __global + unsigned char *group_res_arr_mem_126358) +{ + #define segred_group_sizze_77491 (mainDetailedzisegred_group_sizze_77478) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126368_backing_1 = + (__local volatile + char *) red_arr_mem_126368_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126366_backing_0 = + (__local volatile + char *) sync_arr_mem_126366_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126361; + int32_t local_tid_126362; + int64_t group_sizze_126365; + int32_t wave_sizze_126364; + int32_t group_tid_126363; + + global_tid_126361 = get_global_id(0); + local_tid_126362 = get_local_id(0); + group_sizze_126365 = get_local_size(0); + wave_sizze_126364 = LOCKSTEP_WIDTH; + group_tid_126363 = get_group_id(0); + + int32_t phys_tid_77489; + + phys_tid_77489 = global_tid_126361; + + __local char *sync_arr_mem_126366; + + sync_arr_mem_126366 = (__local char *) sync_arr_mem_126366_backing_0; + + __local char *red_arr_mem_126368; + + red_arr_mem_126368 = (__local char *) red_arr_mem_126368_backing_1; + + int64_t dummy_77487; + + dummy_77487 = (int64_t) 0; + + int64_t gtid_77488; + + gtid_77488 = (int64_t) 0; + + int64_t x_acc_126370; + int64_t chunk_sizze_126371; + + chunk_sizze_126371 = smin64(sdiv_up64(m_70861, + sext_i32_i64(sext_i64_i32(segred_group_sizze_77491 * + num_groups_77492))), + sdiv_up64(m_70861 - phys_tid_77489, + num_threads_126360)); + + int64_t x_77498; + int64_t x_77499; + + // neutral-initialise the accumulators + { + x_acc_126370 = (int64_t) -9223372036854775808; + } + for (int64_t i_126375 = 0; i_126375 < chunk_sizze_126371; i_126375++) { + gtid_77488 = phys_tid_77489 + num_threads_126360 * i_126375; + 
// apply map function + { + int64_t discard_119622; + int64_t scanacc_119618 = (int64_t) 0; + + for (int64_t i_119620 = 0; i_119620 < n_70864; i_119620++) { + int64_t binop_y_119975 = (int64_t) -1 * i_119620; + int64_t slice_119976 = m_70956 + binop_y_119975; + double x_77506 = ((__global double *) mem_120127)[slice_119976 * + m_70861 + + gtid_77488]; + bool defunc_0_f_res_77507; + + defunc_0_f_res_77507 = futrts_isnan64(x_77506); + + bool defunc_0_g_res_77508 = !defunc_0_f_res_77507; + int64_t defunc_0_f_res_77509 = + btoi_bool_i64(defunc_0_g_res_77508); + int64_t defunc_1_op_res_77505 = add64(defunc_0_f_res_77509, + scanacc_119618); + + ((__global int64_t *) mem_120130)[phys_tid_77489 + i_119620 * + num_threads_125631] = + defunc_1_op_res_77505; + + int64_t scanacc_tmp_126376 = defunc_1_op_res_77505; + + scanacc_119618 = scanacc_tmp_126376; + } + discard_119622 = scanacc_119618; + + int64_t last_res_77510 = ((__global + int64_t *) mem_120130)[phys_tid_77489 + + m_70956 * + num_threads_125631]; + + for (int64_t i_126378 = 0; i_126378 < n_70864; i_126378++) { + ((__global double *) mem_120144)[phys_tid_77489 + i_126378 * + num_threads_125631] = NAN; + } + for (int64_t i_126379 = 0; i_126379 < n_70864; i_126379++) { + ((__global int64_t *) mem_120146)[phys_tid_77489 + i_126379 * + num_threads_125631] = + (int64_t) 0; + } + for (int64_t write_iter_119623 = 0; write_iter_119623 < n_70864; + write_iter_119623++) { + int64_t binop_y_119983 = (int64_t) -1 * write_iter_119623; + int64_t slice_119984 = m_70956 + binop_y_119983; + double write_iv_119626 = ((__global + double *) mem_120127)[slice_119984 * + m_70861 + + gtid_77488]; + bool defunc_0_f_res_77518; + + defunc_0_f_res_77518 = futrts_isnan64(write_iv_119626); + + bool defunc_0_g_res_77519 = !defunc_0_f_res_77518; + int64_t defunc_1_f_res_77520; + + if (defunc_0_g_res_77519) { + int64_t write_iv_119627 = ((__global + int64_t *) mem_120130)[phys_tid_77489 + + write_iter_119623 * + num_threads_125631]; + int64_t 
defunc_1_f_res_t_res_77521 = sub64(write_iv_119627, + (int64_t) 1); + + defunc_1_f_res_77520 = defunc_1_f_res_t_res_77521; + } else { + defunc_1_f_res_77520 = (int64_t) -1; + } + + bool less_than_zzero_119629 = slt64(defunc_1_f_res_77520, + (int64_t) 0); + bool greater_than_sizze_119630 = sle64(n_70864, + defunc_1_f_res_77520); + bool outside_bounds_dim_119631 = less_than_zzero_119629 || + greater_than_sizze_119630; + + if (!outside_bounds_dim_119631) { + ((__global int64_t *) mem_120146)[phys_tid_77489 + + defunc_1_f_res_77520 * + num_threads_125631] = + write_iter_119623; + } + if (!outside_bounds_dim_119631) { + for (int64_t i_126382 = 0; i_126382 < (int64_t) 1; + i_126382++) { + ((__global double *) mem_120144)[phys_tid_77489 + + (defunc_1_f_res_77520 + + i_126382) * + num_threads_125631] = + ((__global double *) mem_120127)[m_70861 * + slice_119984 + + gtid_77488 + + i_126382 * + ((int64_t) -1 * + m_70861)]; + } + } + } + // save map-out results + { + ((__global int64_t *) mem_120174)[dummy_77487 * m_70861 + + gtid_77488] = last_res_77510; + for (int64_t i_126383 = 0; i_126383 < n_70864; i_126383++) { + ((__global double *) mem_120177)[i_126383 * m_70861 + + dummy_77487 * m_70861 + + gtid_77488] = ((__global + double *) mem_120144)[phys_tid_77489 + + i_126383 * + num_threads_125631]; + } + for (int64_t i_126384 = 0; i_126384 < n_70864; i_126384++) { + ((__global int64_t *) mem_120180)[i_126384 * m_70861 + + dummy_77487 * m_70861 + + gtid_77488] = ((__global + int64_t *) mem_120146)[phys_tid_77489 + + i_126384 * + num_threads_125631]; + } + } + // load accumulator + { + x_77498 = x_acc_126370; + } + // load new values + { + x_77499 = last_res_77510; + } + // apply reduction operator + { + int64_t defunc_1_op_res_77500 = smax64(x_77498, x_77499); + + // store in accumulator + { + x_acc_126370 = defunc_1_op_res_77500; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_77498 = x_acc_126370; + ((__local + int64_t *) 
red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_77498; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126385; + int32_t skip_waves_126386; + + skip_waves_126386 = 1; + + int64_t x_126372; + int64_t x_126373; + + offset_126385 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126362, sext_i64_i32(segred_group_sizze_77491))) { + x_126372 = ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126385)]; + } + } + offset_126385 = 1; + while (slt32(offset_126385, wave_sizze_126364)) { + if (slt32(local_tid_126362 + offset_126385, + sext_i64_i32(segred_group_sizze_77491)) && + ((local_tid_126362 - squot32(local_tid_126362, wave_sizze_126364) * + wave_sizze_126364) & (2 * offset_126385 - 1)) == 0) { + // read array element + { + x_126373 = ((volatile __local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126385)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126374 = smax64(x_126372, x_126373); + + x_126372 = defunc_1_op_res_126374; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_126372; + } + } + offset_126385 *= 2; + } + while (slt32(skip_waves_126386, + squot32(sext_i64_i32(segred_group_sizze_77491) + + wave_sizze_126364 - 1, wave_sizze_126364))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126385 = skip_waves_126386 * wave_sizze_126364; + if (slt32(local_tid_126362 + offset_126385, + sext_i64_i32(segred_group_sizze_77491)) && + ((local_tid_126362 - squot32(local_tid_126362, wave_sizze_126364) * + wave_sizze_126364) == 0 && (squot32(local_tid_126362, + wave_sizze_126364) & (2 * + skip_waves_126386 - + 1)) == + 0)) { + // read array element + { + x_126373 = ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126385)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126374 = smax64(x_126372, x_126373); + + x_126372 = 
defunc_1_op_res_126374; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_126372; + } + } + skip_waves_126386 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126362) == (int64_t) 0) { + x_acc_126370 = x_126372; + } + } + + int32_t old_counter_126387; + + // first thread in group saves group result to global memory + { + if (local_tid_126362 == 0) { + ((__global + int64_t *) group_res_arr_mem_126358)[sext_i32_i64(group_tid_126363) * + segred_group_sizze_77491] = + x_acc_126370; + mem_fence_global(); + old_counter_126387 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126356)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_126366)[(int64_t) 0] = + old_counter_126387 == num_groups_77492 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126388; + + is_last_group_126388 = ((__local bool *) sync_arr_mem_126366)[(int64_t) 0]; + if (is_last_group_126388) { + if (local_tid_126362 == 0) { + old_counter_126387 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126356)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_77492)); + } + // read in the per-group-results + { + int64_t read_per_thread_126389 = sdiv_up64(num_groups_77492, + segred_group_sizze_77491); + + x_77498 = (int64_t) -9223372036854775808; + for (int64_t i_126390 = 0; i_126390 < read_per_thread_126389; + i_126390++) { + int64_t group_res_id_126391 = sext_i32_i64(local_tid_126362) * + read_per_thread_126389 + i_126390; + int64_t index_of_group_res_126392 = group_res_id_126391; + + if (slt64(group_res_id_126391, num_groups_77492)) { + x_77499 = ((__global + int64_t *) group_res_arr_mem_126358)[index_of_group_res_126392 * + segred_group_sizze_77491]; + + int64_t defunc_1_op_res_77500; + + defunc_1_op_res_77500 = smax64(x_77498, x_77499); + x_77498 
= defunc_1_op_res_77500; + } + } + } + ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_77498; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126393; + int32_t skip_waves_126394; + + skip_waves_126394 = 1; + + int64_t x_126372; + int64_t x_126373; + + offset_126393 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126362, + sext_i64_i32(segred_group_sizze_77491))) { + x_126372 = ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126393)]; + } + } + offset_126393 = 1; + while (slt32(offset_126393, wave_sizze_126364)) { + if (slt32(local_tid_126362 + offset_126393, + sext_i64_i32(segred_group_sizze_77491)) && + ((local_tid_126362 - squot32(local_tid_126362, + wave_sizze_126364) * + wave_sizze_126364) & (2 * offset_126393 - 1)) == 0) { + // read array element + { + x_126373 = ((volatile __local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126393)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126374 = smax64(x_126372, + x_126373); + + x_126372 = defunc_1_op_res_126374; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_126372; + } + } + offset_126393 *= 2; + } + while (slt32(skip_waves_126394, + squot32(sext_i64_i32(segred_group_sizze_77491) + + wave_sizze_126364 - 1, wave_sizze_126364))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126393 = skip_waves_126394 * wave_sizze_126364; + if (slt32(local_tid_126362 + offset_126393, + sext_i64_i32(segred_group_sizze_77491)) && + ((local_tid_126362 - squot32(local_tid_126362, + wave_sizze_126364) * + wave_sizze_126364) == 0 && (squot32(local_tid_126362, + wave_sizze_126364) & + (2 * skip_waves_126394 - + 1)) == 0)) { + // read array element + { + x_126373 = ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362 + + offset_126393)]; + } + // apply 
reduction operation + { + int64_t defunc_1_op_res_126374 = smax64(x_126372, + x_126373); + + x_126372 = defunc_1_op_res_126374; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126368)[sext_i32_i64(local_tid_126362)] = + x_126372; + } + } + skip_waves_126394 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126362 == 0) { + ((__global int64_t *) mem_120172)[(int64_t) 0] = x_126372; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_77491 +} +__kernel void mainDetailedzisegred_nonseg_77726(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126519_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126517_backing_aligned_1, + int64_t m_70861, + int64_t num_groups_77823, + int64_t num_threads_126511, + __global + unsigned char *defunc_2_reduce_res_map_acc_mem_120211, + __global + unsigned char *mem_120218, + __global + unsigned char *mainDetailedzicounter_mem_126507, + __global + unsigned char *group_res_arr_mem_126509) +{ + #define segred_group_sizze_77822 (mainDetailedzisegred_group_sizze_77718) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126519_backing_1 = + (__local volatile + char *) red_arr_mem_126519_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126517_backing_0 = + (__local volatile + char *) sync_arr_mem_126517_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126512; + int32_t local_tid_126513; + int64_t group_sizze_126516; + int32_t wave_sizze_126515; + int32_t group_tid_126514; + + global_tid_126512 = get_global_id(0); + local_tid_126513 = get_local_id(0); + group_sizze_126516 = get_local_size(0); + wave_sizze_126515 = LOCKSTEP_WIDTH; + group_tid_126514 = get_group_id(0); + + int32_t phys_tid_77726; + + phys_tid_77726 = global_tid_126512; + + __local char *sync_arr_mem_126517; + + sync_arr_mem_126517 = (__local char *) 
sync_arr_mem_126517_backing_0; + + __local char *red_arr_mem_126519; + + red_arr_mem_126519 = (__local char *) red_arr_mem_126519_backing_1; + + int64_t dummy_77724; + + dummy_77724 = (int64_t) 0; + + int64_t gtid_77725; + + gtid_77725 = (int64_t) 0; + + int64_t x_acc_126521; + int64_t chunk_sizze_126522; + + chunk_sizze_126522 = smin64(sdiv_up64(m_70861, + sext_i32_i64(sext_i64_i32(segred_group_sizze_77822 * + num_groups_77823))), + sdiv_up64(m_70861 - phys_tid_77726, + num_threads_126511)); + + int64_t x_77826; + int64_t x_77827; + + // neutral-initialise the accumulators + { + x_acc_126521 = (int64_t) -9223372036854775808; + } + for (int64_t i_126526 = 0; i_126526 < chunk_sizze_126522; i_126526++) { + gtid_77725 = phys_tid_77726 + num_threads_126511 * i_126526; + // apply map function + { + int64_t x_77829 = ((__global + int64_t *) defunc_2_reduce_res_map_acc_mem_120211)[gtid_77725]; + + // save map-out results + { } + // load accumulator + { + x_77826 = x_acc_126521; + } + // load new values + { + x_77827 = x_77829; + } + // apply reduction operator + { + int64_t defunc_1_op_res_77828 = smax64(x_77826, x_77827); + + // store in accumulator + { + x_acc_126521 = defunc_1_op_res_77828; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_77826 = x_acc_126521; + ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_77826; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126527; + int32_t skip_waves_126528; + + skip_waves_126528 = 1; + + int64_t x_126523; + int64_t x_126524; + + offset_126527 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126513, sext_i64_i32(segred_group_sizze_77822))) { + x_126523 = ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126527)]; + } + } + offset_126527 = 1; + while (slt32(offset_126527, wave_sizze_126515)) { + if (slt32(local_tid_126513 + offset_126527, + sext_i64_i32(segred_group_sizze_77822)) && + 
((local_tid_126513 - squot32(local_tid_126513, wave_sizze_126515) * + wave_sizze_126515) & (2 * offset_126527 - 1)) == 0) { + // read array element + { + x_126524 = ((volatile __local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126527)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126525 = smax64(x_126523, x_126524); + + x_126523 = defunc_1_op_res_126525; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_126523; + } + } + offset_126527 *= 2; + } + while (slt32(skip_waves_126528, + squot32(sext_i64_i32(segred_group_sizze_77822) + + wave_sizze_126515 - 1, wave_sizze_126515))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126527 = skip_waves_126528 * wave_sizze_126515; + if (slt32(local_tid_126513 + offset_126527, + sext_i64_i32(segred_group_sizze_77822)) && + ((local_tid_126513 - squot32(local_tid_126513, wave_sizze_126515) * + wave_sizze_126515) == 0 && (squot32(local_tid_126513, + wave_sizze_126515) & (2 * + skip_waves_126528 - + 1)) == + 0)) { + // read array element + { + x_126524 = ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126527)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126525 = smax64(x_126523, x_126524); + + x_126523 = defunc_1_op_res_126525; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_126523; + } + } + skip_waves_126528 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126513) == (int64_t) 0) { + x_acc_126521 = x_126523; + } + } + + int32_t old_counter_126529; + + // first thread in group saves group result to global memory + { + if (local_tid_126513 == 0) { + ((__global + int64_t *) group_res_arr_mem_126509)[sext_i32_i64(group_tid_126514) * + segred_group_sizze_77822] = + x_acc_126521; + mem_fence_global(); + 
old_counter_126529 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126507)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_126517)[(int64_t) 0] = + old_counter_126529 == num_groups_77823 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126530; + + is_last_group_126530 = ((__local bool *) sync_arr_mem_126517)[(int64_t) 0]; + if (is_last_group_126530) { + if (local_tid_126513 == 0) { + old_counter_126529 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_126507)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_77823)); + } + // read in the per-group-results + { + int64_t read_per_thread_126531 = sdiv_up64(num_groups_77823, + segred_group_sizze_77822); + + x_77826 = (int64_t) -9223372036854775808; + for (int64_t i_126532 = 0; i_126532 < read_per_thread_126531; + i_126532++) { + int64_t group_res_id_126533 = sext_i32_i64(local_tid_126513) * + read_per_thread_126531 + i_126532; + int64_t index_of_group_res_126534 = group_res_id_126533; + + if (slt64(group_res_id_126533, num_groups_77823)) { + x_77827 = ((__global + int64_t *) group_res_arr_mem_126509)[index_of_group_res_126534 * + segred_group_sizze_77822]; + + int64_t defunc_1_op_res_77828; + + defunc_1_op_res_77828 = smax64(x_77826, x_77827); + x_77826 = defunc_1_op_res_77828; + } + } + } + ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_77826; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126535; + int32_t skip_waves_126536; + + skip_waves_126536 = 1; + + int64_t x_126523; + int64_t x_126524; + + offset_126535 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126513, + sext_i64_i32(segred_group_sizze_77822))) { + x_126523 = ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126535)]; + } + } + offset_126535 = 1; + while (slt32(offset_126535, 
wave_sizze_126515)) { + if (slt32(local_tid_126513 + offset_126535, + sext_i64_i32(segred_group_sizze_77822)) && + ((local_tid_126513 - squot32(local_tid_126513, + wave_sizze_126515) * + wave_sizze_126515) & (2 * offset_126535 - 1)) == 0) { + // read array element + { + x_126524 = ((volatile __local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126535)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126525 = smax64(x_126523, + x_126524); + + x_126523 = defunc_1_op_res_126525; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_126523; + } + } + offset_126535 *= 2; + } + while (slt32(skip_waves_126536, + squot32(sext_i64_i32(segred_group_sizze_77822) + + wave_sizze_126515 - 1, wave_sizze_126515))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126535 = skip_waves_126536 * wave_sizze_126515; + if (slt32(local_tid_126513 + offset_126535, + sext_i64_i32(segred_group_sizze_77822)) && + ((local_tid_126513 - squot32(local_tid_126513, + wave_sizze_126515) * + wave_sizze_126515) == 0 && (squot32(local_tid_126513, + wave_sizze_126515) & + (2 * skip_waves_126536 - + 1)) == 0)) { + // read array element + { + x_126524 = ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513 + + offset_126535)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126525 = smax64(x_126523, + x_126524); + + x_126523 = defunc_1_op_res_126525; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126519)[sext_i32_i64(local_tid_126513)] = + x_126523; + } + } + skip_waves_126536 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126513 == 0) { + ((__global int64_t *) mem_120218)[(int64_t) 0] = x_126523; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_77822 +} +__kernel void mainDetailedzisegred_nonseg_85434(__global int *global_failure, + __local volatile + int64_t 
*red_arr_mem_127998_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_127996_backing_aligned_1, + int64_t m_70861, + int64_t num_groups_85429, + int64_t num_threads_127990, + __global + unsigned char *defunc_7_map_res_mem_123721, + __global + unsigned char *mem_123728, + __global + unsigned char *mainDetailedzicounter_mem_127986, + __global + unsigned char *group_res_arr_mem_127988) +{ + #define segred_group_sizze_85427 (mainDetailedzisegred_group_sizze_85426) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127998_backing_1 = + (__local volatile + char *) red_arr_mem_127998_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_127996_backing_0 = + (__local volatile + char *) sync_arr_mem_127996_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127991; + int32_t local_tid_127992; + int64_t group_sizze_127995; + int32_t wave_sizze_127994; + int32_t group_tid_127993; + + global_tid_127991 = get_global_id(0); + local_tid_127992 = get_local_id(0); + group_sizze_127995 = get_local_size(0); + wave_sizze_127994 = LOCKSTEP_WIDTH; + group_tid_127993 = get_group_id(0); + + int32_t phys_tid_85434; + + phys_tid_85434 = global_tid_127991; + + __local char *sync_arr_mem_127996; + + sync_arr_mem_127996 = (__local char *) sync_arr_mem_127996_backing_0; + + __local char *red_arr_mem_127998; + + red_arr_mem_127998 = (__local char *) red_arr_mem_127998_backing_1; + + int64_t dummy_85432; + + dummy_85432 = (int64_t) 0; + + int64_t gtid_85433; + + gtid_85433 = (int64_t) 0; + + bool x_acc_128000; + int64_t chunk_sizze_128001; + + chunk_sizze_128001 = smin64(sdiv_up64(m_70861, + sext_i32_i64(sext_i64_i32(segred_group_sizze_85427 * + num_groups_85429))), + sdiv_up64(m_70861 - phys_tid_85434, + num_threads_127990)); + + bool x_72145; + bool x_72146; + + // neutral-initialise the accumulators + { + x_acc_128000 = 0; + } + for (int64_t i_128005 = 0; 
i_128005 < chunk_sizze_128001; i_128005++) { + gtid_85433 = phys_tid_85434 + num_threads_127990 * i_128005; + // apply map function + { + bool x_72148 = ((__global + bool *) defunc_7_map_res_mem_123721)[gtid_85433]; + + // save map-out results + { } + // load accumulator + { + x_72145 = x_acc_128000; + } + // load new values + { + x_72146 = x_72148; + } + // apply reduction operator + { + bool defunc_1_op_res_72147 = x_72145 || x_72146; + + // store in accumulator + { + x_acc_128000 = defunc_1_op_res_72147; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_72145 = x_acc_128000; + ((__local bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_72145; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128006; + int32_t skip_waves_128007; + + skip_waves_128007 = 1; + + bool x_128002; + bool x_128003; + + offset_128006 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127992, sext_i64_i32(segred_group_sizze_85427))) { + x_128002 = ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128006)]; + } + } + offset_128006 = 1; + while (slt32(offset_128006, wave_sizze_127994)) { + if (slt32(local_tid_127992 + offset_128006, + sext_i64_i32(segred_group_sizze_85427)) && + ((local_tid_127992 - squot32(local_tid_127992, wave_sizze_127994) * + wave_sizze_127994) & (2 * offset_128006 - 1)) == 0) { + // read array element + { + x_128003 = ((volatile __local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128006)]; + } + // apply reduction operation + { + bool defunc_1_op_res_128004 = x_128002 || x_128003; + + x_128002 = defunc_1_op_res_128004; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_128002; + } + } + offset_128006 *= 2; + } + while (slt32(skip_waves_128007, + squot32(sext_i64_i32(segred_group_sizze_85427) + + wave_sizze_127994 - 1, wave_sizze_127994))) { + 
barrier(CLK_LOCAL_MEM_FENCE); + offset_128006 = skip_waves_128007 * wave_sizze_127994; + if (slt32(local_tid_127992 + offset_128006, + sext_i64_i32(segred_group_sizze_85427)) && + ((local_tid_127992 - squot32(local_tid_127992, wave_sizze_127994) * + wave_sizze_127994) == 0 && (squot32(local_tid_127992, + wave_sizze_127994) & (2 * + skip_waves_128007 - + 1)) == + 0)) { + // read array element + { + x_128003 = ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128006)]; + } + // apply reduction operation + { + bool defunc_1_op_res_128004 = x_128002 || x_128003; + + x_128002 = defunc_1_op_res_128004; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_128002; + } + } + skip_waves_128007 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127992) == (int64_t) 0) { + x_acc_128000 = x_128002; + } + } + + int32_t old_counter_128008; + + // first thread in group saves group result to global memory + { + if (local_tid_127992 == 0) { + ((__global + bool *) group_res_arr_mem_127988)[sext_i32_i64(group_tid_127993) * + segred_group_sizze_85427] = + x_acc_128000; + mem_fence_global(); + old_counter_128008 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127986)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_127996)[(int64_t) 0] = + old_counter_128008 == num_groups_85429 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128009; + + is_last_group_128009 = ((__local bool *) sync_arr_mem_127996)[(int64_t) 0]; + if (is_last_group_128009) { + if (local_tid_127992 == 0) { + old_counter_128008 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_127986)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_85429)); + } + // read in the per-group-results + { + int64_t read_per_thread_128010 = 
sdiv_up64(num_groups_85429, + segred_group_sizze_85427); + + x_72145 = 0; + for (int64_t i_128011 = 0; i_128011 < read_per_thread_128010; + i_128011++) { + int64_t group_res_id_128012 = sext_i32_i64(local_tid_127992) * + read_per_thread_128010 + i_128011; + int64_t index_of_group_res_128013 = group_res_id_128012; + + if (slt64(group_res_id_128012, num_groups_85429)) { + x_72146 = ((__global + bool *) group_res_arr_mem_127988)[index_of_group_res_128013 * + segred_group_sizze_85427]; + + bool defunc_1_op_res_72147; + + defunc_1_op_res_72147 = x_72145 || x_72146; + x_72145 = defunc_1_op_res_72147; + } + } + } + ((__local bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_72145; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128014; + int32_t skip_waves_128015; + + skip_waves_128015 = 1; + + bool x_128002; + bool x_128003; + + offset_128014 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127992, + sext_i64_i32(segred_group_sizze_85427))) { + x_128002 = ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128014)]; + } + } + offset_128014 = 1; + while (slt32(offset_128014, wave_sizze_127994)) { + if (slt32(local_tid_127992 + offset_128014, + sext_i64_i32(segred_group_sizze_85427)) && + ((local_tid_127992 - squot32(local_tid_127992, + wave_sizze_127994) * + wave_sizze_127994) & (2 * offset_128014 - 1)) == 0) { + // read array element + { + x_128003 = ((volatile __local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128014)]; + } + // apply reduction operation + { + bool defunc_1_op_res_128004 = x_128002 || x_128003; + + x_128002 = defunc_1_op_res_128004; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_128002; + } + } + offset_128014 *= 2; + } + while (slt32(skip_waves_128015, + squot32(sext_i64_i32(segred_group_sizze_85427) + + wave_sizze_127994 - 1, 
wave_sizze_127994))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128014 = skip_waves_128015 * wave_sizze_127994; + if (slt32(local_tid_127992 + offset_128014, + sext_i64_i32(segred_group_sizze_85427)) && + ((local_tid_127992 - squot32(local_tid_127992, + wave_sizze_127994) * + wave_sizze_127994) == 0 && (squot32(local_tid_127992, + wave_sizze_127994) & + (2 * skip_waves_128015 - + 1)) == 0)) { + // read array element + { + x_128003 = ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992 + + offset_128014)]; + } + // apply reduction operation + { + bool defunc_1_op_res_128004 = x_128002 || x_128003; + + x_128002 = defunc_1_op_res_128004; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_127998)[sext_i32_i64(local_tid_127992)] = + x_128002; + } + } + skip_waves_128015 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127992 == 0) { + ((__global bool *) mem_123728)[(int64_t) 0] = x_128002; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_85427 +} +__kernel void mainDetailedzisegred_nonseg_88976(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129498_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_129496_backing_aligned_1, + int64_t m_70861, + int64_t num_groups_88971, + int64_t num_threads_129490, + __global + unsigned char *defunc_3_map_res_mem_124961, + __global + unsigned char *mem_124966, + __global + unsigned char *mainDetailedzicounter_mem_129486, + __global + unsigned char *group_res_arr_mem_129488) +{ + #define segred_group_sizze_88969 (mainDetailedzisegred_group_sizze_88968) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129498_backing_1 = + (__local volatile + char *) red_arr_mem_129498_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_129496_backing_0 = + (__local volatile + char *) sync_arr_mem_129496_backing_aligned_1; + + if 
(*global_failure >= 0) + return; + + int32_t global_tid_129491; + int32_t local_tid_129492; + int64_t group_sizze_129495; + int32_t wave_sizze_129494; + int32_t group_tid_129493; + + global_tid_129491 = get_global_id(0); + local_tid_129492 = get_local_id(0); + group_sizze_129495 = get_local_size(0); + wave_sizze_129494 = LOCKSTEP_WIDTH; + group_tid_129493 = get_group_id(0); + + int32_t phys_tid_88976; + + phys_tid_88976 = global_tid_129491; + + __local char *sync_arr_mem_129496; + + sync_arr_mem_129496 = (__local char *) sync_arr_mem_129496_backing_0; + + __local char *red_arr_mem_129498; + + red_arr_mem_129498 = (__local char *) red_arr_mem_129498_backing_1; + + int64_t dummy_88974; + + dummy_88974 = (int64_t) 0; + + int64_t gtid_88975; + + gtid_88975 = (int64_t) 0; + + int64_t x_acc_129500; + int64_t chunk_sizze_129501; + + chunk_sizze_129501 = smin64(sdiv_up64(m_70861, + sext_i32_i64(sext_i64_i32(segred_group_sizze_88969 * + num_groups_88971))), + sdiv_up64(m_70861 - phys_tid_88976, + num_threads_129490)); + + int64_t x_72723; + int64_t x_72724; + + // neutral-initialise the accumulators + { + x_acc_129500 = (int64_t) 0; + } + for (int64_t i_129505 = 0; i_129505 < chunk_sizze_129501; i_129505++) { + gtid_88975 = phys_tid_88976 + num_threads_129490 * i_129505; + // apply map function + { + int64_t x_72726 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_88975]; + + // save map-out results + { } + // load accumulator + { + x_72723 = x_acc_129500; + } + // load new values + { + x_72724 = x_72726; + } + // apply reduction operator + { + int64_t defunc_1_op_res_72725 = smax64(x_72723, x_72724); + + // store in accumulator + { + x_acc_129500 = defunc_1_op_res_72725; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_72723 = x_acc_129500; + ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_72723; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129506; + int32_t skip_waves_129507; + + 
skip_waves_129507 = 1; + + int64_t x_129502; + int64_t x_129503; + + offset_129506 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129492, sext_i64_i32(segred_group_sizze_88969))) { + x_129502 = ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129506)]; + } + } + offset_129506 = 1; + while (slt32(offset_129506, wave_sizze_129494)) { + if (slt32(local_tid_129492 + offset_129506, + sext_i64_i32(segred_group_sizze_88969)) && + ((local_tid_129492 - squot32(local_tid_129492, wave_sizze_129494) * + wave_sizze_129494) & (2 * offset_129506 - 1)) == 0) { + // read array element + { + x_129503 = ((volatile __local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129506)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129504 = smax64(x_129502, x_129503); + + x_129502 = defunc_1_op_res_129504; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_129502; + } + } + offset_129506 *= 2; + } + while (slt32(skip_waves_129507, + squot32(sext_i64_i32(segred_group_sizze_88969) + + wave_sizze_129494 - 1, wave_sizze_129494))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129506 = skip_waves_129507 * wave_sizze_129494; + if (slt32(local_tid_129492 + offset_129506, + sext_i64_i32(segred_group_sizze_88969)) && + ((local_tid_129492 - squot32(local_tid_129492, wave_sizze_129494) * + wave_sizze_129494) == 0 && (squot32(local_tid_129492, + wave_sizze_129494) & (2 * + skip_waves_129507 - + 1)) == + 0)) { + // read array element + { + x_129503 = ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129506)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129504 = smax64(x_129502, x_129503); + + x_129502 = defunc_1_op_res_129504; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_129502; + } + } 
+ skip_waves_129507 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129492) == (int64_t) 0) { + x_acc_129500 = x_129502; + } + } + + int32_t old_counter_129508; + + // first thread in group saves group result to global memory + { + if (local_tid_129492 == 0) { + ((__global + int64_t *) group_res_arr_mem_129488)[sext_i32_i64(group_tid_129493) * + segred_group_sizze_88969] = + x_acc_129500; + mem_fence_global(); + old_counter_129508 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129486)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_129496)[(int64_t) 0] = + old_counter_129508 == num_groups_88971 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129509; + + is_last_group_129509 = ((__local bool *) sync_arr_mem_129496)[(int64_t) 0]; + if (is_last_group_129509) { + if (local_tid_129492 == 0) { + old_counter_129508 = atomic_add_i32_global(&((volatile __global + int *) mainDetailedzicounter_mem_129486)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_88971)); + } + // read in the per-group-results + { + int64_t read_per_thread_129510 = sdiv_up64(num_groups_88971, + segred_group_sizze_88969); + + x_72723 = (int64_t) 0; + for (int64_t i_129511 = 0; i_129511 < read_per_thread_129510; + i_129511++) { + int64_t group_res_id_129512 = sext_i32_i64(local_tid_129492) * + read_per_thread_129510 + i_129511; + int64_t index_of_group_res_129513 = group_res_id_129512; + + if (slt64(group_res_id_129512, num_groups_88971)) { + x_72724 = ((__global + int64_t *) group_res_arr_mem_129488)[index_of_group_res_129513 * + segred_group_sizze_88969]; + + int64_t defunc_1_op_res_72725; + + defunc_1_op_res_72725 = smax64(x_72723, x_72724); + x_72723 = defunc_1_op_res_72725; + } + } + } + ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_72723; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the 
per-group results + { + int32_t offset_129514; + int32_t skip_waves_129515; + + skip_waves_129515 = 1; + + int64_t x_129502; + int64_t x_129503; + + offset_129514 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129492, + sext_i64_i32(segred_group_sizze_88969))) { + x_129502 = ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129514)]; + } + } + offset_129514 = 1; + while (slt32(offset_129514, wave_sizze_129494)) { + if (slt32(local_tid_129492 + offset_129514, + sext_i64_i32(segred_group_sizze_88969)) && + ((local_tid_129492 - squot32(local_tid_129492, + wave_sizze_129494) * + wave_sizze_129494) & (2 * offset_129514 - 1)) == 0) { + // read array element + { + x_129503 = ((volatile __local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129514)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129504 = smax64(x_129502, + x_129503); + + x_129502 = defunc_1_op_res_129504; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_129502; + } + } + offset_129514 *= 2; + } + while (slt32(skip_waves_129515, + squot32(sext_i64_i32(segred_group_sizze_88969) + + wave_sizze_129494 - 1, wave_sizze_129494))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129514 = skip_waves_129515 * wave_sizze_129494; + if (slt32(local_tid_129492 + offset_129514, + sext_i64_i32(segred_group_sizze_88969)) && + ((local_tid_129492 - squot32(local_tid_129492, + wave_sizze_129494) * + wave_sizze_129494) == 0 && (squot32(local_tid_129492, + wave_sizze_129494) & + (2 * skip_waves_129515 - + 1)) == 0)) { + // read array element + { + x_129503 = ((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492 + + offset_129514)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129504 = smax64(x_129502, + x_129503); + + x_129502 = defunc_1_op_res_129504; + } + // write result of operation + { + 
((__local + int64_t *) red_arr_mem_129498)[sext_i32_i64(local_tid_129492)] = + x_129502; + } + } + skip_waves_129515 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129492 == 0) { + ((__global int64_t *) mem_124966)[(int64_t) 0] = x_129502; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_88969 +} +__kernel void mainDetailedzisegred_small_79431(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127091_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_81262, + int64_t segment_sizze_nonzzero_127084, + __global + unsigned char *mem_121831, + __global + unsigned char *mem_121835, + __global + unsigned char *mem_121840) +{ + #define segred_group_sizze_81261 (mainDetailedzisegred_group_sizze_79425) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127091_backing_0 = + (__local volatile + char *) red_arr_mem_127091_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127086; + int32_t local_tid_127087; + int64_t group_sizze_127090; + int32_t wave_sizze_127089; + int32_t group_tid_127088; + + global_tid_127086 = get_global_id(0); + local_tid_127087 = get_local_id(0); + group_sizze_127090 = get_local_size(0); + wave_sizze_127089 = LOCKSTEP_WIDTH; + group_tid_127088 = get_group_id(0); + + int32_t phys_tid_79431; + + phys_tid_79431 = global_tid_127086; + + __local char *red_arr_mem_127091; + + red_arr_mem_127091 = (__local char *) red_arr_mem_127091_backing_0; + + int32_t phys_group_id_127093; + + phys_group_id_127093 = get_group_id(0); + for (int32_t i_127094 = 0; i_127094 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876 * k2p2zq_70876, + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084))) - + phys_group_id_127093, sext_i64_i32(num_groups_81262)); + i_127094++) { + int32_t virt_group_id_127095 = phys_group_id_127093 + i_127094 * + 
sext_i64_i32(num_groups_81262); + int64_t gtid_79418 = squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084), + k2p2zq_70876 * k2p2zq_70876); + int64_t gtid_79419 = squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) - + squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_79420 = squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) - + squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) - + squot64(squot64(sext_i32_i64(local_tid_127087), + segment_sizze_nonzzero_127084) + + sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_79430 = srem64(sext_i32_i64(local_tid_127087), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (((slt64(gtid_79418, + m_70861) && + slt64(gtid_79419, + k2p2zq_70876)) && + slt64(gtid_79420, + 
k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_127087), + k2p2zq_70876 * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084)))) { + double x_81271 = ((__global double *) mem_121831)[gtid_79419 * + (k2p2zq_70876 * + m_70861) + + gtid_79418 * + k2p2zq_70876 + + gtid_79430]; + double x_81272 = ((__global double *) mem_121835)[gtid_79418 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_79420 * + k2p2zq_70876 + + gtid_79430]; + double defunc_1_f_res_81273 = x_81271 * x_81272; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + defunc_1_f_res_81273; + } + } else { + ((__local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_81265; + double x_81266; + double x_127096; + double x_127097; + bool ltid_in_bounds_127099; + + ltid_in_bounds_127099 = slt64(sext_i32_i64(local_tid_127087), + k2p2zq_70876 * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084)); + + int32_t skip_threads_127100; + + // read input for in-block scan + { + if (ltid_in_bounds_127099) { + x_81266 = ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)]; + if ((local_tid_127087 - squot32(local_tid_127087, 32) * + 32) == 0) { + x_81265 = x_81266; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127100 = 1; + while (slt32(skip_threads_127100, 32)) { + if (sle32(skip_threads_127100, local_tid_127087 - + squot32(local_tid_127087, 32) * 32) && + ltid_in_bounds_127099) { + // read operands + { + x_81265 = ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087) - + sext_i32_i64(skip_threads_127100)]; + } + // perform operation + { + bool inactive_127101 = + slt64(srem64(sext_i32_i64(local_tid_127087), + k2p2zq_70876), + 
sext_i32_i64(local_tid_127087) - + sext_i32_i64(local_tid_127087 - + skip_threads_127100)); + + if (inactive_127101) { + x_81265 = x_81266; + } + if (!inactive_127101) { + double defunc_1_op_res_81267 = x_81265 + + x_81266; + + x_81265 = defunc_1_op_res_81267; + } + } + } + if (sle32(wave_sizze_127089, skip_threads_127100)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127100, local_tid_127087 - + squot32(local_tid_127087, 32) * 32) && + ltid_in_bounds_127099) { + // write result + { + ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + x_81265; + x_81266 = x_81265; + } + } + if (sle32(wave_sizze_127089, skip_threads_127100)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127100 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127087 - squot32(local_tid_127087, 32) * + 32) == 31 && ltid_in_bounds_127099) { + ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(squot32(local_tid_127087, + 32))] = + x_81265; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127102; + + // read input for in-block scan + { + if (squot32(local_tid_127087, 32) == 0 && + ltid_in_bounds_127099) { + x_127097 = ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)]; + if ((local_tid_127087 - squot32(local_tid_127087, + 32) * 32) == 0) { + x_127096 = x_127097; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127102 = 1; + while (slt32(skip_threads_127102, 32)) { + if (sle32(skip_threads_127102, local_tid_127087 - + squot32(local_tid_127087, 32) * 32) && + (squot32(local_tid_127087, 32) == 0 && + ltid_in_bounds_127099)) { + // read operands + { + x_127096 = ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087) - + sext_i32_i64(skip_threads_127102)]; + } + // 
perform operation + { + bool inactive_127103 = + slt64(srem64(sext_i32_i64(local_tid_127087 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_127087 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127087 - + skip_threads_127102) * + 32 + 32 - 1)); + + if (inactive_127103) { + x_127096 = x_127097; + } + if (!inactive_127103) { + double defunc_1_op_res_127098 = + x_127096 + x_127097; + + x_127096 = defunc_1_op_res_127098; + } + } + } + if (sle32(wave_sizze_127089, skip_threads_127102)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127102, local_tid_127087 - + squot32(local_tid_127087, 32) * 32) && + (squot32(local_tid_127087, 32) == 0 && + ltid_in_bounds_127099)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + x_127096; + x_127097 = x_127096; + } + } + if (sle32(wave_sizze_127089, skip_threads_127102)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127102 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127087, 32) == 0 || + !ltid_in_bounds_127099)) { + // read operands + { + x_81266 = x_81265; + x_81265 = ((__local + double *) red_arr_mem_127091)[sext_i32_i64(squot32(local_tid_127087, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127104 = + slt64(srem64(sext_i32_i64(local_tid_127087), + k2p2zq_70876), + sext_i32_i64(local_tid_127087) - + sext_i32_i64(squot32(local_tid_127087, + 32) * 32 - 1)); + + if (inactive_127104) { + x_81265 = x_81266; + } + if (!inactive_127104) { + double defunc_1_op_res_81267 = x_81265 + + x_81266; + + x_81265 = defunc_1_op_res_81267; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + x_81265; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127087, 32) == 0) { + ((__local + double *) 
red_arr_mem_127091)[sext_i32_i64(local_tid_127087)] = + x_81266; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087), m_70861 * k2p2zq_70876 * + k2p2zq_70876) && slt64(sext_i32_i64(local_tid_127087), + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084))) { + ((__global + double *) mem_121840)[squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) + + squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087) - + squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087) - + squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087) - + squot64(sext_i32_i64(virt_group_id_127095) * + squot64(segred_group_sizze_81261, + segment_sizze_nonzzero_127084) + + sext_i32_i64(local_tid_127087), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) 
red_arr_mem_127091)[(sext_i32_i64(local_tid_127087) + + (int64_t) 1) * + segment_sizze_nonzzero_127084 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_81261 +} +__kernel void mainDetailedzisegred_small_79709(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126947_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t x_81093, int64_t i_81094, + int64_t j_m_i_81098, + int64_t num_groups_81180, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_126940, + __global + unsigned char *mem_121351, + __global + unsigned char *mem_param_121469, + __global + unsigned char *mem_121555) +{ + #define segred_group_sizze_81179 (mainDetailedzisegred_group_sizze_79703) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126947_backing_0 = + (__local volatile + char *) red_arr_mem_126947_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126942; + int32_t local_tid_126943; + int64_t group_sizze_126946; + int32_t wave_sizze_126945; + int32_t group_tid_126944; + + global_tid_126942 = get_global_id(0); + local_tid_126943 = get_local_id(0); + group_sizze_126946 = get_local_size(0); + wave_sizze_126945 = LOCKSTEP_WIDTH; + group_tid_126944 = get_group_id(0); + + int32_t phys_tid_79709; + + phys_tid_79709 = global_tid_126942; + + __local char *red_arr_mem_126947; + + red_arr_mem_126947 = (__local char *) red_arr_mem_126947_backing_0; + + int32_t phys_group_id_126949; + + phys_group_id_126949 = get_group_id(0); + for (int32_t i_126950 = 0; i_126950 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940))) - + phys_group_id_126949, sext_i64_i32(num_groups_81180)); + i_126950++) { + int32_t virt_group_id_126951 = phys_group_id_126949 + i_126950 * 
+ sext_i64_i32(num_groups_81180); + int64_t gtid_79698 = squot64(squot64(sext_i32_i64(local_tid_126943), + segment_sizze_nonzzero_126940) + + sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940), + k2p2zq_70876); + int64_t gtid_79699 = squot64(sext_i32_i64(local_tid_126943), + segment_sizze_nonzzero_126940) + + sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940) - + squot64(squot64(sext_i32_i64(local_tid_126943), + segment_sizze_nonzzero_126940) + + sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_79708 = srem64(sext_i32_i64(local_tid_126943), + j_m_i_81098); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, j_m_i_81098) && ((slt64(gtid_79698, + m_70861) && + slt64(gtid_79699, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_126943), + j_m_i_81098 * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940)))) { + int64_t slice_115048 = gtid_79708 + x_81093; + double x_81190 = ((__global double *) mem_121351)[gtid_79698 * + (k2p2zq_70876 * + k2p2zq_70876) + + slice_115048 * + k2p2zq_70876 + + i_81094]; + bool isnan_res_81191; + + isnan_res_81191 = futrts_isnan64(x_81190); + + double defunc_1_f_res_81192; + + if (isnan_res_81191) { + defunc_1_f_res_81192 = 0.0; + } else { + double x_81189 = ((__global + double *) mem_param_121469)[gtid_79698 * + binop_x_120251 + + gtid_79699 * + k2p2zq_70876 + + slice_115048]; + double defunc_1_f_res_f_res_81193 = x_81189 * x_81190; + + defunc_1_f_res_81192 = defunc_1_f_res_f_res_81193; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + defunc_1_f_res_81192; + } + } else { + ((__local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); 
+ if (slt64((int64_t) 0, j_m_i_81098)) { + // perform segmented scan to imitate reduction + { + double x_81183; + double x_81184; + double x_126952; + double x_126953; + bool ltid_in_bounds_126955; + + ltid_in_bounds_126955 = slt64(sext_i32_i64(local_tid_126943), + j_m_i_81098 * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940)); + + int32_t skip_threads_126956; + + // read input for in-block scan + { + if (ltid_in_bounds_126955) { + x_81184 = ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)]; + if ((local_tid_126943 - squot32(local_tid_126943, 32) * + 32) == 0) { + x_81183 = x_81184; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126956 = 1; + while (slt32(skip_threads_126956, 32)) { + if (sle32(skip_threads_126956, local_tid_126943 - + squot32(local_tid_126943, 32) * 32) && + ltid_in_bounds_126955) { + // read operands + { + x_81183 = ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943) - + sext_i32_i64(skip_threads_126956)]; + } + // perform operation + { + bool inactive_126957 = + slt64(srem64(sext_i32_i64(local_tid_126943), + j_m_i_81098), + sext_i32_i64(local_tid_126943) - + sext_i32_i64(local_tid_126943 - + skip_threads_126956)); + + if (inactive_126957) { + x_81183 = x_81184; + } + if (!inactive_126957) { + double defunc_1_op_res_81185 = x_81183 + + x_81184; + + x_81183 = defunc_1_op_res_81185; + } + } + } + if (sle32(wave_sizze_126945, skip_threads_126956)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126956, local_tid_126943 - + squot32(local_tid_126943, 32) * 32) && + ltid_in_bounds_126955) { + // write result + { + ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + x_81183; + x_81184 = x_81183; + } + } + if (sle32(wave_sizze_126945, skip_threads_126956)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126956 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' 
writes its result to offset 'i' + { + if ((local_tid_126943 - squot32(local_tid_126943, 32) * + 32) == 31 && ltid_in_bounds_126955) { + ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(squot32(local_tid_126943, + 32))] = + x_81183; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126958; + + // read input for in-block scan + { + if (squot32(local_tid_126943, 32) == 0 && + ltid_in_bounds_126955) { + x_126953 = ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)]; + if ((local_tid_126943 - squot32(local_tid_126943, + 32) * 32) == 0) { + x_126952 = x_126953; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126958 = 1; + while (slt32(skip_threads_126958, 32)) { + if (sle32(skip_threads_126958, local_tid_126943 - + squot32(local_tid_126943, 32) * 32) && + (squot32(local_tid_126943, 32) == 0 && + ltid_in_bounds_126955)) { + // read operands + { + x_126952 = ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943) - + sext_i32_i64(skip_threads_126958)]; + } + // perform operation + { + bool inactive_126959 = + slt64(srem64(sext_i32_i64(local_tid_126943 * + 32 + 32 - 1), + j_m_i_81098), + sext_i32_i64(local_tid_126943 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126943 - + skip_threads_126958) * + 32 + 32 - 1)); + + if (inactive_126959) { + x_126952 = x_126953; + } + if (!inactive_126959) { + double defunc_1_op_res_126954 = + x_126952 + x_126953; + + x_126952 = defunc_1_op_res_126954; + } + } + } + if (sle32(wave_sizze_126945, skip_threads_126958)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126958, local_tid_126943 - + squot32(local_tid_126943, 32) * 32) && + (squot32(local_tid_126943, 32) == 0 && + ltid_in_bounds_126955)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + x_126952; + 
x_126953 = x_126952; + } + } + if (sle32(wave_sizze_126945, skip_threads_126958)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126958 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126943, 32) == 0 || + !ltid_in_bounds_126955)) { + // read operands + { + x_81184 = x_81183; + x_81183 = ((__local + double *) red_arr_mem_126947)[sext_i32_i64(squot32(local_tid_126943, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126960 = + slt64(srem64(sext_i32_i64(local_tid_126943), + j_m_i_81098), + sext_i32_i64(local_tid_126943) - + sext_i32_i64(squot32(local_tid_126943, + 32) * 32 - 1)); + + if (inactive_126960) { + x_81183 = x_81184; + } + if (!inactive_126960) { + double defunc_1_op_res_81185 = x_81183 + + x_81184; + + x_81183 = defunc_1_op_res_81185; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + x_81183; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126943, 32) == 0) { + ((__local + double *) red_arr_mem_126947)[sext_i32_i64(local_tid_126943)] = + x_81184; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940) + + sext_i32_i64(local_tid_126943), m_70861 * k2p2zq_70876) && + slt64(sext_i32_i64(local_tid_126943), + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940))) { + ((__global + double *) mem_121555)[squot64(sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940) + + sext_i32_i64(local_tid_126943), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940) + + sext_i32_i64(local_tid_126943) - + 
squot64(sext_i32_i64(virt_group_id_126951) * + squot64(segred_group_sizze_81179, + segment_sizze_nonzzero_126940) + + sext_i32_i64(local_tid_126943), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_126947)[(sext_i32_i64(local_tid_126943) + + (int64_t) 1) * + segment_sizze_nonzzero_126940 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_81179 +} +__kernel void mainDetailedzisegred_small_80466(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126722_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t j_80634, + int64_t num_groups_80667, + int64_t segment_sizze_nonzzero_126715, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_120938) +{ + #define segred_group_sizze_80666 (mainDetailedzisegred_group_sizze_80460) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126722_backing_0 = + (__local volatile + char *) red_arr_mem_126722_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126717; + int32_t local_tid_126718; + int64_t group_sizze_126721; + int32_t wave_sizze_126720; + int32_t group_tid_126719; + + global_tid_126717 = get_global_id(0); + local_tid_126718 = get_local_id(0); + group_sizze_126721 = get_local_size(0); + wave_sizze_126720 = LOCKSTEP_WIDTH; + group_tid_126719 = get_group_id(0); + + int32_t phys_tid_80466; + + phys_tid_80466 = global_tid_126717; + + __local char *red_arr_mem_126722; + + red_arr_mem_126722 = (__local char *) red_arr_mem_126722_backing_0; + + int32_t phys_group_id_126724; + + phys_group_id_126724 = get_group_id(0); + for (int32_t i_126725 = 0; i_126725 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715))) - + 
phys_group_id_126724, sext_i64_i32(num_groups_80667)); + i_126725++) { + int32_t virt_group_id_126726 = phys_group_id_126724 + i_126725 * + sext_i64_i32(num_groups_80667); + int64_t gtid_80457 = squot64(sext_i32_i64(local_tid_126718), + segment_sizze_nonzzero_126715) + + sext_i32_i64(virt_group_id_126726) * + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715); + int64_t gtid_80465 = srem64(sext_i32_i64(local_tid_126718), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (slt64(gtid_80457, + m_70861) && + slt64(sext_i32_i64(local_tid_126718), + k2p2zq_70876 * + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715)))) { + double x_80674 = ((__global double *) mem_120246)[j_80634 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_80457 * + defunc_2_reduce_res_70985 + + gtid_80465]; + double defunc_1_f_res_80675 = x_80674 * x_80674; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + defunc_1_f_res_80675; + } + } else { + ((__local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_80670; + double x_80671; + double x_126727; + double x_126728; + bool ltid_in_bounds_126730; + + ltid_in_bounds_126730 = slt64(sext_i32_i64(local_tid_126718), + k2p2zq_70876 * + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715)); + + int32_t skip_threads_126731; + + // read input for in-block scan + { + if (ltid_in_bounds_126730) { + x_80671 = ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)]; + if ((local_tid_126718 - squot32(local_tid_126718, 32) * + 32) == 0) { + x_80670 = x_80671; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126731 = 1; + while 
(slt32(skip_threads_126731, 32)) { + if (sle32(skip_threads_126731, local_tid_126718 - + squot32(local_tid_126718, 32) * 32) && + ltid_in_bounds_126730) { + // read operands + { + x_80670 = ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718) - + sext_i32_i64(skip_threads_126731)]; + } + // perform operation + { + bool inactive_126732 = + slt64(srem64(sext_i32_i64(local_tid_126718), + k2p2zq_70876), + sext_i32_i64(local_tid_126718) - + sext_i32_i64(local_tid_126718 - + skip_threads_126731)); + + if (inactive_126732) { + x_80670 = x_80671; + } + if (!inactive_126732) { + double defunc_1_op_res_80672 = x_80670 + + x_80671; + + x_80670 = defunc_1_op_res_80672; + } + } + } + if (sle32(wave_sizze_126720, skip_threads_126731)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126731, local_tid_126718 - + squot32(local_tid_126718, 32) * 32) && + ltid_in_bounds_126730) { + // write result + { + ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + x_80670; + x_80671 = x_80670; + } + } + if (sle32(wave_sizze_126720, skip_threads_126731)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126731 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126718 - squot32(local_tid_126718, 32) * + 32) == 31 && ltid_in_bounds_126730) { + ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(squot32(local_tid_126718, + 32))] = + x_80670; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126733; + + // read input for in-block scan + { + if (squot32(local_tid_126718, 32) == 0 && + ltid_in_bounds_126730) { + x_126728 = ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)]; + if ((local_tid_126718 - squot32(local_tid_126718, + 32) * 32) == 0) { + x_126727 = x_126728; + } + } + } + // in-block scan 
(hopefully no barriers needed) + { + skip_threads_126733 = 1; + while (slt32(skip_threads_126733, 32)) { + if (sle32(skip_threads_126733, local_tid_126718 - + squot32(local_tid_126718, 32) * 32) && + (squot32(local_tid_126718, 32) == 0 && + ltid_in_bounds_126730)) { + // read operands + { + x_126727 = ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718) - + sext_i32_i64(skip_threads_126733)]; + } + // perform operation + { + bool inactive_126734 = + slt64(srem64(sext_i32_i64(local_tid_126718 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_126718 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126718 - + skip_threads_126733) * + 32 + 32 - 1)); + + if (inactive_126734) { + x_126727 = x_126728; + } + if (!inactive_126734) { + double defunc_1_op_res_126729 = + x_126727 + x_126728; + + x_126727 = defunc_1_op_res_126729; + } + } + } + if (sle32(wave_sizze_126720, skip_threads_126733)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126733, local_tid_126718 - + squot32(local_tid_126718, 32) * 32) && + (squot32(local_tid_126718, 32) == 0 && + ltid_in_bounds_126730)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + x_126727; + x_126728 = x_126727; + } + } + if (sle32(wave_sizze_126720, skip_threads_126733)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126733 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126718, 32) == 0 || + !ltid_in_bounds_126730)) { + // read operands + { + x_80671 = x_80670; + x_80670 = ((__local + double *) red_arr_mem_126722)[sext_i32_i64(squot32(local_tid_126718, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126735 = + slt64(srem64(sext_i32_i64(local_tid_126718), + k2p2zq_70876), + sext_i32_i64(local_tid_126718) - + sext_i32_i64(squot32(local_tid_126718, + 32) * 32 - 1)); + + if (inactive_126735) { + x_80670 = x_80671; + } 
+ if (!inactive_126735) { + double defunc_1_op_res_80672 = x_80670 + + x_80671; + + x_80670 = defunc_1_op_res_80672; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + x_80670; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126718, 32) == 0) { + ((__local + double *) red_arr_mem_126722)[sext_i32_i64(local_tid_126718)] = + x_80671; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126726) * + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715) + + sext_i32_i64(local_tid_126718), m_70861) && + slt64(sext_i32_i64(local_tid_126718), + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715))) { + ((__global + double *) mem_120938)[sext_i32_i64(virt_group_id_126726) * + squot64(segred_group_sizze_80666, + segment_sizze_nonzzero_126715) + + sext_i32_i64(local_tid_126718)] = + ((__local + double *) red_arr_mem_126722)[(sext_i32_i64(local_tid_126718) + + (int64_t) 1) * + segment_sizze_nonzzero_126715 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_80666 +} +__kernel void mainDetailedzisegred_small_83181(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127875_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_85162, + int64_t segment_sizze_nonzzero_127868, + __global + unsigned char *mem_123614, + __global + unsigned char *mem_123618, + __global + unsigned char *mem_123623) +{ + #define segred_group_sizze_85161 (mainDetailedzisegred_group_sizze_83175) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127875_backing_0 = + (__local volatile + char *) 
red_arr_mem_127875_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127870; + int32_t local_tid_127871; + int64_t group_sizze_127874; + int32_t wave_sizze_127873; + int32_t group_tid_127872; + + global_tid_127870 = get_global_id(0); + local_tid_127871 = get_local_id(0); + group_sizze_127874 = get_local_size(0); + wave_sizze_127873 = LOCKSTEP_WIDTH; + group_tid_127872 = get_group_id(0); + + int32_t phys_tid_83181; + + phys_tid_83181 = global_tid_127870; + + __local char *red_arr_mem_127875; + + red_arr_mem_127875 = (__local char *) red_arr_mem_127875_backing_0; + + int32_t phys_group_id_127877; + + phys_group_id_127877 = get_group_id(0); + for (int32_t i_127878 = 0; i_127878 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876 * k2p2zq_70876, + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868))) - + phys_group_id_127877, sext_i64_i32(num_groups_85162)); + i_127878++) { + int32_t virt_group_id_127879 = phys_group_id_127877 + i_127878 * + sext_i64_i32(num_groups_85162); + int64_t gtid_83168 = squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868), + k2p2zq_70876 * k2p2zq_70876); + int64_t gtid_83169 = squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) - + squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_83170 = squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) - + 
squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) - + squot64(squot64(sext_i32_i64(local_tid_127871), + segment_sizze_nonzzero_127868) + + sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_83180 = srem64(sext_i32_i64(local_tid_127871), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (((slt64(gtid_83168, + m_70861) && + slt64(gtid_83169, + k2p2zq_70876)) && + slt64(gtid_83170, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_127871), + k2p2zq_70876 * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868)))) { + double x_85171 = ((__global double *) mem_123614)[gtid_83169 * + (k2p2zq_70876 * + m_70861) + + gtid_83168 * + k2p2zq_70876 + + gtid_83180]; + double x_85172 = ((__global double *) mem_123618)[gtid_83168 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_83170 * + k2p2zq_70876 + + gtid_83180]; + double defunc_1_f_res_85173 = x_85171 * x_85172; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + defunc_1_f_res_85173; + } + } else { + ((__local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_85165; + double x_85166; + double x_127880; + double x_127881; + bool 
ltid_in_bounds_127883; + + ltid_in_bounds_127883 = slt64(sext_i32_i64(local_tid_127871), + k2p2zq_70876 * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868)); + + int32_t skip_threads_127884; + + // read input for in-block scan + { + if (ltid_in_bounds_127883) { + x_85166 = ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)]; + if ((local_tid_127871 - squot32(local_tid_127871, 32) * + 32) == 0) { + x_85165 = x_85166; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127884 = 1; + while (slt32(skip_threads_127884, 32)) { + if (sle32(skip_threads_127884, local_tid_127871 - + squot32(local_tid_127871, 32) * 32) && + ltid_in_bounds_127883) { + // read operands + { + x_85165 = ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871) - + sext_i32_i64(skip_threads_127884)]; + } + // perform operation + { + bool inactive_127885 = + slt64(srem64(sext_i32_i64(local_tid_127871), + k2p2zq_70876), + sext_i32_i64(local_tid_127871) - + sext_i32_i64(local_tid_127871 - + skip_threads_127884)); + + if (inactive_127885) { + x_85165 = x_85166; + } + if (!inactive_127885) { + double defunc_1_op_res_85167 = x_85165 + + x_85166; + + x_85165 = defunc_1_op_res_85167; + } + } + } + if (sle32(wave_sizze_127873, skip_threads_127884)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127884, local_tid_127871 - + squot32(local_tid_127871, 32) * 32) && + ltid_in_bounds_127883) { + // write result + { + ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + x_85165; + x_85166 = x_85165; + } + } + if (sle32(wave_sizze_127873, skip_threads_127884)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127884 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127871 - squot32(local_tid_127871, 32) * + 32) == 31 && ltid_in_bounds_127883) { + ((volatile __local + double *) 
red_arr_mem_127875)[sext_i32_i64(squot32(local_tid_127871, + 32))] = + x_85165; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127886; + + // read input for in-block scan + { + if (squot32(local_tid_127871, 32) == 0 && + ltid_in_bounds_127883) { + x_127881 = ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)]; + if ((local_tid_127871 - squot32(local_tid_127871, + 32) * 32) == 0) { + x_127880 = x_127881; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127886 = 1; + while (slt32(skip_threads_127886, 32)) { + if (sle32(skip_threads_127886, local_tid_127871 - + squot32(local_tid_127871, 32) * 32) && + (squot32(local_tid_127871, 32) == 0 && + ltid_in_bounds_127883)) { + // read operands + { + x_127880 = ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871) - + sext_i32_i64(skip_threads_127886)]; + } + // perform operation + { + bool inactive_127887 = + slt64(srem64(sext_i32_i64(local_tid_127871 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_127871 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127871 - + skip_threads_127886) * + 32 + 32 - 1)); + + if (inactive_127887) { + x_127880 = x_127881; + } + if (!inactive_127887) { + double defunc_1_op_res_127882 = + x_127880 + x_127881; + + x_127880 = defunc_1_op_res_127882; + } + } + } + if (sle32(wave_sizze_127873, skip_threads_127886)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127886, local_tid_127871 - + squot32(local_tid_127871, 32) * 32) && + (squot32(local_tid_127871, 32) == 0 && + ltid_in_bounds_127883)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + x_127880; + x_127881 = x_127880; + } + } + if (sle32(wave_sizze_127873, skip_threads_127886)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127886 *= 2; + } + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127871, 32) == 0 || + !ltid_in_bounds_127883)) { + // read operands + { + x_85166 = x_85165; + x_85165 = ((__local + double *) red_arr_mem_127875)[sext_i32_i64(squot32(local_tid_127871, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127888 = + slt64(srem64(sext_i32_i64(local_tid_127871), + k2p2zq_70876), + sext_i32_i64(local_tid_127871) - + sext_i32_i64(squot32(local_tid_127871, + 32) * 32 - 1)); + + if (inactive_127888) { + x_85165 = x_85166; + } + if (!inactive_127888) { + double defunc_1_op_res_85167 = x_85165 + + x_85166; + + x_85165 = defunc_1_op_res_85167; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + x_85165; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127871, 32) == 0) { + ((__local + double *) red_arr_mem_127875)[sext_i32_i64(local_tid_127871)] = + x_85166; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871), m_70861 * k2p2zq_70876 * + k2p2zq_70876) && slt64(sext_i32_i64(local_tid_127871), + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868))) { + ((__global + double *) mem_123623)[squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) + + squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871) - + squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + 
segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871) - + squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871) - + squot64(sext_i32_i64(virt_group_id_127879) * + squot64(segred_group_sizze_85161, + segment_sizze_nonzzero_127868) + + sext_i32_i64(local_tid_127871), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_127875)[(sext_i32_i64(local_tid_127871) + + (int64_t) 1) * + segment_sizze_nonzzero_127868 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85161 +} +__kernel void mainDetailedzisegred_small_83459(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127731_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t x_84993, int64_t i_84994, + int64_t j_m_i_84998, + int64_t num_groups_85080, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_127724, + __global + unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global + unsigned char *mem_123338) +{ + #define segred_group_sizze_85079 (mainDetailedzisegred_group_sizze_83453) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127731_backing_0 = + (__local volatile + char *) 
red_arr_mem_127731_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127726; + int32_t local_tid_127727; + int64_t group_sizze_127730; + int32_t wave_sizze_127729; + int32_t group_tid_127728; + + global_tid_127726 = get_global_id(0); + local_tid_127727 = get_local_id(0); + group_sizze_127730 = get_local_size(0); + wave_sizze_127729 = LOCKSTEP_WIDTH; + group_tid_127728 = get_group_id(0); + + int32_t phys_tid_83459; + + phys_tid_83459 = global_tid_127726; + + __local char *red_arr_mem_127731; + + red_arr_mem_127731 = (__local char *) red_arr_mem_127731_backing_0; + + int32_t phys_group_id_127733; + + phys_group_id_127733 = get_group_id(0); + for (int32_t i_127734 = 0; i_127734 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724))) - + phys_group_id_127733, sext_i64_i32(num_groups_85080)); + i_127734++) { + int32_t virt_group_id_127735 = phys_group_id_127733 + i_127734 * + sext_i64_i32(num_groups_85080); + int64_t gtid_83448 = squot64(squot64(sext_i32_i64(local_tid_127727), + segment_sizze_nonzzero_127724) + + sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724), + k2p2zq_70876); + int64_t gtid_83449 = squot64(sext_i32_i64(local_tid_127727), + segment_sizze_nonzzero_127724) + + sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724) - + squot64(squot64(sext_i32_i64(local_tid_127727), + segment_sizze_nonzzero_127724) + + sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_83458 = srem64(sext_i32_i64(local_tid_127727), + j_m_i_84998); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, j_m_i_84998) && ((slt64(gtid_83448, + m_70861) && + slt64(gtid_83449, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_127727), + j_m_i_84998 * + 
squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724)))) { + int64_t slice_115165 = gtid_83458 + x_84993; + double x_85090 = ((__global double *) mem_123143)[gtid_83448 * + (k2p2zq_70876 * + k2p2zq_70876) + + slice_115165 * + k2p2zq_70876 + + i_84994]; + bool isnan_res_85091; + + isnan_res_85091 = futrts_isnan64(x_85090); + + double defunc_1_f_res_85092; + + if (isnan_res_85091) { + defunc_1_f_res_85092 = 0.0; + } else { + double x_85089 = ((__global + double *) mem_param_123252)[gtid_83448 * + binop_x_120251 + + gtid_83449 * + k2p2zq_70876 + + slice_115165]; + double defunc_1_f_res_f_res_85093 = x_85089 * x_85090; + + defunc_1_f_res_85092 = defunc_1_f_res_f_res_85093; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + defunc_1_f_res_85092; + } + } else { + ((__local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, j_m_i_84998)) { + // perform segmented scan to imitate reduction + { + double x_85083; + double x_85084; + double x_127736; + double x_127737; + bool ltid_in_bounds_127739; + + ltid_in_bounds_127739 = slt64(sext_i32_i64(local_tid_127727), + j_m_i_84998 * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724)); + + int32_t skip_threads_127740; + + // read input for in-block scan + { + if (ltid_in_bounds_127739) { + x_85084 = ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)]; + if ((local_tid_127727 - squot32(local_tid_127727, 32) * + 32) == 0) { + x_85083 = x_85084; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127740 = 1; + while (slt32(skip_threads_127740, 32)) { + if (sle32(skip_threads_127740, local_tid_127727 - + squot32(local_tid_127727, 32) * 32) && + ltid_in_bounds_127739) { + // read operands + { + x_85083 = ((volatile __local + double *) 
red_arr_mem_127731)[sext_i32_i64(local_tid_127727) - + sext_i32_i64(skip_threads_127740)]; + } + // perform operation + { + bool inactive_127741 = + slt64(srem64(sext_i32_i64(local_tid_127727), + j_m_i_84998), + sext_i32_i64(local_tid_127727) - + sext_i32_i64(local_tid_127727 - + skip_threads_127740)); + + if (inactive_127741) { + x_85083 = x_85084; + } + if (!inactive_127741) { + double defunc_1_op_res_85085 = x_85083 + + x_85084; + + x_85083 = defunc_1_op_res_85085; + } + } + } + if (sle32(wave_sizze_127729, skip_threads_127740)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127740, local_tid_127727 - + squot32(local_tid_127727, 32) * 32) && + ltid_in_bounds_127739) { + // write result + { + ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + x_85083; + x_85084 = x_85083; + } + } + if (sle32(wave_sizze_127729, skip_threads_127740)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127740 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127727 - squot32(local_tid_127727, 32) * + 32) == 31 && ltid_in_bounds_127739) { + ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(squot32(local_tid_127727, + 32))] = + x_85083; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127742; + + // read input for in-block scan + { + if (squot32(local_tid_127727, 32) == 0 && + ltid_in_bounds_127739) { + x_127737 = ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)]; + if ((local_tid_127727 - squot32(local_tid_127727, + 32) * 32) == 0) { + x_127736 = x_127737; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127742 = 1; + while (slt32(skip_threads_127742, 32)) { + if (sle32(skip_threads_127742, local_tid_127727 - + squot32(local_tid_127727, 32) * 32) && + 
(squot32(local_tid_127727, 32) == 0 && + ltid_in_bounds_127739)) { + // read operands + { + x_127736 = ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727) - + sext_i32_i64(skip_threads_127742)]; + } + // perform operation + { + bool inactive_127743 = + slt64(srem64(sext_i32_i64(local_tid_127727 * + 32 + 32 - 1), + j_m_i_84998), + sext_i32_i64(local_tid_127727 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127727 - + skip_threads_127742) * + 32 + 32 - 1)); + + if (inactive_127743) { + x_127736 = x_127737; + } + if (!inactive_127743) { + double defunc_1_op_res_127738 = + x_127736 + x_127737; + + x_127736 = defunc_1_op_res_127738; + } + } + } + if (sle32(wave_sizze_127729, skip_threads_127742)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127742, local_tid_127727 - + squot32(local_tid_127727, 32) * 32) && + (squot32(local_tid_127727, 32) == 0 && + ltid_in_bounds_127739)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + x_127736; + x_127737 = x_127736; + } + } + if (sle32(wave_sizze_127729, skip_threads_127742)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127742 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127727, 32) == 0 || + !ltid_in_bounds_127739)) { + // read operands + { + x_85084 = x_85083; + x_85083 = ((__local + double *) red_arr_mem_127731)[sext_i32_i64(squot32(local_tid_127727, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127744 = + slt64(srem64(sext_i32_i64(local_tid_127727), + j_m_i_84998), + sext_i32_i64(local_tid_127727) - + sext_i32_i64(squot32(local_tid_127727, + 32) * 32 - 1)); + + if (inactive_127744) { + x_85083 = x_85084; + } + if (!inactive_127744) { + double defunc_1_op_res_85085 = x_85083 + + x_85084; + + x_85083 = defunc_1_op_res_85085; + } + } + // write final result + { + ((__local + double *) 
red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + x_85083; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127727, 32) == 0) { + ((__local + double *) red_arr_mem_127731)[sext_i32_i64(local_tid_127727)] = + x_85084; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724) + + sext_i32_i64(local_tid_127727), m_70861 * k2p2zq_70876) && + slt64(sext_i32_i64(local_tid_127727), + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724))) { + ((__global + double *) mem_123338)[squot64(sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724) + + sext_i32_i64(local_tid_127727), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724) + + sext_i32_i64(local_tid_127727) - + squot64(sext_i32_i64(virt_group_id_127735) * + squot64(segred_group_sizze_85079, + segment_sizze_nonzzero_127724) + + sext_i32_i64(local_tid_127727), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_127731)[(sext_i32_i64(local_tid_127727) + + (int64_t) 1) * + segment_sizze_nonzzero_127724 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85079 +} +__kernel void mainDetailedzisegred_small_84232(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127506_backing_aligned_0, + int64_t m_70861, + int64_t defunc_2_reduce_res_70985, + int64_t rp1_71562, + int64_t j_84524, + int64_t num_groups_84557, + int64_t segment_sizze_nonzzero_127499, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122730) +{ + #define segred_group_sizze_84556 
(mainDetailedzisegred_group_sizze_84226) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127506_backing_0 = + (__local volatile + char *) red_arr_mem_127506_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127501; + int32_t local_tid_127502; + int64_t group_sizze_127505; + int32_t wave_sizze_127504; + int32_t group_tid_127503; + + global_tid_127501 = get_global_id(0); + local_tid_127502 = get_local_id(0); + group_sizze_127505 = get_local_size(0); + wave_sizze_127504 = LOCKSTEP_WIDTH; + group_tid_127503 = get_group_id(0); + + int32_t phys_tid_84232; + + phys_tid_84232 = global_tid_127501; + + __local char *red_arr_mem_127506; + + red_arr_mem_127506 = (__local char *) red_arr_mem_127506_backing_0; + + int32_t phys_group_id_127508; + + phys_group_id_127508 = get_group_id(0); + for (int32_t i_127509 = 0; i_127509 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499))) - + phys_group_id_127508, sext_i64_i32(num_groups_84557)); + i_127509++) { + int32_t virt_group_id_127510 = phys_group_id_127508 + i_127509 * + sext_i64_i32(num_groups_84557); + int64_t gtid_84223 = squot64(sext_i32_i64(local_tid_127502), + segment_sizze_nonzzero_127499) + + sext_i32_i64(virt_group_id_127510) * + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499); + int64_t gtid_84231 = srem64(sext_i32_i64(local_tid_127502), rp1_71562); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, rp1_71562) && (slt64(gtid_84223, m_70861) && + slt64(sext_i32_i64(local_tid_127502), + rp1_71562 * + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499)))) { + double x_84564 = ((__global double *) mem_120246)[j_84524 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84223 * + defunc_2_reduce_res_70985 + + gtid_84231]; + double defunc_1_f_res_84565 = x_84564 * x_84564; + + // save map-out results + 
{ } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + defunc_1_f_res_84565; + } + } else { + ((__local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, rp1_71562)) { + // perform segmented scan to imitate reduction + { + double x_84560; + double x_84561; + double x_127511; + double x_127512; + bool ltid_in_bounds_127514; + + ltid_in_bounds_127514 = slt64(sext_i32_i64(local_tid_127502), + rp1_71562 * + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499)); + + int32_t skip_threads_127515; + + // read input for in-block scan + { + if (ltid_in_bounds_127514) { + x_84561 = ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)]; + if ((local_tid_127502 - squot32(local_tid_127502, 32) * + 32) == 0) { + x_84560 = x_84561; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127515 = 1; + while (slt32(skip_threads_127515, 32)) { + if (sle32(skip_threads_127515, local_tid_127502 - + squot32(local_tid_127502, 32) * 32) && + ltid_in_bounds_127514) { + // read operands + { + x_84560 = ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502) - + sext_i32_i64(skip_threads_127515)]; + } + // perform operation + { + bool inactive_127516 = + slt64(srem64(sext_i32_i64(local_tid_127502), + rp1_71562), + sext_i32_i64(local_tid_127502) - + sext_i32_i64(local_tid_127502 - + skip_threads_127515)); + + if (inactive_127516) { + x_84560 = x_84561; + } + if (!inactive_127516) { + double defunc_1_op_res_84562 = x_84560 + + x_84561; + + x_84560 = defunc_1_op_res_84562; + } + } + } + if (sle32(wave_sizze_127504, skip_threads_127515)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127515, local_tid_127502 - + squot32(local_tid_127502, 32) * 32) && + ltid_in_bounds_127514) { + // write result + { + ((volatile __local + double *) 
red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + x_84560; + x_84561 = x_84560; + } + } + if (sle32(wave_sizze_127504, skip_threads_127515)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127515 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127502 - squot32(local_tid_127502, 32) * + 32) == 31 && ltid_in_bounds_127514) { + ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(squot32(local_tid_127502, + 32))] = + x_84560; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127517; + + // read input for in-block scan + { + if (squot32(local_tid_127502, 32) == 0 && + ltid_in_bounds_127514) { + x_127512 = ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)]; + if ((local_tid_127502 - squot32(local_tid_127502, + 32) * 32) == 0) { + x_127511 = x_127512; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127517 = 1; + while (slt32(skip_threads_127517, 32)) { + if (sle32(skip_threads_127517, local_tid_127502 - + squot32(local_tid_127502, 32) * 32) && + (squot32(local_tid_127502, 32) == 0 && + ltid_in_bounds_127514)) { + // read operands + { + x_127511 = ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502) - + sext_i32_i64(skip_threads_127517)]; + } + // perform operation + { + bool inactive_127518 = + slt64(srem64(sext_i32_i64(local_tid_127502 * + 32 + 32 - 1), rp1_71562), + sext_i32_i64(local_tid_127502 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127502 - + skip_threads_127517) * + 32 + 32 - 1)); + + if (inactive_127518) { + x_127511 = x_127512; + } + if (!inactive_127518) { + double defunc_1_op_res_127513 = + x_127511 + x_127512; + + x_127511 = defunc_1_op_res_127513; + } + } + } + if (sle32(wave_sizze_127504, skip_threads_127517)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if 
(sle32(skip_threads_127517, local_tid_127502 - + squot32(local_tid_127502, 32) * 32) && + (squot32(local_tid_127502, 32) == 0 && + ltid_in_bounds_127514)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + x_127511; + x_127512 = x_127511; + } + } + if (sle32(wave_sizze_127504, skip_threads_127517)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127517 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127502, 32) == 0 || + !ltid_in_bounds_127514)) { + // read operands + { + x_84561 = x_84560; + x_84560 = ((__local + double *) red_arr_mem_127506)[sext_i32_i64(squot32(local_tid_127502, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127519 = + slt64(srem64(sext_i32_i64(local_tid_127502), + rp1_71562), + sext_i32_i64(local_tid_127502) - + sext_i32_i64(squot32(local_tid_127502, + 32) * 32 - 1)); + + if (inactive_127519) { + x_84560 = x_84561; + } + if (!inactive_127519) { + double defunc_1_op_res_84562 = x_84560 + + x_84561; + + x_84560 = defunc_1_op_res_84562; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + x_84560; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127502, 32) == 0) { + ((__local + double *) red_arr_mem_127506)[sext_i32_i64(local_tid_127502)] = + x_84561; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127510) * + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499) + + sext_i32_i64(local_tid_127502), m_70861) && + slt64(sext_i32_i64(local_tid_127502), + squot64(segred_group_sizze_84556, + segment_sizze_nonzzero_127499))) { + ((__global + double *) mem_122730)[sext_i32_i64(virt_group_id_127510) * + squot64(segred_group_sizze_84556, + 
segment_sizze_nonzzero_127499) + + sext_i32_i64(local_tid_127502)] = + ((__local + double *) red_arr_mem_127506)[(sext_i32_i64(local_tid_127502) + + (int64_t) 1) * + segment_sizze_nonzzero_127499 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84556 +} +__kernel void mainDetailedzisegred_small_84385(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127420_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t r_71551, + int64_t num_groups_84464, + int64_t segment_sizze_nonzzero_127413, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global + unsigned char *mem_122677) +{ + #define segred_group_sizze_84463 (mainDetailedzisegred_group_sizze_84379) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127420_backing_0 = + (__local volatile + char *) red_arr_mem_127420_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127415; + int32_t local_tid_127416; + int64_t group_sizze_127419; + int32_t wave_sizze_127418; + int32_t group_tid_127417; + + global_tid_127415 = get_global_id(0); + local_tid_127416 = get_local_id(0); + group_sizze_127419 = get_local_size(0); + wave_sizze_127418 = LOCKSTEP_WIDTH; + group_tid_127417 = get_group_id(0); + + int32_t phys_tid_84385; + + phys_tid_84385 = global_tid_127415; + + __local char *red_arr_mem_127420; + + red_arr_mem_127420 = (__local char *) red_arr_mem_127420_backing_0; + + int32_t phys_group_id_127422; + + phys_group_id_127422 = get_group_id(0); + for (int32_t i_127423 = 0; i_127423 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413))) - + phys_group_id_127422, sext_i64_i32(num_groups_84464)); + i_127423++) { + int32_t 
virt_group_id_127424 = phys_group_id_127422 + i_127423 * + sext_i64_i32(num_groups_84464); + int64_t gtid_84376 = squot64(sext_i32_i64(local_tid_127416), + segment_sizze_nonzzero_127413) + + sext_i32_i64(virt_group_id_127424) * + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413); + int64_t gtid_84384 = srem64(sext_i32_i64(local_tid_127416), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (slt64(gtid_84376, + m_70861) && + slt64(sext_i32_i64(local_tid_127416), + k2p2zq_70876 * + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413)))) { + double x_84472 = ((__global double *) mem_120246)[gtid_84384 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84376 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_84473 = ((__global + double *) mem_param_121967)[gtid_84376 * + k2p2zq_70876 + + gtid_84384]; + double defunc_1_f_res_84474 = x_84472 * x_84473; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + defunc_1_f_res_84474; + } + } else { + ((__local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_84467; + double x_84468; + double x_127425; + double x_127426; + bool ltid_in_bounds_127428; + + ltid_in_bounds_127428 = slt64(sext_i32_i64(local_tid_127416), + k2p2zq_70876 * + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413)); + + int32_t skip_threads_127429; + + // read input for in-block scan + { + if (ltid_in_bounds_127428) { + x_84468 = ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)]; + if ((local_tid_127416 - squot32(local_tid_127416, 32) * + 32) == 0) { + x_84467 = x_84468; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127429 = 1; 
+ while (slt32(skip_threads_127429, 32)) { + if (sle32(skip_threads_127429, local_tid_127416 - + squot32(local_tid_127416, 32) * 32) && + ltid_in_bounds_127428) { + // read operands + { + x_84467 = ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416) - + sext_i32_i64(skip_threads_127429)]; + } + // perform operation + { + bool inactive_127430 = + slt64(srem64(sext_i32_i64(local_tid_127416), + k2p2zq_70876), + sext_i32_i64(local_tid_127416) - + sext_i32_i64(local_tid_127416 - + skip_threads_127429)); + + if (inactive_127430) { + x_84467 = x_84468; + } + if (!inactive_127430) { + double defunc_1_op_res_84469 = x_84467 + + x_84468; + + x_84467 = defunc_1_op_res_84469; + } + } + } + if (sle32(wave_sizze_127418, skip_threads_127429)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127429, local_tid_127416 - + squot32(local_tid_127416, 32) * 32) && + ltid_in_bounds_127428) { + // write result + { + ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + x_84467; + x_84468 = x_84467; + } + } + if (sle32(wave_sizze_127418, skip_threads_127429)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127429 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127416 - squot32(local_tid_127416, 32) * + 32) == 31 && ltid_in_bounds_127428) { + ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(squot32(local_tid_127416, + 32))] = + x_84467; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127431; + + // read input for in-block scan + { + if (squot32(local_tid_127416, 32) == 0 && + ltid_in_bounds_127428) { + x_127426 = ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)]; + if ((local_tid_127416 - squot32(local_tid_127416, + 32) * 32) == 0) { + x_127425 = x_127426; + } + } + } + // in-block 
scan (hopefully no barriers needed) + { + skip_threads_127431 = 1; + while (slt32(skip_threads_127431, 32)) { + if (sle32(skip_threads_127431, local_tid_127416 - + squot32(local_tid_127416, 32) * 32) && + (squot32(local_tid_127416, 32) == 0 && + ltid_in_bounds_127428)) { + // read operands + { + x_127425 = ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416) - + sext_i32_i64(skip_threads_127431)]; + } + // perform operation + { + bool inactive_127432 = + slt64(srem64(sext_i32_i64(local_tid_127416 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_127416 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127416 - + skip_threads_127431) * + 32 + 32 - 1)); + + if (inactive_127432) { + x_127425 = x_127426; + } + if (!inactive_127432) { + double defunc_1_op_res_127427 = + x_127425 + x_127426; + + x_127425 = defunc_1_op_res_127427; + } + } + } + if (sle32(wave_sizze_127418, skip_threads_127431)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127431, local_tid_127416 - + squot32(local_tid_127416, 32) * 32) && + (squot32(local_tid_127416, 32) == 0 && + ltid_in_bounds_127428)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + x_127425; + x_127426 = x_127425; + } + } + if (sle32(wave_sizze_127418, skip_threads_127431)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127431 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127416, 32) == 0 || + !ltid_in_bounds_127428)) { + // read operands + { + x_84468 = x_84467; + x_84467 = ((__local + double *) red_arr_mem_127420)[sext_i32_i64(squot32(local_tid_127416, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127433 = + slt64(srem64(sext_i32_i64(local_tid_127416), + k2p2zq_70876), + sext_i32_i64(local_tid_127416) - + sext_i32_i64(squot32(local_tid_127416, + 32) * 32 - 1)); + + if (inactive_127433) { + x_84467 = x_84468; 
+ } + if (!inactive_127433) { + double defunc_1_op_res_84469 = x_84467 + + x_84468; + + x_84467 = defunc_1_op_res_84469; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + x_84467; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127416, 32) == 0) { + ((__local + double *) red_arr_mem_127420)[sext_i32_i64(local_tid_127416)] = + x_84468; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127424) * + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413) + + sext_i32_i64(local_tid_127416), m_70861) && + slt64(sext_i32_i64(local_tid_127416), + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413))) { + ((__global + double *) mem_122677)[sext_i32_i64(virt_group_id_127424) * + squot64(segred_group_sizze_84463, + segment_sizze_nonzzero_127413) + + sext_i32_i64(local_tid_127416)] = + ((__local + double *) red_arr_mem_127420)[(sext_i32_i64(local_tid_127416) + + (int64_t) 1) * + segment_sizze_nonzzero_127413 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84463 +} +__kernel void mainDetailedzisegred_small_84414(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127353_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t r_71551, + int64_t num_groups_84433, + int64_t segment_sizze_nonzzero_127346, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122665, + __global + unsigned char *mem_122668, + __global + unsigned char *mem_122671) +{ + #define segred_group_sizze_84432 (mainDetailedzisegred_group_sizze_84408) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + 
__local volatile char *restrict red_arr_mem_127353_backing_0 = + (__local volatile + char *) red_arr_mem_127353_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127348; + int32_t local_tid_127349; + int64_t group_sizze_127352; + int32_t wave_sizze_127351; + int32_t group_tid_127350; + + global_tid_127348 = get_global_id(0); + local_tid_127349 = get_local_id(0); + group_sizze_127352 = get_local_size(0); + wave_sizze_127351 = LOCKSTEP_WIDTH; + group_tid_127350 = get_group_id(0); + + int32_t phys_tid_84414; + + phys_tid_84414 = global_tid_127348; + + __local char *red_arr_mem_127353; + + red_arr_mem_127353 = (__local char *) red_arr_mem_127353_backing_0; + + int32_t phys_group_id_127355; + + phys_group_id_127355 = get_group_id(0); + for (int32_t i_127356 = 0; i_127356 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346))) - + phys_group_id_127355, sext_i64_i32(num_groups_84433)); + i_127356++) { + int32_t virt_group_id_127357 = phys_group_id_127355 + i_127356 * + sext_i64_i32(num_groups_84433); + int64_t gtid_84405 = squot64(sext_i32_i64(local_tid_127349), + segment_sizze_nonzzero_127346) + + sext_i32_i64(virt_group_id_127357) * + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346); + int64_t gtid_84413 = srem64(sext_i32_i64(local_tid_127349), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (slt64(gtid_84405, + m_70861) && + slt64(sext_i32_i64(local_tid_127349), + k2p2zq_70876 * + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346)))) { + double x_84443 = ((__global double *) mem_120246)[gtid_84413 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84405 * + defunc_2_reduce_res_70985 + + r_71551]; + double defunc_0_f_res_84444; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_70876; + i_119830++) { + double x_84448 = ((__global double *) mem_120246)[i_119830 * 
+ (defunc_2_reduce_res_70985 * + m_70861) + + gtid_84405 * + defunc_2_reduce_res_70985 + + r_71551]; + double x_84449 = ((__global double *) mem_122665)[i_119830 * + (k2p2zq_70876 * + m_70861) + + gtid_84405 * + k2p2zq_70876 + + gtid_84413]; + double defunc_1_f_res_84450 = x_84448 * x_84449; + double defunc_1_op_res_84447 = defunc_1_f_res_84450 + + redout_119829; + double redout_tmp_127358 = defunc_1_op_res_84447; + + redout_119829 = redout_tmp_127358; + } + defunc_0_f_res_84444 = redout_119829; + + double defunc_1_f_res_84451 = x_84443 * defunc_0_f_res_84444; + + // save map-out results + { + ((__global double *) mem_122671)[gtid_84405 * k2p2zq_70876 + + gtid_84413] = + defunc_0_f_res_84444; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + defunc_1_f_res_84451; + } + } else { + ((__local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_84437; + double x_84438; + double x_127359; + double x_127360; + bool ltid_in_bounds_127362; + + ltid_in_bounds_127362 = slt64(sext_i32_i64(local_tid_127349), + k2p2zq_70876 * + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346)); + + int32_t skip_threads_127363; + + // read input for in-block scan + { + if (ltid_in_bounds_127362) { + x_84438 = ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)]; + if ((local_tid_127349 - squot32(local_tid_127349, 32) * + 32) == 0) { + x_84437 = x_84438; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127363 = 1; + while (slt32(skip_threads_127363, 32)) { + if (sle32(skip_threads_127363, local_tid_127349 - + squot32(local_tid_127349, 32) * 32) && + ltid_in_bounds_127362) { + // read operands + { + x_84437 = ((volatile __local + double *) 
red_arr_mem_127353)[sext_i32_i64(local_tid_127349) - + sext_i32_i64(skip_threads_127363)]; + } + // perform operation + { + bool inactive_127364 = + slt64(srem64(sext_i32_i64(local_tid_127349), + k2p2zq_70876), + sext_i32_i64(local_tid_127349) - + sext_i32_i64(local_tid_127349 - + skip_threads_127363)); + + if (inactive_127364) { + x_84437 = x_84438; + } + if (!inactive_127364) { + double defunc_1_op_res_84439 = x_84437 + + x_84438; + + x_84437 = defunc_1_op_res_84439; + } + } + } + if (sle32(wave_sizze_127351, skip_threads_127363)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127363, local_tid_127349 - + squot32(local_tid_127349, 32) * 32) && + ltid_in_bounds_127362) { + // write result + { + ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + x_84437; + x_84438 = x_84437; + } + } + if (sle32(wave_sizze_127351, skip_threads_127363)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127363 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127349 - squot32(local_tid_127349, 32) * + 32) == 31 && ltid_in_bounds_127362) { + ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(squot32(local_tid_127349, + 32))] = + x_84437; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127365; + + // read input for in-block scan + { + if (squot32(local_tid_127349, 32) == 0 && + ltid_in_bounds_127362) { + x_127360 = ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)]; + if ((local_tid_127349 - squot32(local_tid_127349, + 32) * 32) == 0) { + x_127359 = x_127360; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127365 = 1; + while (slt32(skip_threads_127365, 32)) { + if (sle32(skip_threads_127365, local_tid_127349 - + squot32(local_tid_127349, 32) * 32) && + 
(squot32(local_tid_127349, 32) == 0 && + ltid_in_bounds_127362)) { + // read operands + { + x_127359 = ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349) - + sext_i32_i64(skip_threads_127365)]; + } + // perform operation + { + bool inactive_127366 = + slt64(srem64(sext_i32_i64(local_tid_127349 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_127349 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127349 - + skip_threads_127365) * + 32 + 32 - 1)); + + if (inactive_127366) { + x_127359 = x_127360; + } + if (!inactive_127366) { + double defunc_1_op_res_127361 = + x_127359 + x_127360; + + x_127359 = defunc_1_op_res_127361; + } + } + } + if (sle32(wave_sizze_127351, skip_threads_127365)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127365, local_tid_127349 - + squot32(local_tid_127349, 32) * 32) && + (squot32(local_tid_127349, 32) == 0 && + ltid_in_bounds_127362)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + x_127359; + x_127360 = x_127359; + } + } + if (sle32(wave_sizze_127351, skip_threads_127365)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127365 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127349, 32) == 0 || + !ltid_in_bounds_127362)) { + // read operands + { + x_84438 = x_84437; + x_84437 = ((__local + double *) red_arr_mem_127353)[sext_i32_i64(squot32(local_tid_127349, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127367 = + slt64(srem64(sext_i32_i64(local_tid_127349), + k2p2zq_70876), + sext_i32_i64(local_tid_127349) - + sext_i32_i64(squot32(local_tid_127349, + 32) * 32 - 1)); + + if (inactive_127367) { + x_84437 = x_84438; + } + if (!inactive_127367) { + double defunc_1_op_res_84439 = x_84437 + + x_84438; + + x_84437 = defunc_1_op_res_84439; + } + } + // write final result + { + ((__local + double *) 
red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + x_84437; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127349, 32) == 0) { + ((__local + double *) red_arr_mem_127353)[sext_i32_i64(local_tid_127349)] = + x_84438; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127357) * + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346) + + sext_i32_i64(local_tid_127349), m_70861) && + slt64(sext_i32_i64(local_tid_127349), + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346))) { + ((__global + double *) mem_122668)[sext_i32_i64(virt_group_id_127357) * + squot64(segred_group_sizze_84432, + segment_sizze_nonzzero_127346) + + sext_i32_i64(local_tid_127349)] = + ((__local + double *) red_arr_mem_127353)[(sext_i32_i64(local_tid_127349) + + (int64_t) 1) * + segment_sizze_nonzzero_127346 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_84432 +} +__kernel void mainDetailedzisegred_small_85711(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128229_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85919, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_128222, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char *mem_123907, + __global + unsigned char *mem_123910, + __global + unsigned char *mem_123944, + __global + unsigned char *mem_123948) +{ + #define segred_group_sizze_85918 (mainDetailedzisegred_group_sizze_85705) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128229_backing_0 
= + (__local volatile + char *) red_arr_mem_128229_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128224; + int32_t local_tid_128225; + int64_t group_sizze_128228; + int32_t wave_sizze_128227; + int32_t group_tid_128226; + + global_tid_128224 = get_global_id(0); + local_tid_128225 = get_local_id(0); + group_sizze_128228 = get_local_size(0); + wave_sizze_128227 = LOCKSTEP_WIDTH; + group_tid_128226 = get_group_id(0); + + int32_t phys_tid_85711; + + phys_tid_85711 = global_tid_128224; + + __local char *red_arr_mem_128229; + + red_arr_mem_128229 = (__local char *) red_arr_mem_128229_backing_0; + + int32_t phys_group_id_128231; + + phys_group_id_128231 = get_group_id(0); + for (int32_t i_128232 = 0; i_128232 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222))) - + phys_group_id_128231, sext_i64_i32(num_groups_85919)); + i_128232++) { + int32_t virt_group_id_128233 = phys_group_id_128231 + i_128232 * + sext_i64_i32(num_groups_85919); + int64_t gtid_85700 = squot64(squot64(sext_i32_i64(local_tid_128225), + segment_sizze_nonzzero_128222) + + sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222), + k2p2zq_70876); + int64_t gtid_85701 = squot64(sext_i32_i64(local_tid_128225), + segment_sizze_nonzzero_128222) + + sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222) - + squot64(squot64(sext_i32_i64(local_tid_128225), + segment_sizze_nonzzero_128222) + + sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_85710 = srem64(sext_i32_i64(local_tid_128225), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && ((slt64(gtid_85700, + m_70861) && + slt64(gtid_85701, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_128225), 
+ k2p2zq_70876 * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222)))) { + double fr_85929 = ((__global double *) mem_123910)[gtid_85700]; + double x_85930 = ((__global double *) mem_123907)[gtid_85700 * + k2p2zq_70876 + + gtid_85701]; + double x_85932 = ((__global double *) mem_123907)[gtid_85700 * + k2p2zq_70876 + + gtid_85710]; + double x_85933 = ((__global + double *) mem_param_123778)[gtid_85700 * + binop_x_120251 + + gtid_85701 * + k2p2zq_70876 + + gtid_85710]; + double x_85934 = ((__global double *) mem_120246)[gtid_85710 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85700 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85935 = x_85930 * x_85932; + double y_85936 = x_85935 / fr_85929; + double defunc_1_f_res_85937 = x_85933 - y_85936; + double defunc_1_f_res_85938 = x_85934 * defunc_1_f_res_85937; + + // save map-out results + { + ((__global double *) mem_123948)[gtid_85700 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_85701 * k2p2zq_70876 + + gtid_85710] = + defunc_1_f_res_85937; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + defunc_1_f_res_85938; + } + } else { + ((__local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_85923; + double x_85924; + double x_128234; + double x_128235; + bool ltid_in_bounds_128237; + + ltid_in_bounds_128237 = slt64(sext_i32_i64(local_tid_128225), + k2p2zq_70876 * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222)); + + int32_t skip_threads_128238; + + // read input for in-block scan + { + if (ltid_in_bounds_128237) { + x_85924 = ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)]; + if ((local_tid_128225 - squot32(local_tid_128225, 32) * + 32) == 0) { + x_85923 = x_85924; + } + } + } + // in-block 
scan (hopefully no barriers needed) + { + skip_threads_128238 = 1; + while (slt32(skip_threads_128238, 32)) { + if (sle32(skip_threads_128238, local_tid_128225 - + squot32(local_tid_128225, 32) * 32) && + ltid_in_bounds_128237) { + // read operands + { + x_85923 = ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225) - + sext_i32_i64(skip_threads_128238)]; + } + // perform operation + { + bool inactive_128239 = + slt64(srem64(sext_i32_i64(local_tid_128225), + k2p2zq_70876), + sext_i32_i64(local_tid_128225) - + sext_i32_i64(local_tid_128225 - + skip_threads_128238)); + + if (inactive_128239) { + x_85923 = x_85924; + } + if (!inactive_128239) { + double defunc_1_op_res_85925 = x_85923 + + x_85924; + + x_85923 = defunc_1_op_res_85925; + } + } + } + if (sle32(wave_sizze_128227, skip_threads_128238)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128238, local_tid_128225 - + squot32(local_tid_128225, 32) * 32) && + ltid_in_bounds_128237) { + // write result + { + ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + x_85923; + x_85924 = x_85923; + } + } + if (sle32(wave_sizze_128227, skip_threads_128238)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128238 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128225 - squot32(local_tid_128225, 32) * + 32) == 31 && ltid_in_bounds_128237) { + ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(squot32(local_tid_128225, + 32))] = + x_85923; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128240; + + // read input for in-block scan + { + if (squot32(local_tid_128225, 32) == 0 && + ltid_in_bounds_128237) { + x_128235 = ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)]; + if ((local_tid_128225 - squot32(local_tid_128225, + 
32) * 32) == 0) { + x_128234 = x_128235; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128240 = 1; + while (slt32(skip_threads_128240, 32)) { + if (sle32(skip_threads_128240, local_tid_128225 - + squot32(local_tid_128225, 32) * 32) && + (squot32(local_tid_128225, 32) == 0 && + ltid_in_bounds_128237)) { + // read operands + { + x_128234 = ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225) - + sext_i32_i64(skip_threads_128240)]; + } + // perform operation + { + bool inactive_128241 = + slt64(srem64(sext_i32_i64(local_tid_128225 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_128225 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128225 - + skip_threads_128240) * + 32 + 32 - 1)); + + if (inactive_128241) { + x_128234 = x_128235; + } + if (!inactive_128241) { + double defunc_1_op_res_128236 = + x_128234 + x_128235; + + x_128234 = defunc_1_op_res_128236; + } + } + } + if (sle32(wave_sizze_128227, skip_threads_128240)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128240, local_tid_128225 - + squot32(local_tid_128225, 32) * 32) && + (squot32(local_tid_128225, 32) == 0 && + ltid_in_bounds_128237)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + x_128234; + x_128235 = x_128234; + } + } + if (sle32(wave_sizze_128227, skip_threads_128240)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128240 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128225, 32) == 0 || + !ltid_in_bounds_128237)) { + // read operands + { + x_85924 = x_85923; + x_85923 = ((__local + double *) red_arr_mem_128229)[sext_i32_i64(squot32(local_tid_128225, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128242 = + slt64(srem64(sext_i32_i64(local_tid_128225), + k2p2zq_70876), + sext_i32_i64(local_tid_128225) - + 
sext_i32_i64(squot32(local_tid_128225, + 32) * 32 - 1)); + + if (inactive_128242) { + x_85923 = x_85924; + } + if (!inactive_128242) { + double defunc_1_op_res_85925 = x_85923 + + x_85924; + + x_85923 = defunc_1_op_res_85925; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + x_85923; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128225, 32) == 0) { + ((__local + double *) red_arr_mem_128229)[sext_i32_i64(local_tid_128225)] = + x_85924; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222) + + sext_i32_i64(local_tid_128225), m_70861 * k2p2zq_70876) && + slt64(sext_i32_i64(local_tid_128225), + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222))) { + ((__global + double *) mem_123944)[squot64(sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222) + + sext_i32_i64(local_tid_128225), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222) + + sext_i32_i64(local_tid_128225) - + squot64(sext_i32_i64(virt_group_id_128233) * + squot64(segred_group_sizze_85918, + segment_sizze_nonzzero_128222) + + sext_i32_i64(local_tid_128225), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_128229)[(sext_i32_i64(local_tid_128225) + + (int64_t) 1) * + segment_sizze_nonzzero_128222 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85918 +} +__kernel void mainDetailedzisegred_small_85786(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128153_backing_aligned_0, + 
int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85860, + int64_t segment_sizze_nonzzero_128146, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123786, + __global + unsigned char *mem_123913) +{ + #define segred_group_sizze_85859 (mainDetailedzisegred_group_sizze_85780) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128153_backing_0 = + (__local volatile + char *) red_arr_mem_128153_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128148; + int32_t local_tid_128149; + int64_t group_sizze_128152; + int32_t wave_sizze_128151; + int32_t group_tid_128150; + + global_tid_128148 = get_global_id(0); + local_tid_128149 = get_local_id(0); + group_sizze_128152 = get_local_size(0); + wave_sizze_128151 = LOCKSTEP_WIDTH; + group_tid_128150 = get_group_id(0); + + int32_t phys_tid_85786; + + phys_tid_85786 = global_tid_128148; + + __local char *red_arr_mem_128153; + + red_arr_mem_128153 = (__local char *) red_arr_mem_128153_backing_0; + + int32_t phys_group_id_128155; + + phys_group_id_128155 = get_group_id(0); + for (int32_t i_128156 = 0; i_128156 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146))) - + phys_group_id_128155, sext_i64_i32(num_groups_85860)); + i_128156++) { + int32_t virt_group_id_128157 = phys_group_id_128155 + i_128156 * + sext_i64_i32(num_groups_85860); + int64_t gtid_85777 = squot64(sext_i32_i64(local_tid_128149), + segment_sizze_nonzzero_128146) + + sext_i32_i64(virt_group_id_128157) * + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146); + int64_t gtid_85785 = srem64(sext_i32_i64(local_tid_128149), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (slt64(gtid_85777, + m_70861) && + 
slt64(sext_i32_i64(local_tid_128149), + k2p2zq_70876 * + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146)))) { + double x_85868 = ((__global double *) mem_120246)[gtid_85785 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85777 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85869 = ((__global + double *) mem_param_123786)[gtid_85777 * + k2p2zq_70876 + + gtid_85785]; + double defunc_1_f_res_85870 = x_85868 * x_85869; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + defunc_1_f_res_85870; + } + } else { + ((__local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_85863; + double x_85864; + double x_128158; + double x_128159; + bool ltid_in_bounds_128161; + + ltid_in_bounds_128161 = slt64(sext_i32_i64(local_tid_128149), + k2p2zq_70876 * + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146)); + + int32_t skip_threads_128162; + + // read input for in-block scan + { + if (ltid_in_bounds_128161) { + x_85864 = ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)]; + if ((local_tid_128149 - squot32(local_tid_128149, 32) * + 32) == 0) { + x_85863 = x_85864; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128162 = 1; + while (slt32(skip_threads_128162, 32)) { + if (sle32(skip_threads_128162, local_tid_128149 - + squot32(local_tid_128149, 32) * 32) && + ltid_in_bounds_128161) { + // read operands + { + x_85863 = ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149) - + sext_i32_i64(skip_threads_128162)]; + } + // perform operation + { + bool inactive_128163 = + slt64(srem64(sext_i32_i64(local_tid_128149), + k2p2zq_70876), + sext_i32_i64(local_tid_128149) - + 
sext_i32_i64(local_tid_128149 - + skip_threads_128162)); + + if (inactive_128163) { + x_85863 = x_85864; + } + if (!inactive_128163) { + double defunc_1_op_res_85865 = x_85863 + + x_85864; + + x_85863 = defunc_1_op_res_85865; + } + } + } + if (sle32(wave_sizze_128151, skip_threads_128162)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128162, local_tid_128149 - + squot32(local_tid_128149, 32) * 32) && + ltid_in_bounds_128161) { + // write result + { + ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + x_85863; + x_85864 = x_85863; + } + } + if (sle32(wave_sizze_128151, skip_threads_128162)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128162 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128149 - squot32(local_tid_128149, 32) * + 32) == 31 && ltid_in_bounds_128161) { + ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(squot32(local_tid_128149, + 32))] = + x_85863; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128164; + + // read input for in-block scan + { + if (squot32(local_tid_128149, 32) == 0 && + ltid_in_bounds_128161) { + x_128159 = ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)]; + if ((local_tid_128149 - squot32(local_tid_128149, + 32) * 32) == 0) { + x_128158 = x_128159; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128164 = 1; + while (slt32(skip_threads_128164, 32)) { + if (sle32(skip_threads_128164, local_tid_128149 - + squot32(local_tid_128149, 32) * 32) && + (squot32(local_tid_128149, 32) == 0 && + ltid_in_bounds_128161)) { + // read operands + { + x_128158 = ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149) - + sext_i32_i64(skip_threads_128164)]; + } + // perform operation + { + bool 
inactive_128165 = + slt64(srem64(sext_i32_i64(local_tid_128149 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_128149 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128149 - + skip_threads_128164) * + 32 + 32 - 1)); + + if (inactive_128165) { + x_128158 = x_128159; + } + if (!inactive_128165) { + double defunc_1_op_res_128160 = + x_128158 + x_128159; + + x_128158 = defunc_1_op_res_128160; + } + } + } + if (sle32(wave_sizze_128151, skip_threads_128164)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128164, local_tid_128149 - + squot32(local_tid_128149, 32) * 32) && + (squot32(local_tid_128149, 32) == 0 && + ltid_in_bounds_128161)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + x_128158; + x_128159 = x_128158; + } + } + if (sle32(wave_sizze_128151, skip_threads_128164)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128164 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128149, 32) == 0 || + !ltid_in_bounds_128161)) { + // read operands + { + x_85864 = x_85863; + x_85863 = ((__local + double *) red_arr_mem_128153)[sext_i32_i64(squot32(local_tid_128149, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128166 = + slt64(srem64(sext_i32_i64(local_tid_128149), + k2p2zq_70876), + sext_i32_i64(local_tid_128149) - + sext_i32_i64(squot32(local_tid_128149, + 32) * 32 - 1)); + + if (inactive_128166) { + x_85863 = x_85864; + } + if (!inactive_128166) { + double defunc_1_op_res_85865 = x_85863 + + x_85864; + + x_85863 = defunc_1_op_res_85865; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + x_85863; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128149, 32) == 0) { + ((__local + double *) red_arr_mem_128153)[sext_i32_i64(local_tid_128149)] = + 
x_85864; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128157) * + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146) + + sext_i32_i64(local_tid_128149), m_70861) && + slt64(sext_i32_i64(local_tid_128149), + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146))) { + ((__global + double *) mem_123913)[sext_i32_i64(virt_group_id_128157) * + squot64(segred_group_sizze_85859, + segment_sizze_nonzzero_128146) + + sext_i32_i64(local_tid_128149)] = + ((__local + double *) red_arr_mem_128153)[(sext_i32_i64(local_tid_128149) + + (int64_t) 1) * + segment_sizze_nonzzero_128146 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85859 +} +__kernel void mainDetailedzisegred_small_85813(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128086_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t defunc_2_reduce_res_70985, + int64_t index_primexp_72162, + int64_t num_groups_85832, + int64_t segment_sizze_nonzzero_128079, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_123901, + __global + unsigned char *mem_123904, + __global + unsigned char *mem_123907) +{ + #define segred_group_sizze_85831 (mainDetailedzisegred_group_sizze_85807) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128086_backing_0 = + (__local volatile + char *) red_arr_mem_128086_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128081; + int32_t local_tid_128082; + int64_t group_sizze_128085; + int32_t wave_sizze_128084; + int32_t group_tid_128083; + + global_tid_128081 = get_global_id(0); + local_tid_128082 = get_local_id(0); + group_sizze_128085 = get_local_size(0); + 
wave_sizze_128084 = LOCKSTEP_WIDTH; + group_tid_128083 = get_group_id(0); + + int32_t phys_tid_85813; + + phys_tid_85813 = global_tid_128081; + + __local char *red_arr_mem_128086; + + red_arr_mem_128086 = (__local char *) red_arr_mem_128086_backing_0; + + int32_t phys_group_id_128088; + + phys_group_id_128088 = get_group_id(0); + for (int32_t i_128089 = 0; i_128089 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079))) - + phys_group_id_128088, sext_i64_i32(num_groups_85832)); + i_128089++) { + int32_t virt_group_id_128090 = phys_group_id_128088 + i_128089 * + sext_i64_i32(num_groups_85832); + int64_t gtid_85804 = squot64(sext_i32_i64(local_tid_128082), + segment_sizze_nonzzero_128079) + + sext_i32_i64(virt_group_id_128090) * + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079); + int64_t gtid_85812 = srem64(sext_i32_i64(local_tid_128082), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && (slt64(gtid_85804, + m_70861) && + slt64(sext_i32_i64(local_tid_128082), + k2p2zq_70876 * + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079)))) { + double x_85842 = ((__global double *) mem_120246)[gtid_85812 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85804 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double defunc_0_f_res_85843; + double redout_119889 = 0.0; + + for (int64_t i_119890 = 0; i_119890 < k2p2zq_70876; + i_119890++) { + double x_85847 = ((__global double *) mem_120246)[i_119890 * + (defunc_2_reduce_res_70985 * + m_70861) + + gtid_85804 * + defunc_2_reduce_res_70985 + + index_primexp_72162]; + double x_85848 = ((__global double *) mem_123901)[i_119890 * + (k2p2zq_70876 * + m_70861) + + gtid_85804 * + k2p2zq_70876 + + gtid_85812]; + double defunc_1_f_res_85849 = x_85847 * x_85848; + double defunc_1_op_res_85846 = defunc_1_f_res_85849 + + redout_119889; + double redout_tmp_128091 = defunc_1_op_res_85846; 
+ + redout_119889 = redout_tmp_128091; + } + defunc_0_f_res_85843 = redout_119889; + + double defunc_1_f_res_85850 = x_85842 * defunc_0_f_res_85843; + + // save map-out results + { + ((__global double *) mem_123907)[gtid_85804 * k2p2zq_70876 + + gtid_85812] = + defunc_0_f_res_85843; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + defunc_1_f_res_85850; + } + } else { + ((__local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_85836; + double x_85837; + double x_128092; + double x_128093; + bool ltid_in_bounds_128095; + + ltid_in_bounds_128095 = slt64(sext_i32_i64(local_tid_128082), + k2p2zq_70876 * + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079)); + + int32_t skip_threads_128096; + + // read input for in-block scan + { + if (ltid_in_bounds_128095) { + x_85837 = ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)]; + if ((local_tid_128082 - squot32(local_tid_128082, 32) * + 32) == 0) { + x_85836 = x_85837; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128096 = 1; + while (slt32(skip_threads_128096, 32)) { + if (sle32(skip_threads_128096, local_tid_128082 - + squot32(local_tid_128082, 32) * 32) && + ltid_in_bounds_128095) { + // read operands + { + x_85836 = ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082) - + sext_i32_i64(skip_threads_128096)]; + } + // perform operation + { + bool inactive_128097 = + slt64(srem64(sext_i32_i64(local_tid_128082), + k2p2zq_70876), + sext_i32_i64(local_tid_128082) - + sext_i32_i64(local_tid_128082 - + skip_threads_128096)); + + if (inactive_128097) { + x_85836 = x_85837; + } + if (!inactive_128097) { + double defunc_1_op_res_85838 = x_85836 + + x_85837; + + x_85836 = 
defunc_1_op_res_85838; + } + } + } + if (sle32(wave_sizze_128084, skip_threads_128096)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128096, local_tid_128082 - + squot32(local_tid_128082, 32) * 32) && + ltid_in_bounds_128095) { + // write result + { + ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + x_85836; + x_85837 = x_85836; + } + } + if (sle32(wave_sizze_128084, skip_threads_128096)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128096 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128082 - squot32(local_tid_128082, 32) * + 32) == 31 && ltid_in_bounds_128095) { + ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(squot32(local_tid_128082, + 32))] = + x_85836; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128098; + + // read input for in-block scan + { + if (squot32(local_tid_128082, 32) == 0 && + ltid_in_bounds_128095) { + x_128093 = ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)]; + if ((local_tid_128082 - squot32(local_tid_128082, + 32) * 32) == 0) { + x_128092 = x_128093; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128098 = 1; + while (slt32(skip_threads_128098, 32)) { + if (sle32(skip_threads_128098, local_tid_128082 - + squot32(local_tid_128082, 32) * 32) && + (squot32(local_tid_128082, 32) == 0 && + ltid_in_bounds_128095)) { + // read operands + { + x_128092 = ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082) - + sext_i32_i64(skip_threads_128098)]; + } + // perform operation + { + bool inactive_128099 = + slt64(srem64(sext_i32_i64(local_tid_128082 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_128082 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128082 - + skip_threads_128098) * + 
32 + 32 - 1)); + + if (inactive_128099) { + x_128092 = x_128093; + } + if (!inactive_128099) { + double defunc_1_op_res_128094 = + x_128092 + x_128093; + + x_128092 = defunc_1_op_res_128094; + } + } + } + if (sle32(wave_sizze_128084, skip_threads_128098)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128098, local_tid_128082 - + squot32(local_tid_128082, 32) * 32) && + (squot32(local_tid_128082, 32) == 0 && + ltid_in_bounds_128095)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + x_128092; + x_128093 = x_128092; + } + } + if (sle32(wave_sizze_128084, skip_threads_128098)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128098 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128082, 32) == 0 || + !ltid_in_bounds_128095)) { + // read operands + { + x_85837 = x_85836; + x_85836 = ((__local + double *) red_arr_mem_128086)[sext_i32_i64(squot32(local_tid_128082, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128100 = + slt64(srem64(sext_i32_i64(local_tid_128082), + k2p2zq_70876), + sext_i32_i64(local_tid_128082) - + sext_i32_i64(squot32(local_tid_128082, + 32) * 32 - 1)); + + if (inactive_128100) { + x_85836 = x_85837; + } + if (!inactive_128100) { + double defunc_1_op_res_85838 = x_85836 + + x_85837; + + x_85836 = defunc_1_op_res_85838; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + x_85836; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128082, 32) == 0) { + ((__local + double *) red_arr_mem_128086)[sext_i32_i64(local_tid_128082)] = + x_85837; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128090) * + squot64(segred_group_sizze_85831, + 
segment_sizze_nonzzero_128079) + + sext_i32_i64(local_tid_128082), m_70861) && + slt64(sext_i32_i64(local_tid_128082), + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079))) { + ((__global + double *) mem_123904)[sext_i32_i64(virt_group_id_128090) * + squot64(segred_group_sizze_85831, + segment_sizze_nonzzero_128079) + + sext_i32_i64(local_tid_128082)] = + ((__local + double *) red_arr_mem_128086)[(sext_i32_i64(local_tid_128082) + + (int64_t) 1) * + segment_sizze_nonzzero_128079 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_85831 +} +__kernel void mainDetailedzisegred_small_86170(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128421_backing_aligned_0, + int64_t m_70861, + int64_t num_recresids_padded_71534, + int64_t num_groups_86252, + int64_t segment_sizze_nonzzero_128414, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124051, + __global + unsigned char *mem_124054) +{ + #define segred_group_sizze_86251 (mainDetailedzisegred_group_sizze_86164) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128421_backing_0 = + (__local volatile + char *) red_arr_mem_128421_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128416; + int32_t local_tid_128417; + int64_t group_sizze_128420; + int32_t wave_sizze_128419; + int32_t group_tid_128418; + + global_tid_128416 = get_global_id(0); + local_tid_128417 = get_local_id(0); + group_sizze_128420 = get_local_size(0); + wave_sizze_128419 = LOCKSTEP_WIDTH; + group_tid_128418 = get_group_id(0); + + int32_t phys_tid_86170; + + phys_tid_86170 = global_tid_128416; + + __local char *red_arr_mem_128421; + + red_arr_mem_128421 = (__local char *) red_arr_mem_128421_backing_0; + + int32_t phys_group_id_128423; + + phys_group_id_128423 = 
get_group_id(0); + for (int32_t i_128424 = 0; i_128424 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414))) - + phys_group_id_128423, sext_i64_i32(num_groups_86252)); + i_128424++) { + int32_t virt_group_id_128425 = phys_group_id_128423 + i_128424 * + sext_i64_i32(num_groups_86252); + int64_t gtid_86161 = squot64(sext_i32_i64(local_tid_128417), + segment_sizze_nonzzero_128414) + + sext_i32_i64(virt_group_id_128425) * + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414); + int64_t gtid_86169 = srem64(sext_i32_i64(local_tid_128417), + num_recresids_padded_71534); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_71534) && + (slt64(gtid_86161, m_70861) && + slt64(sext_i32_i64(local_tid_128417), + num_recresids_padded_71534 * + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414)))) { + double x_86260 = ((__global double *) mem_124045)[gtid_86161 * + num_recresids_padded_71534 + + gtid_86169]; + bool isnan_res_86261; + + isnan_res_86261 = futrts_isnan64(x_86260); + + double defunc_0_f_res_86262; + + if (isnan_res_86261) { + defunc_0_f_res_86262 = 0.0; + } else { + double x_mean_86259 = ((__global + double *) mem_124051)[gtid_86161]; + double x_86263 = x_86260 - x_mean_86259; + double defunc_0_f_res_f_res_86264 = fpow64(x_86263, 2.0); + + defunc_0_f_res_86262 = defunc_0_f_res_f_res_86264; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + defunc_0_f_res_86262; + } + } else { + ((__local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_71534)) { + // perform segmented scan to imitate reduction + { + double x_86255; + double x_86256; + double x_128426; + double x_128427; + bool ltid_in_bounds_128429; + + ltid_in_bounds_128429 = 
slt64(sext_i32_i64(local_tid_128417), + num_recresids_padded_71534 * + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414)); + + int32_t skip_threads_128430; + + // read input for in-block scan + { + if (ltid_in_bounds_128429) { + x_86256 = ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)]; + if ((local_tid_128417 - squot32(local_tid_128417, 32) * + 32) == 0) { + x_86255 = x_86256; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128430 = 1; + while (slt32(skip_threads_128430, 32)) { + if (sle32(skip_threads_128430, local_tid_128417 - + squot32(local_tid_128417, 32) * 32) && + ltid_in_bounds_128429) { + // read operands + { + x_86255 = ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417) - + sext_i32_i64(skip_threads_128430)]; + } + // perform operation + { + bool inactive_128431 = + slt64(srem64(sext_i32_i64(local_tid_128417), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128417) - + sext_i32_i64(local_tid_128417 - + skip_threads_128430)); + + if (inactive_128431) { + x_86255 = x_86256; + } + if (!inactive_128431) { + double defunc_1_op_res_86257 = x_86255 + + x_86256; + + x_86255 = defunc_1_op_res_86257; + } + } + } + if (sle32(wave_sizze_128419, skip_threads_128430)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128430, local_tid_128417 - + squot32(local_tid_128417, 32) * 32) && + ltid_in_bounds_128429) { + // write result + { + ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + x_86255; + x_86256 = x_86255; + } + } + if (sle32(wave_sizze_128419, skip_threads_128430)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128430 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128417 - squot32(local_tid_128417, 32) * + 32) == 31 && ltid_in_bounds_128429) { + ((volatile __local + double *) 
red_arr_mem_128421)[sext_i32_i64(squot32(local_tid_128417, + 32))] = + x_86255; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128432; + + // read input for in-block scan + { + if (squot32(local_tid_128417, 32) == 0 && + ltid_in_bounds_128429) { + x_128427 = ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)]; + if ((local_tid_128417 - squot32(local_tid_128417, + 32) * 32) == 0) { + x_128426 = x_128427; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128432 = 1; + while (slt32(skip_threads_128432, 32)) { + if (sle32(skip_threads_128432, local_tid_128417 - + squot32(local_tid_128417, 32) * 32) && + (squot32(local_tid_128417, 32) == 0 && + ltid_in_bounds_128429)) { + // read operands + { + x_128426 = ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417) - + sext_i32_i64(skip_threads_128432)]; + } + // perform operation + { + bool inactive_128433 = + slt64(srem64(sext_i32_i64(local_tid_128417 * + 32 + 32 - 1), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128417 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128417 - + skip_threads_128432) * + 32 + 32 - 1)); + + if (inactive_128433) { + x_128426 = x_128427; + } + if (!inactive_128433) { + double defunc_1_op_res_128428 = + x_128426 + x_128427; + + x_128426 = defunc_1_op_res_128428; + } + } + } + if (sle32(wave_sizze_128419, skip_threads_128432)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128432, local_tid_128417 - + squot32(local_tid_128417, 32) * 32) && + (squot32(local_tid_128417, 32) == 0 && + ltid_in_bounds_128429)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + x_128426; + x_128427 = x_128426; + } + } + if (sle32(wave_sizze_128419, skip_threads_128432)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128432 *= 2; + } + } + } 
+ barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128417, 32) == 0 || + !ltid_in_bounds_128429)) { + // read operands + { + x_86256 = x_86255; + x_86255 = ((__local + double *) red_arr_mem_128421)[sext_i32_i64(squot32(local_tid_128417, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128434 = + slt64(srem64(sext_i32_i64(local_tid_128417), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128417) - + sext_i32_i64(squot32(local_tid_128417, + 32) * 32 - 1)); + + if (inactive_128434) { + x_86255 = x_86256; + } + if (!inactive_128434) { + double defunc_1_op_res_86257 = x_86255 + + x_86256; + + x_86255 = defunc_1_op_res_86257; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + x_86255; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128417, 32) == 0) { + ((__local + double *) red_arr_mem_128421)[sext_i32_i64(local_tid_128417)] = + x_86256; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128425) * + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414) + + sext_i32_i64(local_tid_128417), m_70861) && + slt64(sext_i32_i64(local_tid_128417), + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414))) { + ((__global + double *) mem_124054)[sext_i32_i64(virt_group_id_128425) * + squot64(segred_group_sizze_86251, + segment_sizze_nonzzero_128414) + + sext_i32_i64(local_tid_128417)] = + ((__local + double *) red_arr_mem_128421)[(sext_i32_i64(local_tid_128417) + + (int64_t) 1) * + segment_sizze_nonzzero_128414 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86251 +} +__kernel void 
mainDetailedzisegred_small_86200(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128346_backing_aligned_0, + int64_t m_70861, + int64_t num_recresids_padded_71534, + int64_t num_groups_86230, + int64_t segment_sizze_nonzzero_128339, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124048) +{ + #define segred_group_sizze_86229 (mainDetailedzisegred_group_sizze_86194) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128346_backing_0 = + (__local volatile + char *) red_arr_mem_128346_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128341; + int32_t local_tid_128342; + int64_t group_sizze_128345; + int32_t wave_sizze_128344; + int32_t group_tid_128343; + + global_tid_128341 = get_global_id(0); + local_tid_128342 = get_local_id(0); + group_sizze_128345 = get_local_size(0); + wave_sizze_128344 = LOCKSTEP_WIDTH; + group_tid_128343 = get_group_id(0); + + int32_t phys_tid_86200; + + phys_tid_86200 = global_tid_128341; + + __local char *red_arr_mem_128346; + + red_arr_mem_128346 = (__local char *) red_arr_mem_128346_backing_0; + + int32_t phys_group_id_128348; + + phys_group_id_128348 = get_group_id(0); + for (int32_t i_128349 = 0; i_128349 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339))) - + phys_group_id_128348, sext_i64_i32(num_groups_86230)); + i_128349++) { + int32_t virt_group_id_128350 = phys_group_id_128348 + i_128349 * + sext_i64_i32(num_groups_86230); + int64_t gtid_86191 = squot64(sext_i32_i64(local_tid_128342), + segment_sizze_nonzzero_128339) + + sext_i32_i64(virt_group_id_128350) * + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339); + int64_t gtid_86199 = srem64(sext_i32_i64(local_tid_128342), + num_recresids_padded_71534); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, 
num_recresids_padded_71534) && + (slt64(gtid_86191, m_70861) && + slt64(sext_i32_i64(local_tid_128342), + num_recresids_padded_71534 * + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339)))) { + double x_86241 = ((__global double *) mem_124045)[gtid_86191 * + num_recresids_padded_71534 + + gtid_86199]; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + x_86241; + } + } else { + ((__local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_71534)) { + // perform segmented scan to imitate reduction + { + double x_86233; + double x_86234; + double x_128351; + double x_128352; + bool ltid_in_bounds_128358; + + ltid_in_bounds_128358 = slt64(sext_i32_i64(local_tid_128342), + num_recresids_padded_71534 * + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339)); + + int32_t skip_threads_128359; + + // read input for in-block scan + { + if (ltid_in_bounds_128358) { + x_86234 = ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)]; + if ((local_tid_128342 - squot32(local_tid_128342, 32) * + 32) == 0) { + x_86233 = x_86234; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128359 = 1; + while (slt32(skip_threads_128359, 32)) { + if (sle32(skip_threads_128359, local_tid_128342 - + squot32(local_tid_128342, 32) * 32) && + ltid_in_bounds_128358) { + // read operands + { + x_86233 = ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342) - + sext_i32_i64(skip_threads_128359)]; + } + // perform operation + { + bool inactive_128360 = + slt64(srem64(sext_i32_i64(local_tid_128342), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128342) - + sext_i32_i64(local_tid_128342 - + skip_threads_128359)); + + if (inactive_128360) { + x_86233 = x_86234; + } + if 
(!inactive_128360) { + bool isnan_res_86235; + + isnan_res_86235 = futrts_isnan64(x_86233); + + double defunc_1_op_res_86236; + + if (isnan_res_86235) { + defunc_1_op_res_86236 = x_86234; + } else { + bool isnan_res_86237; + + isnan_res_86237 = + futrts_isnan64(x_86234); + + double defunc_1_op_res_f_res_86238; + + if (isnan_res_86237) { + defunc_1_op_res_f_res_86238 = + x_86233; + } else { + double + defunc_1_op_res_f_res_f_res_86239 = + x_86233 + x_86234; + + defunc_1_op_res_f_res_86238 = + defunc_1_op_res_f_res_f_res_86239; + } + defunc_1_op_res_86236 = + defunc_1_op_res_f_res_86238; + } + x_86233 = defunc_1_op_res_86236; + } + } + } + if (sle32(wave_sizze_128344, skip_threads_128359)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128359, local_tid_128342 - + squot32(local_tid_128342, 32) * 32) && + ltid_in_bounds_128358) { + // write result + { + ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + x_86233; + x_86234 = x_86233; + } + } + if (sle32(wave_sizze_128344, skip_threads_128359)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128359 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128342 - squot32(local_tid_128342, 32) * + 32) == 31 && ltid_in_bounds_128358) { + ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(squot32(local_tid_128342, + 32))] = + x_86233; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128361; + + // read input for in-block scan + { + if (squot32(local_tid_128342, 32) == 0 && + ltid_in_bounds_128358) { + x_128352 = ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)]; + if ((local_tid_128342 - squot32(local_tid_128342, + 32) * 32) == 0) { + x_128351 = x_128352; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128361 = 1; + 
while (slt32(skip_threads_128361, 32)) { + if (sle32(skip_threads_128361, local_tid_128342 - + squot32(local_tid_128342, 32) * 32) && + (squot32(local_tid_128342, 32) == 0 && + ltid_in_bounds_128358)) { + // read operands + { + x_128351 = ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342) - + sext_i32_i64(skip_threads_128361)]; + } + // perform operation + { + bool inactive_128362 = + slt64(srem64(sext_i32_i64(local_tid_128342 * + 32 + 32 - 1), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128342 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128342 - + skip_threads_128361) * + 32 + 32 - 1)); + + if (inactive_128362) { + x_128351 = x_128352; + } + if (!inactive_128362) { + bool isnan_res_128353; + + isnan_res_128353 = + futrts_isnan64(x_128351); + + double defunc_1_op_res_128354; + + if (isnan_res_128353) { + defunc_1_op_res_128354 = x_128352; + } else { + bool isnan_res_128355; + + isnan_res_128355 = + futrts_isnan64(x_128352); + + double defunc_1_op_res_f_res_128356; + + if (isnan_res_128355) { + defunc_1_op_res_f_res_128356 = + x_128351; + } else { + double + defunc_1_op_res_f_res_f_res_128357 + = x_128351 + x_128352; + + defunc_1_op_res_f_res_128356 = + defunc_1_op_res_f_res_f_res_128357; + } + defunc_1_op_res_128354 = + defunc_1_op_res_f_res_128356; + } + x_128351 = defunc_1_op_res_128354; + } + } + } + if (sle32(wave_sizze_128344, skip_threads_128361)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128361, local_tid_128342 - + squot32(local_tid_128342, 32) * 32) && + (squot32(local_tid_128342, 32) == 0 && + ltid_in_bounds_128358)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + x_128351; + x_128352 = x_128351; + } + } + if (sle32(wave_sizze_128344, skip_threads_128361)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128361 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if 
(!(squot32(local_tid_128342, 32) == 0 || + !ltid_in_bounds_128358)) { + // read operands + { + x_86234 = x_86233; + x_86233 = ((__local + double *) red_arr_mem_128346)[sext_i32_i64(squot32(local_tid_128342, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128363 = + slt64(srem64(sext_i32_i64(local_tid_128342), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128342) - + sext_i32_i64(squot32(local_tid_128342, + 32) * 32 - 1)); + + if (inactive_128363) { + x_86233 = x_86234; + } + if (!inactive_128363) { + bool isnan_res_86235; + + isnan_res_86235 = futrts_isnan64(x_86233); + + double defunc_1_op_res_86236; + + if (isnan_res_86235) { + defunc_1_op_res_86236 = x_86234; + } else { + bool isnan_res_86237; + + isnan_res_86237 = futrts_isnan64(x_86234); + + double defunc_1_op_res_f_res_86238; + + if (isnan_res_86237) { + defunc_1_op_res_f_res_86238 = x_86233; + } else { + double defunc_1_op_res_f_res_f_res_86239 + = x_86233 + x_86234; + + defunc_1_op_res_f_res_86238 = + defunc_1_op_res_f_res_f_res_86239; + } + defunc_1_op_res_86236 = + defunc_1_op_res_f_res_86238; + } + x_86233 = defunc_1_op_res_86236; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + x_86233; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128342, 32) == 0) { + ((__local + double *) red_arr_mem_128346)[sext_i32_i64(local_tid_128342)] = + x_86234; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128350) * + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339) + + sext_i32_i64(local_tid_128342), m_70861) && + slt64(sext_i32_i64(local_tid_128342), + squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339))) { + ((__global + double *) mem_124048)[sext_i32_i64(virt_group_id_128350) * + 
squot64(segred_group_sizze_86229, + segment_sizze_nonzzero_128339) + + sext_i32_i64(local_tid_128342)] = + ((__local + double *) red_arr_mem_128346)[(sext_i32_i64(local_tid_128342) + + (int64_t) 1) * + segment_sizze_nonzzero_128339 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86229 +} +__kernel void mainDetailedzisegred_small_86835(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128651_backing_aligned_0, + int64_t m_70861, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t num_groups_87112, + int64_t segment_sizze_nonzzero_128644, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124130) +{ + #define segred_group_sizze_87111 (mainDetailedzisegred_group_sizze_86829) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128651_backing_0 = + (__local volatile + char *) red_arr_mem_128651_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128646; + int32_t local_tid_128647; + int64_t group_sizze_128650; + int32_t wave_sizze_128649; + int32_t group_tid_128648; + + global_tid_128646 = get_global_id(0); + local_tid_128647 = get_local_id(0); + group_sizze_128650 = get_local_size(0); + wave_sizze_128649 = LOCKSTEP_WIDTH; + group_tid_128648 = get_group_id(0); + + int32_t phys_tid_86835; + + phys_tid_86835 = global_tid_128646; + + __local char *red_arr_mem_128651; + + red_arr_mem_128651 = (__local char *) red_arr_mem_128651_backing_0; + + int32_t phys_group_id_128653; + + phys_group_id_128653 = get_group_id(0); + for (int32_t i_128654 = 0; i_128654 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644))) - + phys_group_id_128653, sext_i64_i32(num_groups_87112)); + 
i_128654++) { + int32_t virt_group_id_128655 = phys_group_id_128653 + i_128654 * + sext_i64_i32(num_groups_87112); + int64_t gtid_86826 = squot64(sext_i32_i64(local_tid_128647), + segment_sizze_nonzzero_128644) + + sext_i32_i64(virt_group_id_128655) * + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644); + int64_t gtid_86834 = srem64(sext_i32_i64(local_tid_128647), + num_recresids_padded_71534); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_71534) && + (slt64(gtid_86826, m_70861) && + slt64(sext_i32_i64(local_tid_128647), + num_recresids_padded_71534 * + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644)))) { + int64_t slice_115288 = (int64_t) 1 + gtid_86834; + double x_87121 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86826 * + Nmk_72261 + + slice_115288]; + double x_87122 = ((__global double *) mem_124078)[gtid_86826 * + Nmk_72261 + + slice_115288]; + double abs_res_87123 = fabs(x_87121); + bool cond_87124 = x_87122 < abs_res_87123; + int64_t defunc_2_f_res_87125; + + if (cond_87124) { + defunc_2_f_res_87125 = gtid_86834; + } else { + defunc_2_f_res_87125 = (int64_t) 9223372036854775807; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + defunc_2_f_res_87125; + } + } else { + ((__local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + (int64_t) 9223372036854775807; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_71534)) { + // perform segmented scan to imitate reduction + { + int64_t x_87115; + int64_t x_87116; + int64_t x_128656; + int64_t x_128657; + bool ltid_in_bounds_128659; + + ltid_in_bounds_128659 = slt64(sext_i32_i64(local_tid_128647), + num_recresids_padded_71534 * + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644)); + + int32_t skip_threads_128660; + + // read input for in-block scan + { + 
if (ltid_in_bounds_128659) { + x_87116 = ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)]; + if ((local_tid_128647 - squot32(local_tid_128647, 32) * + 32) == 0) { + x_87115 = x_87116; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128660 = 1; + while (slt32(skip_threads_128660, 32)) { + if (sle32(skip_threads_128660, local_tid_128647 - + squot32(local_tid_128647, 32) * 32) && + ltid_in_bounds_128659) { + // read operands + { + x_87115 = ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647) - + sext_i32_i64(skip_threads_128660)]; + } + // perform operation + { + bool inactive_128661 = + slt64(srem64(sext_i32_i64(local_tid_128647), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128647) - + sext_i32_i64(local_tid_128647 - + skip_threads_128660)); + + if (inactive_128661) { + x_87115 = x_87116; + } + if (!inactive_128661) { + int64_t defunc_1_op_res_87117 = + smin64(x_87115, x_87116); + + x_87115 = defunc_1_op_res_87117; + } + } + } + if (sle32(wave_sizze_128649, skip_threads_128660)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128660, local_tid_128647 - + squot32(local_tid_128647, 32) * 32) && + ltid_in_bounds_128659) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + x_87115; + x_87116 = x_87115; + } + } + if (sle32(wave_sizze_128649, skip_threads_128660)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128660 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128647 - squot32(local_tid_128647, 32) * + 32) == 31 && ltid_in_bounds_128659) { + ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(squot32(local_tid_128647, + 32))] = + x_87115; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t 
skip_threads_128662; + + // read input for in-block scan + { + if (squot32(local_tid_128647, 32) == 0 && + ltid_in_bounds_128659) { + x_128657 = ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)]; + if ((local_tid_128647 - squot32(local_tid_128647, + 32) * 32) == 0) { + x_128656 = x_128657; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128662 = 1; + while (slt32(skip_threads_128662, 32)) { + if (sle32(skip_threads_128662, local_tid_128647 - + squot32(local_tid_128647, 32) * 32) && + (squot32(local_tid_128647, 32) == 0 && + ltid_in_bounds_128659)) { + // read operands + { + x_128656 = ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647) - + sext_i32_i64(skip_threads_128662)]; + } + // perform operation + { + bool inactive_128663 = + slt64(srem64(sext_i32_i64(local_tid_128647 * + 32 + 32 - 1), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128647 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128647 - + skip_threads_128662) * + 32 + 32 - 1)); + + if (inactive_128663) { + x_128656 = x_128657; + } + if (!inactive_128663) { + int64_t defunc_1_op_res_128658 = + smin64(x_128656, x_128657); + + x_128656 = defunc_1_op_res_128658; + } + } + } + if (sle32(wave_sizze_128649, skip_threads_128662)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128662, local_tid_128647 - + squot32(local_tid_128647, 32) * 32) && + (squot32(local_tid_128647, 32) == 0 && + ltid_in_bounds_128659)) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + x_128656; + x_128657 = x_128656; + } + } + if (sle32(wave_sizze_128649, skip_threads_128662)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128662 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128647, 32) == 0 || + !ltid_in_bounds_128659)) { + // read operands + { + x_87116 = x_87115; + 
x_87115 = ((__local + int64_t *) red_arr_mem_128651)[sext_i32_i64(squot32(local_tid_128647, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128664 = + slt64(srem64(sext_i32_i64(local_tid_128647), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128647) - + sext_i32_i64(squot32(local_tid_128647, + 32) * 32 - 1)); + + if (inactive_128664) { + x_87115 = x_87116; + } + if (!inactive_128664) { + int64_t defunc_1_op_res_87117 = smin64(x_87115, + x_87116); + + x_87115 = defunc_1_op_res_87117; + } + } + // write final result + { + ((__local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + x_87115; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128647, 32) == 0) { + ((__local + int64_t *) red_arr_mem_128651)[sext_i32_i64(local_tid_128647)] = + x_87116; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128655) * + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644) + + sext_i32_i64(local_tid_128647), m_70861) && + slt64(sext_i32_i64(local_tid_128647), + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644))) { + ((__global + int64_t *) mem_124130)[sext_i32_i64(virt_group_id_128655) * + squot64(segred_group_sizze_87111, + segment_sizze_nonzzero_128644) + + sext_i32_i64(local_tid_128647)] = + ((__local + int64_t *) red_arr_mem_128651)[(sext_i32_i64(local_tid_128647) + + (int64_t) 1) * + segment_sizze_nonzzero_128644 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_87111 +} +__kernel void mainDetailedzisegred_small_86960(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128586_backing_aligned_0, + int64_t m_70861, + int64_t num_recresids_padded_71534, + int64_t Nmk_72261, + int64_t 
num_groups_86992, + int64_t segment_sizze_nonzzero_128579, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124121, + __global + unsigned char *mem_124124) +{ + #define segred_group_sizze_86991 (mainDetailedzisegred_group_sizze_86954) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128586_backing_0 = + (__local volatile + char *) red_arr_mem_128586_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128581; + int32_t local_tid_128582; + int64_t group_sizze_128585; + int32_t wave_sizze_128584; + int32_t group_tid_128583; + + global_tid_128581 = get_global_id(0); + local_tid_128582 = get_local_id(0); + group_sizze_128585 = get_local_size(0); + wave_sizze_128584 = LOCKSTEP_WIDTH; + group_tid_128583 = get_group_id(0); + + int32_t phys_tid_86960; + + phys_tid_86960 = global_tid_128581; + + __local char *red_arr_mem_128586; + + red_arr_mem_128586 = (__local char *) red_arr_mem_128586_backing_0; + + int32_t phys_group_id_128588; + + phys_group_id_128588 = get_group_id(0); + for (int32_t i_128589 = 0; i_128589 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579))) - + phys_group_id_128588, sext_i64_i32(num_groups_86992)); + i_128589++) { + int32_t virt_group_id_128590 = phys_group_id_128588 + i_128589 * + sext_i64_i32(num_groups_86992); + int64_t gtid_86951 = squot64(sext_i32_i64(local_tid_128582), + segment_sizze_nonzzero_128579) + + sext_i32_i64(virt_group_id_128590) * + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579); + int64_t gtid_86959 = srem64(sext_i32_i64(local_tid_128582), + num_recresids_padded_71534); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_71534) && + (slt64(gtid_86951, m_70861) && + slt64(sext_i32_i64(local_tid_128582), + num_recresids_padded_71534 * + 
squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579)))) { + double i64_res_86999 = ((__global + double *) mem_124121)[gtid_86951]; + int64_t slice_115286 = (int64_t) 1 + gtid_86959; + double x_87000 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_86951 * + Nmk_72261 + + slice_115286]; + int64_t x_87002 = mul64((int64_t) 2, gtid_86959); + int64_t i64_arg_87003 = add64((int64_t) 2, x_87002); + double i64_res_87004 = sitofp_i64_f64(i64_arg_87003); + double y_87005 = i64_res_87004 / i64_res_86999; + double lifted_div_res_87006 = 1.0 + y_87005; + double abs_arg_87007 = x_87000 / lifted_div_res_87006; + double abs_res_87008 = fabs(abs_arg_87007); + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + abs_res_87008; + } + } else { + ((__local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + -INFINITY; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_71534)) { + // perform segmented scan to imitate reduction + { + double x_86995; + double x_86996; + double x_128591; + double x_128592; + bool ltid_in_bounds_128594; + + ltid_in_bounds_128594 = slt64(sext_i32_i64(local_tid_128582), + num_recresids_padded_71534 * + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579)); + + int32_t skip_threads_128595; + + // read input for in-block scan + { + if (ltid_in_bounds_128594) { + x_86996 = ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)]; + if ((local_tid_128582 - squot32(local_tid_128582, 32) * + 32) == 0) { + x_86995 = x_86996; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128595 = 1; + while (slt32(skip_threads_128595, 32)) { + if (sle32(skip_threads_128595, local_tid_128582 - + squot32(local_tid_128582, 32) * 32) && + ltid_in_bounds_128594) { + // read operands + { + x_86995 = ((volatile __local + double *) 
red_arr_mem_128586)[sext_i32_i64(local_tid_128582) - + sext_i32_i64(skip_threads_128595)]; + } + // perform operation + { + bool inactive_128596 = + slt64(srem64(sext_i32_i64(local_tid_128582), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128582) - + sext_i32_i64(local_tid_128582 - + skip_threads_128595)); + + if (inactive_128596) { + x_86995 = x_86996; + } + if (!inactive_128596) { + double defunc_1_op_res_86997 = + fmax64(x_86995, x_86996); + + x_86995 = defunc_1_op_res_86997; + } + } + } + if (sle32(wave_sizze_128584, skip_threads_128595)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128595, local_tid_128582 - + squot32(local_tid_128582, 32) * 32) && + ltid_in_bounds_128594) { + // write result + { + ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + x_86995; + x_86996 = x_86995; + } + } + if (sle32(wave_sizze_128584, skip_threads_128595)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128595 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128582 - squot32(local_tid_128582, 32) * + 32) == 31 && ltid_in_bounds_128594) { + ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(squot32(local_tid_128582, + 32))] = + x_86995; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128597; + + // read input for in-block scan + { + if (squot32(local_tid_128582, 32) == 0 && + ltid_in_bounds_128594) { + x_128592 = ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)]; + if ((local_tid_128582 - squot32(local_tid_128582, + 32) * 32) == 0) { + x_128591 = x_128592; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128597 = 1; + while (slt32(skip_threads_128597, 32)) { + if (sle32(skip_threads_128597, local_tid_128582 - + squot32(local_tid_128582, 32) * 32) && + 
(squot32(local_tid_128582, 32) == 0 && + ltid_in_bounds_128594)) { + // read operands + { + x_128591 = ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582) - + sext_i32_i64(skip_threads_128597)]; + } + // perform operation + { + bool inactive_128598 = + slt64(srem64(sext_i32_i64(local_tid_128582 * + 32 + 32 - 1), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128582 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128582 - + skip_threads_128597) * + 32 + 32 - 1)); + + if (inactive_128598) { + x_128591 = x_128592; + } + if (!inactive_128598) { + double defunc_1_op_res_128593 = + fmax64(x_128591, x_128592); + + x_128591 = defunc_1_op_res_128593; + } + } + } + if (sle32(wave_sizze_128584, skip_threads_128597)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128597, local_tid_128582 - + squot32(local_tid_128582, 32) * 32) && + (squot32(local_tid_128582, 32) == 0 && + ltid_in_bounds_128594)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + x_128591; + x_128592 = x_128591; + } + } + if (sle32(wave_sizze_128584, skip_threads_128597)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128597 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128582, 32) == 0 || + !ltid_in_bounds_128594)) { + // read operands + { + x_86996 = x_86995; + x_86995 = ((__local + double *) red_arr_mem_128586)[sext_i32_i64(squot32(local_tid_128582, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128599 = + slt64(srem64(sext_i32_i64(local_tid_128582), + num_recresids_padded_71534), + sext_i32_i64(local_tid_128582) - + sext_i32_i64(squot32(local_tid_128582, + 32) * 32 - 1)); + + if (inactive_128599) { + x_86995 = x_86996; + } + if (!inactive_128599) { + double defunc_1_op_res_86997 = fmax64(x_86995, + x_86996); + + x_86995 = defunc_1_op_res_86997; + } + } + // write final result + { + 
((__local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + x_86995; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128582, 32) == 0) { + ((__local + double *) red_arr_mem_128586)[sext_i32_i64(local_tid_128582)] = + x_86996; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128590) * + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579) + + sext_i32_i64(local_tid_128582), m_70861) && + slt64(sext_i32_i64(local_tid_128582), + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579))) { + ((__global + double *) mem_124124)[sext_i32_i64(virt_group_id_128590) * + squot64(segred_group_sizze_86991, + segment_sizze_nonzzero_128579) + + sext_i32_i64(local_tid_128582)] = + ((__local + double *) red_arr_mem_128586)[(sext_i32_i64(local_tid_128582) + + (int64_t) 1) * + segment_sizze_nonzzero_128579 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_86991 +} +__kernel void mainDetailedzisegred_small_87308(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128785_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t k2p2zq_70876, + int64_t num_groups_87447, + int64_t segment_sizze_nonzzero_128778, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124276, + __global + unsigned char *mem_124281) +{ + #define segred_group_sizze_87446 (mainDetailedzisegred_group_sizze_87302) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128785_backing_0 = + (__local volatile + char *) red_arr_mem_128785_backing_aligned_0; + + if (*global_failure >= 0) + return; + + 
int32_t global_tid_128780; + int32_t local_tid_128781; + int64_t group_sizze_128784; + int32_t wave_sizze_128783; + int32_t group_tid_128782; + + global_tid_128780 = get_global_id(0); + local_tid_128781 = get_local_id(0); + group_sizze_128784 = get_local_size(0); + wave_sizze_128783 = LOCKSTEP_WIDTH; + group_tid_128782 = get_group_id(0); + + int32_t phys_tid_87308; + + phys_tid_87308 = global_tid_128780; + + __local char *red_arr_mem_128785; + + red_arr_mem_128785 = (__local char *) red_arr_mem_128785_backing_0; + + int32_t phys_group_id_128787; + + phys_group_id_128787 = get_group_id(0); + for (int32_t i_128788 = 0; i_128788 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876 * k2p2zq_70876, + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778))) - + phys_group_id_128787, sext_i64_i32(num_groups_87447)); + i_128788++) { + int32_t virt_group_id_128789 = phys_group_id_128787 + i_128788 * + sext_i64_i32(num_groups_87447); + int64_t gtid_87295 = squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778), + k2p2zq_70876 * k2p2zq_70876); + int64_t gtid_87296 = squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) - + squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876); + int64_t gtid_87297 = squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) - + squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + 
sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778), k2p2zq_70876 * + k2p2zq_70876) * (k2p2zq_70876 * k2p2zq_70876) - + squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) - + squot64(squot64(sext_i32_i64(local_tid_128781), + segment_sizze_nonzzero_128778) + + sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778), + k2p2zq_70876 * k2p2zq_70876) * (k2p2zq_70876 * + k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876; + int64_t gtid_87307 = srem64(sext_i32_i64(local_tid_128781), n_70864); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_70864) && (((slt64(gtid_87295, m_70861) && + slt64(gtid_87296, + k2p2zq_70876)) && + slt64(gtid_87297, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_128781), + n_70864 * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778)))) { + double x_87456 = ((__global double *) mem_124142)[gtid_87295 * + N_70860 + + gtid_87307]; + double x_87457 = ((__global + double *) binop_p_mem_120117)[gtid_87296 * + N_70860 + + gtid_87307]; + double x_87458 = ((__global double *) mem_124276)[gtid_87297 * + N_70860 + + gtid_87307]; + double x_87459 = x_87457 * x_87458; + bool isnan_res_87460; + + isnan_res_87460 = futrts_isnan64(x_87456); + + double y_87461; + + if (isnan_res_87460) { + y_87461 = 0.0; + } else { + y_87461 = 1.0; + } + + double defunc_2_f_res_87462 = x_87459 * y_87461; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + defunc_2_f_res_87462; + } + } else { + ((__local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_70864)) { + // perform segmented scan to imitate reduction + { + 
double x_87450; + double x_87451; + double x_128790; + double x_128791; + bool ltid_in_bounds_128793; + + ltid_in_bounds_128793 = slt64(sext_i32_i64(local_tid_128781), + n_70864 * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778)); + + int32_t skip_threads_128794; + + // read input for in-block scan + { + if (ltid_in_bounds_128793) { + x_87451 = ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)]; + if ((local_tid_128781 - squot32(local_tid_128781, 32) * + 32) == 0) { + x_87450 = x_87451; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128794 = 1; + while (slt32(skip_threads_128794, 32)) { + if (sle32(skip_threads_128794, local_tid_128781 - + squot32(local_tid_128781, 32) * 32) && + ltid_in_bounds_128793) { + // read operands + { + x_87450 = ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781) - + sext_i32_i64(skip_threads_128794)]; + } + // perform operation + { + bool inactive_128795 = + slt64(srem64(sext_i32_i64(local_tid_128781), + n_70864), + sext_i32_i64(local_tid_128781) - + sext_i32_i64(local_tid_128781 - + skip_threads_128794)); + + if (inactive_128795) { + x_87450 = x_87451; + } + if (!inactive_128795) { + double defunc_1_op_res_87452 = x_87450 + + x_87451; + + x_87450 = defunc_1_op_res_87452; + } + } + } + if (sle32(wave_sizze_128783, skip_threads_128794)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128794, local_tid_128781 - + squot32(local_tid_128781, 32) * 32) && + ltid_in_bounds_128793) { + // write result + { + ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + x_87450; + x_87451 = x_87450; + } + } + if (sle32(wave_sizze_128783, skip_threads_128794)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128794 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128781 - squot32(local_tid_128781, 32) * + 32) == 
31 && ltid_in_bounds_128793) { + ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(squot32(local_tid_128781, + 32))] = + x_87450; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128796; + + // read input for in-block scan + { + if (squot32(local_tid_128781, 32) == 0 && + ltid_in_bounds_128793) { + x_128791 = ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)]; + if ((local_tid_128781 - squot32(local_tid_128781, + 32) * 32) == 0) { + x_128790 = x_128791; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128796 = 1; + while (slt32(skip_threads_128796, 32)) { + if (sle32(skip_threads_128796, local_tid_128781 - + squot32(local_tid_128781, 32) * 32) && + (squot32(local_tid_128781, 32) == 0 && + ltid_in_bounds_128793)) { + // read operands + { + x_128790 = ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781) - + sext_i32_i64(skip_threads_128796)]; + } + // perform operation + { + bool inactive_128797 = + slt64(srem64(sext_i32_i64(local_tid_128781 * + 32 + 32 - 1), n_70864), + sext_i32_i64(local_tid_128781 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128781 - + skip_threads_128796) * + 32 + 32 - 1)); + + if (inactive_128797) { + x_128790 = x_128791; + } + if (!inactive_128797) { + double defunc_1_op_res_128792 = + x_128790 + x_128791; + + x_128790 = defunc_1_op_res_128792; + } + } + } + if (sle32(wave_sizze_128783, skip_threads_128796)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128796, local_tid_128781 - + squot32(local_tid_128781, 32) * 32) && + (squot32(local_tid_128781, 32) == 0 && + ltid_in_bounds_128793)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + x_128790; + x_128791 = x_128790; + } + } + if (sle32(wave_sizze_128783, skip_threads_128796)) { + barrier(CLK_LOCAL_MEM_FENCE); 
+ } + skip_threads_128796 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128781, 32) == 0 || + !ltid_in_bounds_128793)) { + // read operands + { + x_87451 = x_87450; + x_87450 = ((__local + double *) red_arr_mem_128785)[sext_i32_i64(squot32(local_tid_128781, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128798 = + slt64(srem64(sext_i32_i64(local_tid_128781), + n_70864), + sext_i32_i64(local_tid_128781) - + sext_i32_i64(squot32(local_tid_128781, + 32) * 32 - 1)); + + if (inactive_128798) { + x_87450 = x_87451; + } + if (!inactive_128798) { + double defunc_1_op_res_87452 = x_87450 + + x_87451; + + x_87450 = defunc_1_op_res_87452; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + x_87450; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128781, 32) == 0) { + ((__local + double *) red_arr_mem_128785)[sext_i32_i64(local_tid_128781)] = + x_87451; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781), m_70861 * k2p2zq_70876 * + k2p2zq_70876) && slt64(sext_i32_i64(local_tid_128781), + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778))) { + ((__global + double *) mem_124281)[squot64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) + + squot64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781) - + squot64(sext_i32_i64(virt_group_id_128789) * + 
squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781) - + squot64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781), + k2p2zq_70876 * k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876) - + squot64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781) - + squot64(sext_i32_i64(virt_group_id_128789) * + squot64(segred_group_sizze_87446, + segment_sizze_nonzzero_128778) + + sext_i32_i64(local_tid_128781), + k2p2zq_70876 * + k2p2zq_70876) * + (k2p2zq_70876 * k2p2zq_70876), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_128785)[(sext_i32_i64(local_tid_128781) + + (int64_t) 1) * + segment_sizze_nonzzero_128778 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_87446 +} +__kernel void mainDetailedzisegred_small_88192(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128975_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t k2p2zq_70876, + int64_t num_groups_88245, + int64_t segment_sizze_nonzzero_128968, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124587) +{ + #define segred_group_sizze_88244 (mainDetailedzisegred_group_sizze_88186) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128975_backing_0 = + (__local volatile + char *) 
red_arr_mem_128975_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128970; + int32_t local_tid_128971; + int64_t group_sizze_128974; + int32_t wave_sizze_128973; + int32_t group_tid_128972; + + global_tid_128970 = get_global_id(0); + local_tid_128971 = get_local_id(0); + group_sizze_128974 = get_local_size(0); + wave_sizze_128973 = LOCKSTEP_WIDTH; + group_tid_128972 = get_group_id(0); + + int32_t phys_tid_88192; + + phys_tid_88192 = global_tid_128970; + + __local char *red_arr_mem_128975; + + red_arr_mem_128975 = (__local char *) red_arr_mem_128975_backing_0; + + int32_t phys_group_id_128977; + + phys_group_id_128977 = get_group_id(0); + for (int32_t i_128978 = 0; i_128978 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968))) - + phys_group_id_128977, sext_i64_i32(num_groups_88245)); + i_128978++) { + int32_t virt_group_id_128979 = phys_group_id_128977 + i_128978 * + sext_i64_i32(num_groups_88245); + int64_t gtid_88181 = squot64(squot64(sext_i32_i64(local_tid_128971), + segment_sizze_nonzzero_128968) + + sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968), + k2p2zq_70876); + int64_t gtid_88182 = squot64(sext_i32_i64(local_tid_128971), + segment_sizze_nonzzero_128968) + + sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968) - + squot64(squot64(sext_i32_i64(local_tid_128971), + segment_sizze_nonzzero_128968) + + sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_88191 = srem64(sext_i32_i64(local_tid_128971), n_70864); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_70864) && ((slt64(gtid_88181, m_70861) && + slt64(gtid_88182, + k2p2zq_70876)) && + slt64(sext_i32_i64(local_tid_128971), + n_70864 * + 
squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968)))) { + double x_88254 = ((__global double *) mem_124142)[gtid_88181 * + N_70860 + + gtid_88191]; + bool isnan_res_88255; + + isnan_res_88255 = futrts_isnan64(x_88254); + + double defunc_1_f_res_88256; + + if (isnan_res_88255) { + defunc_1_f_res_88256 = 0.0; + } else { + double x_88253 = ((__global + double *) binop_p_mem_120117)[gtid_88182 * + N_70860 + + gtid_88191]; + double defunc_1_f_res_f_res_88257 = x_88253 * x_88254; + + defunc_1_f_res_88256 = defunc_1_f_res_f_res_88257; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + defunc_1_f_res_88256; + } + } else { + ((__local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_70864)) { + // perform segmented scan to imitate reduction + { + double x_88248; + double x_88249; + double x_128980; + double x_128981; + bool ltid_in_bounds_128983; + + ltid_in_bounds_128983 = slt64(sext_i32_i64(local_tid_128971), + n_70864 * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968)); + + int32_t skip_threads_128984; + + // read input for in-block scan + { + if (ltid_in_bounds_128983) { + x_88249 = ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)]; + if ((local_tid_128971 - squot32(local_tid_128971, 32) * + 32) == 0) { + x_88248 = x_88249; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128984 = 1; + while (slt32(skip_threads_128984, 32)) { + if (sle32(skip_threads_128984, local_tid_128971 - + squot32(local_tid_128971, 32) * 32) && + ltid_in_bounds_128983) { + // read operands + { + x_88248 = ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971) - + sext_i32_i64(skip_threads_128984)]; + } + // perform operation + { + bool inactive_128985 = + 
slt64(srem64(sext_i32_i64(local_tid_128971), + n_70864), + sext_i32_i64(local_tid_128971) - + sext_i32_i64(local_tid_128971 - + skip_threads_128984)); + + if (inactive_128985) { + x_88248 = x_88249; + } + if (!inactive_128985) { + double defunc_1_op_res_88250 = x_88248 + + x_88249; + + x_88248 = defunc_1_op_res_88250; + } + } + } + if (sle32(wave_sizze_128973, skip_threads_128984)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128984, local_tid_128971 - + squot32(local_tid_128971, 32) * 32) && + ltid_in_bounds_128983) { + // write result + { + ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + x_88248; + x_88249 = x_88248; + } + } + if (sle32(wave_sizze_128973, skip_threads_128984)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128984 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128971 - squot32(local_tid_128971, 32) * + 32) == 31 && ltid_in_bounds_128983) { + ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(squot32(local_tid_128971, + 32))] = + x_88248; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128986; + + // read input for in-block scan + { + if (squot32(local_tid_128971, 32) == 0 && + ltid_in_bounds_128983) { + x_128981 = ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)]; + if ((local_tid_128971 - squot32(local_tid_128971, + 32) * 32) == 0) { + x_128980 = x_128981; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128986 = 1; + while (slt32(skip_threads_128986, 32)) { + if (sle32(skip_threads_128986, local_tid_128971 - + squot32(local_tid_128971, 32) * 32) && + (squot32(local_tid_128971, 32) == 0 && + ltid_in_bounds_128983)) { + // read operands + { + x_128980 = ((volatile __local + double *) 
red_arr_mem_128975)[sext_i32_i64(local_tid_128971) - + sext_i32_i64(skip_threads_128986)]; + } + // perform operation + { + bool inactive_128987 = + slt64(srem64(sext_i32_i64(local_tid_128971 * + 32 + 32 - 1), n_70864), + sext_i32_i64(local_tid_128971 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128971 - + skip_threads_128986) * + 32 + 32 - 1)); + + if (inactive_128987) { + x_128980 = x_128981; + } + if (!inactive_128987) { + double defunc_1_op_res_128982 = + x_128980 + x_128981; + + x_128980 = defunc_1_op_res_128982; + } + } + } + if (sle32(wave_sizze_128973, skip_threads_128986)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128986, local_tid_128971 - + squot32(local_tid_128971, 32) * 32) && + (squot32(local_tid_128971, 32) == 0 && + ltid_in_bounds_128983)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + x_128980; + x_128981 = x_128980; + } + } + if (sle32(wave_sizze_128973, skip_threads_128986)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128986 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128971, 32) == 0 || + !ltid_in_bounds_128983)) { + // read operands + { + x_88249 = x_88248; + x_88248 = ((__local + double *) red_arr_mem_128975)[sext_i32_i64(squot32(local_tid_128971, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128988 = + slt64(srem64(sext_i32_i64(local_tid_128971), + n_70864), + sext_i32_i64(local_tid_128971) - + sext_i32_i64(squot32(local_tid_128971, + 32) * 32 - 1)); + + if (inactive_128988) { + x_88248 = x_88249; + } + if (!inactive_128988) { + double defunc_1_op_res_88250 = x_88248 + + x_88249; + + x_88248 = defunc_1_op_res_88250; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + x_88248; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if 
(squot32(local_tid_128971, 32) == 0) { + ((__local + double *) red_arr_mem_128975)[sext_i32_i64(local_tid_128971)] = + x_88249; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968) + + sext_i32_i64(local_tid_128971), m_70861 * k2p2zq_70876) && + slt64(sext_i32_i64(local_tid_128971), + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968))) { + ((__global + double *) mem_124587)[squot64(sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968) + + sext_i32_i64(local_tid_128971), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968) + + sext_i32_i64(local_tid_128971) - + squot64(sext_i32_i64(virt_group_id_128979) * + squot64(segred_group_sizze_88244, + segment_sizze_nonzzero_128968) + + sext_i32_i64(local_tid_128971), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_128975)[(sext_i32_i64(local_tid_128971) + + (int64_t) 1) * + segment_sizze_nonzzero_128968 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88244 +} +__kernel void mainDetailedzisegred_small_88329(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129063_backing_aligned_0, + int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_88378, + int64_t segment_sizze_nonzzero_129056, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124653) +{ + #define segred_group_sizze_88377 (mainDetailedzisegred_group_sizze_88323) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char 
*restrict red_arr_mem_129063_backing_0 = + (__local volatile + char *) red_arr_mem_129063_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129058; + int32_t local_tid_129059; + int64_t group_sizze_129062; + int32_t wave_sizze_129061; + int32_t group_tid_129060; + + global_tid_129058 = get_global_id(0); + local_tid_129059 = get_local_id(0); + group_sizze_129062 = get_local_size(0); + wave_sizze_129061 = LOCKSTEP_WIDTH; + group_tid_129060 = get_group_id(0); + + int32_t phys_tid_88329; + + phys_tid_88329 = global_tid_129058; + + __local char *red_arr_mem_129063; + + red_arr_mem_129063 = (__local char *) red_arr_mem_129063_backing_0; + + int32_t phys_group_id_129065; + + phys_group_id_129065 = get_group_id(0); + for (int32_t i_129066 = 0; i_129066 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * k2p2zq_70876, + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056))) - + phys_group_id_129065, sext_i64_i32(num_groups_88378)); + i_129066++) { + int32_t virt_group_id_129067 = phys_group_id_129065 + i_129066 * + sext_i64_i32(num_groups_88378); + int64_t gtid_88318 = squot64(squot64(sext_i32_i64(local_tid_129059), + segment_sizze_nonzzero_129056) + + sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056), + k2p2zq_70876); + int64_t gtid_88319 = squot64(sext_i32_i64(local_tid_129059), + segment_sizze_nonzzero_129056) + + sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056) - + squot64(squot64(sext_i32_i64(local_tid_129059), + segment_sizze_nonzzero_129056) + + sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056), k2p2zq_70876) * + k2p2zq_70876; + int64_t gtid_88328 = srem64(sext_i32_i64(local_tid_129059), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && ((slt64(gtid_88318, + m_70861) && + slt64(gtid_88319, + k2p2zq_70876)) && 
+ slt64(sext_i32_i64(local_tid_129059), + k2p2zq_70876 * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056)))) { + double x_88387 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_88318 * + k2p2zq_70876 + + gtid_88328]; + double x_88388 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_88318 * + (k2p2zq_70876 * + k2p2zq_70876) + + gtid_88319 * + k2p2zq_70876 + + gtid_88328]; + double defunc_1_f_res_88389 = x_88387 * x_88388; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + defunc_1_f_res_88389; + } + } else { + ((__local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_88381; + double x_88382; + double x_129068; + double x_129069; + bool ltid_in_bounds_129071; + + ltid_in_bounds_129071 = slt64(sext_i32_i64(local_tid_129059), + k2p2zq_70876 * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056)); + + int32_t skip_threads_129072; + + // read input for in-block scan + { + if (ltid_in_bounds_129071) { + x_88382 = ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)]; + if ((local_tid_129059 - squot32(local_tid_129059, 32) * + 32) == 0) { + x_88381 = x_88382; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129072 = 1; + while (slt32(skip_threads_129072, 32)) { + if (sle32(skip_threads_129072, local_tid_129059 - + squot32(local_tid_129059, 32) * 32) && + ltid_in_bounds_129071) { + // read operands + { + x_88381 = ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059) - + sext_i32_i64(skip_threads_129072)]; + } + // perform operation + { + bool inactive_129073 = + slt64(srem64(sext_i32_i64(local_tid_129059), + k2p2zq_70876), + sext_i32_i64(local_tid_129059) - + 
sext_i32_i64(local_tid_129059 - + skip_threads_129072)); + + if (inactive_129073) { + x_88381 = x_88382; + } + if (!inactive_129073) { + double defunc_1_op_res_88383 = x_88381 + + x_88382; + + x_88381 = defunc_1_op_res_88383; + } + } + } + if (sle32(wave_sizze_129061, skip_threads_129072)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129072, local_tid_129059 - + squot32(local_tid_129059, 32) * 32) && + ltid_in_bounds_129071) { + // write result + { + ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + x_88381; + x_88382 = x_88381; + } + } + if (sle32(wave_sizze_129061, skip_threads_129072)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129072 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129059 - squot32(local_tid_129059, 32) * + 32) == 31 && ltid_in_bounds_129071) { + ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(squot32(local_tid_129059, + 32))] = + x_88381; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129074; + + // read input for in-block scan + { + if (squot32(local_tid_129059, 32) == 0 && + ltid_in_bounds_129071) { + x_129069 = ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)]; + if ((local_tid_129059 - squot32(local_tid_129059, + 32) * 32) == 0) { + x_129068 = x_129069; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129074 = 1; + while (slt32(skip_threads_129074, 32)) { + if (sle32(skip_threads_129074, local_tid_129059 - + squot32(local_tid_129059, 32) * 32) && + (squot32(local_tid_129059, 32) == 0 && + ltid_in_bounds_129071)) { + // read operands + { + x_129068 = ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059) - + sext_i32_i64(skip_threads_129074)]; + } + // perform operation + { + bool 
inactive_129075 = + slt64(srem64(sext_i32_i64(local_tid_129059 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_129059 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129059 - + skip_threads_129074) * + 32 + 32 - 1)); + + if (inactive_129075) { + x_129068 = x_129069; + } + if (!inactive_129075) { + double defunc_1_op_res_129070 = + x_129068 + x_129069; + + x_129068 = defunc_1_op_res_129070; + } + } + } + if (sle32(wave_sizze_129061, skip_threads_129074)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129074, local_tid_129059 - + squot32(local_tid_129059, 32) * 32) && + (squot32(local_tid_129059, 32) == 0 && + ltid_in_bounds_129071)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + x_129068; + x_129069 = x_129068; + } + } + if (sle32(wave_sizze_129061, skip_threads_129074)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129074 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129059, 32) == 0 || + !ltid_in_bounds_129071)) { + // read operands + { + x_88382 = x_88381; + x_88381 = ((__local + double *) red_arr_mem_129063)[sext_i32_i64(squot32(local_tid_129059, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129076 = + slt64(srem64(sext_i32_i64(local_tid_129059), + k2p2zq_70876), + sext_i32_i64(local_tid_129059) - + sext_i32_i64(squot32(local_tid_129059, + 32) * 32 - 1)); + + if (inactive_129076) { + x_88381 = x_88382; + } + if (!inactive_129076) { + double defunc_1_op_res_88383 = x_88381 + + x_88382; + + x_88381 = defunc_1_op_res_88383; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + x_88381; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129059, 32) == 0) { + ((__local + double *) red_arr_mem_129063)[sext_i32_i64(local_tid_129059)] = + 
x_88382; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056) + + sext_i32_i64(local_tid_129059), m_70861 * k2p2zq_70876) && + slt64(sext_i32_i64(local_tid_129059), + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056))) { + ((__global + double *) mem_124653)[squot64(sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056) + + sext_i32_i64(local_tid_129059), + k2p2zq_70876) * k2p2zq_70876 + + (sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056) + + sext_i32_i64(local_tid_129059) - + squot64(sext_i32_i64(virt_group_id_129067) * + squot64(segred_group_sizze_88377, + segment_sizze_nonzzero_129056) + + sext_i32_i64(local_tid_129059), + k2p2zq_70876) * + k2p2zq_70876)] = ((__local + double *) red_arr_mem_129063)[(sext_i32_i64(local_tid_129059) + + (int64_t) 1) * + segment_sizze_nonzzero_129056 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88377 +} +__kernel void mainDetailedzisegred_small_88459(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129195_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t k2p2zq_70876, + int64_t num_groups_88506, + int64_t segment_sizze_nonzzero_129188, + __global + unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124877) +{ + #define segred_group_sizze_88505 (mainDetailedzisegred_group_sizze_88453) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129195_backing_0 = + (__local volatile + char *) red_arr_mem_129195_backing_aligned_0; + + if 
(*global_failure >= 0) + return; + + int32_t global_tid_129190; + int32_t local_tid_129191; + int64_t group_sizze_129194; + int32_t wave_sizze_129193; + int32_t group_tid_129192; + + global_tid_129190 = get_global_id(0); + local_tid_129191 = get_local_id(0); + group_sizze_129194 = get_local_size(0); + wave_sizze_129193 = LOCKSTEP_WIDTH; + group_tid_129192 = get_group_id(0); + + int32_t phys_tid_88459; + + phys_tid_88459 = global_tid_129190; + + __local char *red_arr_mem_129195; + + red_arr_mem_129195 = (__local char *) red_arr_mem_129195_backing_0; + + int32_t phys_group_id_129197; + + phys_group_id_129197 = get_group_id(0); + for (int32_t i_129198 = 0; i_129198 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861 * N_70860, + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188))) - + phys_group_id_129197, sext_i64_i32(num_groups_88506)); + i_129198++) { + int32_t virt_group_id_129199 = phys_group_id_129197 + i_129198 * + sext_i64_i32(num_groups_88506); + int64_t gtid_88448 = squot64(squot64(sext_i32_i64(local_tid_129191), + segment_sizze_nonzzero_129188) + + sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188), + N_70860); + int64_t gtid_88449 = squot64(sext_i32_i64(local_tid_129191), + segment_sizze_nonzzero_129188) + + sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188) - + squot64(squot64(sext_i32_i64(local_tid_129191), + segment_sizze_nonzzero_129188) + + sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188), N_70860) * + N_70860; + int64_t gtid_88458 = srem64(sext_i32_i64(local_tid_129191), + k2p2zq_70876); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_70876) && ((slt64(gtid_88448, + m_70861) && + slt64(gtid_88449, + N_70860)) && + slt64(sext_i32_i64(local_tid_129191), + k2p2zq_70876 * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188)))) { + double 
x_88514 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_88448 * + k2p2zq_70876 + + gtid_88458]; + double x_88515 = ((__global double *) mem_120124)[gtid_88449 * + k2p2zq_70876 + + gtid_88458]; + double defunc_1_f_res_88516 = x_88514 * x_88515; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + defunc_1_f_res_88516; + } + } else { + ((__local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_70876)) { + // perform segmented scan to imitate reduction + { + double x_88509; + double x_88510; + double x_129200; + double x_129201; + bool ltid_in_bounds_129203; + + ltid_in_bounds_129203 = slt64(sext_i32_i64(local_tid_129191), + k2p2zq_70876 * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188)); + + int32_t skip_threads_129204; + + // read input for in-block scan + { + if (ltid_in_bounds_129203) { + x_88510 = ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)]; + if ((local_tid_129191 - squot32(local_tid_129191, 32) * + 32) == 0) { + x_88509 = x_88510; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129204 = 1; + while (slt32(skip_threads_129204, 32)) { + if (sle32(skip_threads_129204, local_tid_129191 - + squot32(local_tid_129191, 32) * 32) && + ltid_in_bounds_129203) { + // read operands + { + x_88509 = ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191) - + sext_i32_i64(skip_threads_129204)]; + } + // perform operation + { + bool inactive_129205 = + slt64(srem64(sext_i32_i64(local_tid_129191), + k2p2zq_70876), + sext_i32_i64(local_tid_129191) - + sext_i32_i64(local_tid_129191 - + skip_threads_129204)); + + if (inactive_129205) { + x_88509 = x_88510; + } + if (!inactive_129205) { + double defunc_1_op_res_88511 = x_88509 + + x_88510; + + x_88509 = 
defunc_1_op_res_88511; + } + } + } + if (sle32(wave_sizze_129193, skip_threads_129204)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129204, local_tid_129191 - + squot32(local_tid_129191, 32) * 32) && + ltid_in_bounds_129203) { + // write result + { + ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + x_88509; + x_88510 = x_88509; + } + } + if (sle32(wave_sizze_129193, skip_threads_129204)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129204 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129191 - squot32(local_tid_129191, 32) * + 32) == 31 && ltid_in_bounds_129203) { + ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(squot32(local_tid_129191, + 32))] = + x_88509; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129206; + + // read input for in-block scan + { + if (squot32(local_tid_129191, 32) == 0 && + ltid_in_bounds_129203) { + x_129201 = ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)]; + if ((local_tid_129191 - squot32(local_tid_129191, + 32) * 32) == 0) { + x_129200 = x_129201; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129206 = 1; + while (slt32(skip_threads_129206, 32)) { + if (sle32(skip_threads_129206, local_tid_129191 - + squot32(local_tid_129191, 32) * 32) && + (squot32(local_tid_129191, 32) == 0 && + ltid_in_bounds_129203)) { + // read operands + { + x_129200 = ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191) - + sext_i32_i64(skip_threads_129206)]; + } + // perform operation + { + bool inactive_129207 = + slt64(srem64(sext_i32_i64(local_tid_129191 * + 32 + 32 - 1), + k2p2zq_70876), + sext_i32_i64(local_tid_129191 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129191 - + skip_threads_129206) * + 
32 + 32 - 1)); + + if (inactive_129207) { + x_129200 = x_129201; + } + if (!inactive_129207) { + double defunc_1_op_res_129202 = + x_129200 + x_129201; + + x_129200 = defunc_1_op_res_129202; + } + } + } + if (sle32(wave_sizze_129193, skip_threads_129206)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129206, local_tid_129191 - + squot32(local_tid_129191, 32) * 32) && + (squot32(local_tid_129191, 32) == 0 && + ltid_in_bounds_129203)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + x_129200; + x_129201 = x_129200; + } + } + if (sle32(wave_sizze_129193, skip_threads_129206)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129206 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129191, 32) == 0 || + !ltid_in_bounds_129203)) { + // read operands + { + x_88510 = x_88509; + x_88509 = ((__local + double *) red_arr_mem_129195)[sext_i32_i64(squot32(local_tid_129191, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129208 = + slt64(srem64(sext_i32_i64(local_tid_129191), + k2p2zq_70876), + sext_i32_i64(local_tid_129191) - + sext_i32_i64(squot32(local_tid_129191, + 32) * 32 - 1)); + + if (inactive_129208) { + x_88509 = x_88510; + } + if (!inactive_129208) { + double defunc_1_op_res_88511 = x_88509 + + x_88510; + + x_88509 = defunc_1_op_res_88511; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + x_88509; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129191, 32) == 0) { + ((__local + double *) red_arr_mem_129195)[sext_i32_i64(local_tid_129191)] = + x_88510; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + 
segment_sizze_nonzzero_129188) + + sext_i32_i64(local_tid_129191), m_70861 * N_70860) && + slt64(sext_i32_i64(local_tid_129191), + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188))) { + ((__global + double *) mem_124877)[squot64(sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188) + + sext_i32_i64(local_tid_129191), + N_70860) * N_70860 + + (sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188) + + sext_i32_i64(local_tid_129191) - + squot64(sext_i32_i64(virt_group_id_129199) * + squot64(segred_group_sizze_88505, + segment_sizze_nonzzero_129188) + + sext_i32_i64(local_tid_129191), + N_70860) * N_70860)] = + ((__local + double *) red_arr_mem_129195)[(sext_i32_i64(local_tid_129191) + + (int64_t) 1) * + segment_sizze_nonzzero_129188 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88505 +} +__kernel void mainDetailedzisegred_small_88880(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129425_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t num_groups_88930, + int64_t segment_sizze_nonzzero_129418, + __global + unsigned char *mem_124924, + __global + unsigned char *mem_124949, + __global + unsigned char *mem_124952) +{ + #define segred_group_sizze_88929 (mainDetailedzisegred_group_sizze_88874) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129425_backing_0 = + (__local volatile + char *) red_arr_mem_129425_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + 
int32_t global_tid_129420; + int32_t local_tid_129421; + int64_t group_sizze_129424; + int32_t wave_sizze_129423; + int32_t group_tid_129422; + + global_tid_129420 = get_global_id(0); + local_tid_129421 = get_local_id(0); + group_sizze_129424 = get_local_size(0); + wave_sizze_129423 = LOCKSTEP_WIDTH; + group_tid_129422 = get_group_id(0); + + int32_t phys_tid_88880; + + phys_tid_88880 = global_tid_129420; + + __local char *red_arr_mem_129425; + + red_arr_mem_129425 = (__local char *) red_arr_mem_129425_backing_0; + + int32_t phys_group_id_129427; + + phys_group_id_129427 = get_group_id(0); + for (int32_t i_129428 = 0; i_129428 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418))) - + phys_group_id_129427, sext_i64_i32(num_groups_88930)); + i_129428++) { + int32_t virt_group_id_129429 = phys_group_id_129427 + i_129428 * + sext_i64_i32(num_groups_88930); + int64_t gtid_88871 = squot64(sext_i32_i64(local_tid_129421), + segment_sizze_nonzzero_129418) + + sext_i32_i64(virt_group_id_129429) * + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418); + int64_t gtid_88879 = srem64(sext_i32_i64(local_tid_129421), n_70864); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_70864) && (slt64(gtid_88871, m_70861) && + slt64(sext_i32_i64(local_tid_129421), + n_70864 * + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418)))) { + int64_t defunc_0_f_res_88937 = ((__global + int64_t *) mem_124949)[gtid_88871]; + bool cond_88939 = slt64(gtid_88879, defunc_0_f_res_88937); + double defunc_0_f_res_88940; + + if (cond_88939) { + bool y_88942 = slt64(gtid_88879, N_70860); + bool index_certs_88944; + + if (!y_88942) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 406) == -1) { + global_failure_args[0] = gtid_88879; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_88945 = ((__global + double *) 
mem_124924)[gtid_88871 * + N_70860 + + gtid_88879]; + + defunc_0_f_res_88940 = defunc_0_f_res_t_res_88945; + } else { + defunc_0_f_res_88940 = 0.0; + } + + double defunc_0_f_res_88946 = defunc_0_f_res_88940 * + defunc_0_f_res_88940; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + defunc_0_f_res_88946; + } + } else { + ((__local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + 0.0; + } + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_70864)) { + // perform segmented scan to imitate reduction + { + double x_88933; + double x_88934; + double x_129430; + double x_129431; + bool ltid_in_bounds_129433; + + ltid_in_bounds_129433 = slt64(sext_i32_i64(local_tid_129421), + n_70864 * + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418)); + + int32_t skip_threads_129434; + + // read input for in-block scan + { + if (ltid_in_bounds_129433) { + x_88934 = ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)]; + if ((local_tid_129421 - squot32(local_tid_129421, 32) * + 32) == 0) { + x_88933 = x_88934; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129434 = 1; + while (slt32(skip_threads_129434, 32)) { + if (sle32(skip_threads_129434, local_tid_129421 - + squot32(local_tid_129421, 32) * 32) && + ltid_in_bounds_129433) { + // read operands + { + x_88933 = ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421) - + sext_i32_i64(skip_threads_129434)]; + } + // perform operation + { + bool inactive_129435 = + slt64(srem64(sext_i32_i64(local_tid_129421), + n_70864), + sext_i32_i64(local_tid_129421) - + sext_i32_i64(local_tid_129421 - + skip_threads_129434)); + + if (inactive_129435) { + x_88933 = x_88934; + } + if (!inactive_129435) { + double defunc_1_op_res_88935 = x_88933 + 
+ x_88934; + + x_88933 = defunc_1_op_res_88935; + } + } + } + if (sle32(wave_sizze_129423, skip_threads_129434)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129434, local_tid_129421 - + squot32(local_tid_129421, 32) * 32) && + ltid_in_bounds_129433) { + // write result + { + ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + x_88933; + x_88934 = x_88933; + } + } + if (sle32(wave_sizze_129423, skip_threads_129434)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129434 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129421 - squot32(local_tid_129421, 32) * + 32) == 31 && ltid_in_bounds_129433) { + ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(squot32(local_tid_129421, + 32))] = + x_88933; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129436; + + // read input for in-block scan + { + if (squot32(local_tid_129421, 32) == 0 && + ltid_in_bounds_129433) { + x_129431 = ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)]; + if ((local_tid_129421 - squot32(local_tid_129421, + 32) * 32) == 0) { + x_129430 = x_129431; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129436 = 1; + while (slt32(skip_threads_129436, 32)) { + if (sle32(skip_threads_129436, local_tid_129421 - + squot32(local_tid_129421, 32) * 32) && + (squot32(local_tid_129421, 32) == 0 && + ltid_in_bounds_129433)) { + // read operands + { + x_129430 = ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421) - + sext_i32_i64(skip_threads_129436)]; + } + // perform operation + { + bool inactive_129437 = + slt64(srem64(sext_i32_i64(local_tid_129421 * + 32 + 32 - 1), n_70864), + sext_i32_i64(local_tid_129421 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129421 - + 
skip_threads_129436) * + 32 + 32 - 1)); + + if (inactive_129437) { + x_129430 = x_129431; + } + if (!inactive_129437) { + double defunc_1_op_res_129432 = + x_129430 + x_129431; + + x_129430 = defunc_1_op_res_129432; + } + } + } + if (sle32(wave_sizze_129423, skip_threads_129436)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129436, local_tid_129421 - + squot32(local_tid_129421, 32) * 32) && + (squot32(local_tid_129421, 32) == 0 && + ltid_in_bounds_129433)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + x_129430; + x_129431 = x_129430; + } + } + if (sle32(wave_sizze_129423, skip_threads_129436)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129436 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129421, 32) == 0 || + !ltid_in_bounds_129433)) { + // read operands + { + x_88934 = x_88933; + x_88933 = ((__local + double *) red_arr_mem_129425)[sext_i32_i64(squot32(local_tid_129421, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129438 = + slt64(srem64(sext_i32_i64(local_tid_129421), + n_70864), + sext_i32_i64(local_tid_129421) - + sext_i32_i64(squot32(local_tid_129421, + 32) * 32 - 1)); + + if (inactive_129438) { + x_88933 = x_88934; + } + if (!inactive_129438) { + double defunc_1_op_res_88935 = x_88933 + + x_88934; + + x_88933 = defunc_1_op_res_88935; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + x_88933; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129421, 32) == 0) { + ((__local + double *) red_arr_mem_129425)[sext_i32_i64(local_tid_129421)] = + x_88934; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129429) * + 
squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418) + + sext_i32_i64(local_tid_129421), m_70861) && + slt64(sext_i32_i64(local_tid_129421), + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418))) { + ((__global + double *) mem_124952)[sext_i32_i64(virt_group_id_129429) * + squot64(segred_group_sizze_88929, + segment_sizze_nonzzero_129418) + + sext_i32_i64(local_tid_129421)] = + ((__local + double *) red_arr_mem_129425)[(sext_i32_i64(local_tid_129421) + + (int64_t) 1) * + segment_sizze_nonzzero_129418 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88929 +} +__kernel void mainDetailedzisegred_small_88904(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129365_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t n_70864, + int64_t num_groups_88916, + int64_t segment_sizze_nonzzero_129358, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124949) +{ + #define segred_group_sizze_88915 (mainDetailedzisegred_group_sizze_88898) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129365_backing_0 = + (__local volatile + char *) red_arr_mem_129365_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129360; + int32_t local_tid_129361; + int64_t group_sizze_129364; + int32_t wave_sizze_129363; + int32_t group_tid_129362; + + global_tid_129360 = get_global_id(0); + local_tid_129361 = get_local_id(0); + group_sizze_129364 = get_local_size(0); + wave_sizze_129363 = LOCKSTEP_WIDTH; + group_tid_129362 = get_group_id(0); + + int32_t phys_tid_88904; + + phys_tid_88904 = global_tid_129360; + + __local char *red_arr_mem_129365; + + red_arr_mem_129365 = (__local char *) red_arr_mem_129365_backing_0; + + int32_t phys_group_id_129367; + + phys_group_id_129367 = 
get_group_id(0); + for (int32_t i_129368 = 0; i_129368 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358))) - + phys_group_id_129367, sext_i64_i32(num_groups_88916)); + i_129368++) { + int32_t virt_group_id_129369 = phys_group_id_129367 + i_129368 * + sext_i64_i32(num_groups_88916); + int64_t gtid_88895 = squot64(sext_i32_i64(local_tid_129361), + segment_sizze_nonzzero_129358) + + sext_i32_i64(virt_group_id_129369) * + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358); + int64_t gtid_88903 = srem64(sext_i32_i64(local_tid_129361), n_70864); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_70864) && (slt64(gtid_88895, m_70861) && + slt64(sext_i32_i64(local_tid_129361), + n_70864 * + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358)))) { + double x_88923 = ((__global double *) mem_124142)[gtid_88895 * + N_70860 + + gtid_88903]; + bool isnan_res_88924; + + isnan_res_88924 = futrts_isnan64(x_88923); + + bool cond_88925 = !isnan_res_88924; + int64_t defunc_0_f_res_88926 = btoi_bool_i64(cond_88925); + + // save map-out results + { } + // save results to be reduced + { + ((__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + defunc_0_f_res_88926; + } + } else { + ((__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_70864)) { + // perform segmented scan to imitate reduction + { + int64_t x_88919; + int64_t x_88920; + int64_t x_129370; + int64_t x_129371; + bool ltid_in_bounds_129373; + + ltid_in_bounds_129373 = slt64(sext_i32_i64(local_tid_129361), + n_70864 * + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358)); + + int32_t skip_threads_129374; + + // read input for in-block scan + { + if (ltid_in_bounds_129373) { + x_88920 = ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)]; + 
if ((local_tid_129361 - squot32(local_tid_129361, 32) * + 32) == 0) { + x_88919 = x_88920; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129374 = 1; + while (slt32(skip_threads_129374, 32)) { + if (sle32(skip_threads_129374, local_tid_129361 - + squot32(local_tid_129361, 32) * 32) && + ltid_in_bounds_129373) { + // read operands + { + x_88919 = ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361) - + sext_i32_i64(skip_threads_129374)]; + } + // perform operation + { + bool inactive_129375 = + slt64(srem64(sext_i32_i64(local_tid_129361), + n_70864), + sext_i32_i64(local_tid_129361) - + sext_i32_i64(local_tid_129361 - + skip_threads_129374)); + + if (inactive_129375) { + x_88919 = x_88920; + } + if (!inactive_129375) { + int64_t defunc_1_op_res_88921 = + add64(x_88919, x_88920); + + x_88919 = defunc_1_op_res_88921; + } + } + } + if (sle32(wave_sizze_129363, skip_threads_129374)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129374, local_tid_129361 - + squot32(local_tid_129361, 32) * 32) && + ltid_in_bounds_129373) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + x_88919; + x_88920 = x_88919; + } + } + if (sle32(wave_sizze_129363, skip_threads_129374)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129374 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129361 - squot32(local_tid_129361, 32) * + 32) == 31 && ltid_in_bounds_129373) { + ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(squot32(local_tid_129361, + 32))] = + x_88919; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129376; + + // read input for in-block scan + { + if (squot32(local_tid_129361, 32) == 0 && + ltid_in_bounds_129373) { + x_129371 = ((volatile 
__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)]; + if ((local_tid_129361 - squot32(local_tid_129361, + 32) * 32) == 0) { + x_129370 = x_129371; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129376 = 1; + while (slt32(skip_threads_129376, 32)) { + if (sle32(skip_threads_129376, local_tid_129361 - + squot32(local_tid_129361, 32) * 32) && + (squot32(local_tid_129361, 32) == 0 && + ltid_in_bounds_129373)) { + // read operands + { + x_129370 = ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361) - + sext_i32_i64(skip_threads_129376)]; + } + // perform operation + { + bool inactive_129377 = + slt64(srem64(sext_i32_i64(local_tid_129361 * + 32 + 32 - 1), n_70864), + sext_i32_i64(local_tid_129361 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129361 - + skip_threads_129376) * + 32 + 32 - 1)); + + if (inactive_129377) { + x_129370 = x_129371; + } + if (!inactive_129377) { + int64_t defunc_1_op_res_129372 = + add64(x_129370, x_129371); + + x_129370 = defunc_1_op_res_129372; + } + } + } + if (sle32(wave_sizze_129363, skip_threads_129376)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129376, local_tid_129361 - + squot32(local_tid_129361, 32) * 32) && + (squot32(local_tid_129361, 32) == 0 && + ltid_in_bounds_129373)) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + x_129370; + x_129371 = x_129370; + } + } + if (sle32(wave_sizze_129363, skip_threads_129376)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129376 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129361, 32) == 0 || + !ltid_in_bounds_129373)) { + // read operands + { + x_88920 = x_88919; + x_88919 = ((__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(squot32(local_tid_129361, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129378 = + 
slt64(srem64(sext_i32_i64(local_tid_129361), + n_70864), + sext_i32_i64(local_tid_129361) - + sext_i32_i64(squot32(local_tid_129361, + 32) * 32 - 1)); + + if (inactive_129378) { + x_88919 = x_88920; + } + if (!inactive_129378) { + int64_t defunc_1_op_res_88921 = add64(x_88919, + x_88920); + + x_88919 = defunc_1_op_res_88921; + } + } + // write final result + { + ((__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + x_88919; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129361, 32) == 0) { + ((__local + int64_t *) red_arr_mem_129365)[sext_i32_i64(local_tid_129361)] = + x_88920; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129369) * + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358) + + sext_i32_i64(local_tid_129361), m_70861) && + slt64(sext_i32_i64(local_tid_129361), + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358))) { + ((__global + int64_t *) mem_124949)[sext_i32_i64(virt_group_id_129369) * + squot64(segred_group_sizze_88915, + segment_sizze_nonzzero_129358) + + sext_i32_i64(local_tid_129361)] = + ((__local + int64_t *) red_arr_mem_129365)[(sext_i32_i64(local_tid_129361) + + (int64_t) 1) * + segment_sizze_nonzzero_129358 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_88915 +} +__kernel void mainDetailedzisegred_small_89034(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129530_backing_aligned_0, + int64_t N_70860, int64_t m_70861, + int64_t defunc_2_reduce_comm_res_72722, + int64_t num_groups_89055, + int64_t segment_sizze_nonzzero_129523, + __global + unsigned char *mem_124924, + __global + unsigned char 
*defunc_3_map_res_mem_124961, + __global + unsigned char *defunc_3_map_res_mem_124962, + __global + unsigned char *mem_124972) +{ + #define segred_group_sizze_89054 (mainDetailedzisegred_group_sizze_89028) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129530_backing_0 = + (__local volatile + char *) red_arr_mem_129530_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129525; + int32_t local_tid_129526; + int64_t group_sizze_129529; + int32_t wave_sizze_129528; + int32_t group_tid_129527; + + global_tid_129525 = get_global_id(0); + local_tid_129526 = get_local_id(0); + group_sizze_129529 = get_local_size(0); + wave_sizze_129528 = LOCKSTEP_WIDTH; + group_tid_129527 = get_group_id(0); + + int32_t phys_tid_89034; + + phys_tid_89034 = global_tid_129525; + + __local char *red_arr_mem_129530; + + red_arr_mem_129530 = (__local char *) red_arr_mem_129530_backing_0; + + int32_t phys_group_id_129532; + + phys_group_id_129532 = get_group_id(0); + for (int32_t i_129533 = 0; i_129533 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523))) - + phys_group_id_129532, sext_i64_i32(num_groups_89055)); + i_129533++) { + int32_t virt_group_id_129534 = phys_group_id_129532 + i_129533 * + sext_i64_i32(num_groups_89055); + int64_t gtid_89025 = squot64(sext_i32_i64(local_tid_129526), + segment_sizze_nonzzero_129523) + + sext_i32_i64(virt_group_id_129534) * + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523); + int64_t gtid_89033 = srem64(sext_i32_i64(local_tid_129526), + defunc_2_reduce_comm_res_72722); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_72722) && + (slt64(gtid_89025, m_70861) && + 
slt64(sext_i32_i64(local_tid_129526), + defunc_2_reduce_comm_res_72722 * + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523)))) { + int64_t x_89063 = ((__global + int64_t *) defunc_3_map_res_mem_124961)[gtid_89025]; + bool cond_89065 = slt64(gtid_89033, x_89063); + double defunc_0_f_res_89066; + + if (cond_89065) { + int64_t x_89062 = ((__global + int64_t *) defunc_3_map_res_mem_124962)[gtid_89025]; + int64_t x_89067 = add64(gtid_89033, x_89062); + int64_t x_89068 = sub64(x_89067, x_89063); + int64_t i_89069 = add64((int64_t) 1, x_89068); + bool x_89070 = sle64((int64_t) 0, i_89069); + bool y_89071 = slt64(i_89069, N_70860); + bool bounds_check_89072 = x_89070 && y_89071; + bool index_certs_89073; + + if (!bounds_check_89072) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 409) == -1) { + global_failure_args[0] = i_89069; + global_failure_args[1] = N_70860; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_89074 = ((__global + double *) mem_124924)[gtid_89025 * + N_70860 + + i_89069]; + + defunc_0_f_res_89066 = defunc_0_f_res_t_res_89074; + } else { + defunc_0_f_res_89066 = 0.0; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + defunc_0_f_res_89066; + } + } else { + ((__local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + 0.0; + } + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_72722)) { + // perform segmented scan to imitate reduction + { + double x_89058; + double x_89059; + double x_129535; + double x_129536; + bool ltid_in_bounds_129538; + + ltid_in_bounds_129538 = slt64(sext_i32_i64(local_tid_129526), + defunc_2_reduce_comm_res_72722 * + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523)); + + int32_t skip_threads_129539; + + // read input for 
in-block scan + { + if (ltid_in_bounds_129538) { + x_89059 = ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)]; + if ((local_tid_129526 - squot32(local_tid_129526, 32) * + 32) == 0) { + x_89058 = x_89059; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129539 = 1; + while (slt32(skip_threads_129539, 32)) { + if (sle32(skip_threads_129539, local_tid_129526 - + squot32(local_tid_129526, 32) * 32) && + ltid_in_bounds_129538) { + // read operands + { + x_89058 = ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526) - + sext_i32_i64(skip_threads_129539)]; + } + // perform operation + { + bool inactive_129540 = + slt64(srem64(sext_i32_i64(local_tid_129526), + defunc_2_reduce_comm_res_72722), + sext_i32_i64(local_tid_129526) - + sext_i32_i64(local_tid_129526 - + skip_threads_129539)); + + if (inactive_129540) { + x_89058 = x_89059; + } + if (!inactive_129540) { + double defunc_1_op_res_89060 = x_89058 + + x_89059; + + x_89058 = defunc_1_op_res_89060; + } + } + } + if (sle32(wave_sizze_129528, skip_threads_129539)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129539, local_tid_129526 - + squot32(local_tid_129526, 32) * 32) && + ltid_in_bounds_129538) { + // write result + { + ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + x_89058; + x_89059 = x_89058; + } + } + if (sle32(wave_sizze_129528, skip_threads_129539)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129539 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129526 - squot32(local_tid_129526, 32) * + 32) == 31 && ltid_in_bounds_129538) { + ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(squot32(local_tid_129526, + 32))] = + x_89058; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t 
skip_threads_129541; + + // read input for in-block scan + { + if (squot32(local_tid_129526, 32) == 0 && + ltid_in_bounds_129538) { + x_129536 = ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)]; + if ((local_tid_129526 - squot32(local_tid_129526, + 32) * 32) == 0) { + x_129535 = x_129536; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129541 = 1; + while (slt32(skip_threads_129541, 32)) { + if (sle32(skip_threads_129541, local_tid_129526 - + squot32(local_tid_129526, 32) * 32) && + (squot32(local_tid_129526, 32) == 0 && + ltid_in_bounds_129538)) { + // read operands + { + x_129535 = ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526) - + sext_i32_i64(skip_threads_129541)]; + } + // perform operation + { + bool inactive_129542 = + slt64(srem64(sext_i32_i64(local_tid_129526 * + 32 + 32 - 1), + defunc_2_reduce_comm_res_72722), + sext_i32_i64(local_tid_129526 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129526 - + skip_threads_129541) * + 32 + 32 - 1)); + + if (inactive_129542) { + x_129535 = x_129536; + } + if (!inactive_129542) { + double defunc_1_op_res_129537 = + x_129535 + x_129536; + + x_129535 = defunc_1_op_res_129537; + } + } + } + if (sle32(wave_sizze_129528, skip_threads_129541)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129541, local_tid_129526 - + squot32(local_tid_129526, 32) * 32) && + (squot32(local_tid_129526, 32) == 0 && + ltid_in_bounds_129538)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + x_129535; + x_129536 = x_129535; + } + } + if (sle32(wave_sizze_129528, skip_threads_129541)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129541 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129526, 32) == 0 || + !ltid_in_bounds_129538)) { + // read operands + { + x_89059 = x_89058; + x_89058 = 
((__local + double *) red_arr_mem_129530)[sext_i32_i64(squot32(local_tid_129526, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129543 = + slt64(srem64(sext_i32_i64(local_tid_129526), + defunc_2_reduce_comm_res_72722), + sext_i32_i64(local_tid_129526) - + sext_i32_i64(squot32(local_tid_129526, + 32) * 32 - 1)); + + if (inactive_129543) { + x_89058 = x_89059; + } + if (!inactive_129543) { + double defunc_1_op_res_89060 = x_89058 + + x_89059; + + x_89058 = defunc_1_op_res_89060; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + x_89058; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129526, 32) == 0) { + ((__local + double *) red_arr_mem_129530)[sext_i32_i64(local_tid_129526)] = + x_89059; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129534) * + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523) + + sext_i32_i64(local_tid_129526), m_70861) && + slt64(sext_i32_i64(local_tid_129526), + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523))) { + ((__global + double *) mem_124972)[sext_i32_i64(virt_group_id_129534) * + squot64(segred_group_sizze_89054, + segment_sizze_nonzzero_129523) + + sext_i32_i64(local_tid_129526)] = + ((__local + double *) red_arr_mem_129530)[(sext_i32_i64(local_tid_129526) + + (int64_t) 1) * + segment_sizze_nonzzero_129523 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_89054 +} +__kernel void mainDetailedzisegred_small_89828(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129738_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129736_backing_aligned_1, + __local volatile + int64_t 
*red_arr_mem_129734_backing_aligned_2, + int64_t m_70861, + int64_t iota_arg_72752, + int64_t iota_arg_72776, + int64_t num_groups_89969, + int64_t segment_sizze_nonzzero_129727, + __global + unsigned char *mem_124976, + __global + unsigned char *mem_125093, + __global + unsigned char *mem_125097, + __global + unsigned char *mem_125100, + __global + unsigned char *mem_125103, + __global + unsigned char *mem_125105, + __global + unsigned char *mem_125107, + __global + unsigned char *mem_125110) +{ + #define segred_group_sizze_89968 (mainDetailedzisegred_group_sizze_89822) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129738_backing_2 = + (__local volatile + char *) red_arr_mem_129738_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129736_backing_1 = + (__local volatile + char *) red_arr_mem_129736_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129734_backing_0 = + (__local volatile + char *) red_arr_mem_129734_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129729; + int32_t local_tid_129730; + int64_t group_sizze_129733; + int32_t wave_sizze_129732; + int32_t group_tid_129731; + + global_tid_129729 = get_global_id(0); + local_tid_129730 = get_local_id(0); + group_sizze_129733 = get_local_size(0); + wave_sizze_129732 = LOCKSTEP_WIDTH; + group_tid_129731 = get_group_id(0); + + int32_t phys_tid_89828; + + phys_tid_89828 = global_tid_129729; + + __local char *red_arr_mem_129734; + + red_arr_mem_129734 = (__local char *) red_arr_mem_129734_backing_0; + + __local char *red_arr_mem_129736; + + red_arr_mem_129736 = (__local char *) red_arr_mem_129736_backing_1; + + __local char *red_arr_mem_129738; + + red_arr_mem_129738 = (__local char *) red_arr_mem_129738_backing_2; + + int32_t phys_group_id_129740; + + phys_group_id_129740 = get_group_id(0); + for (int32_t i_129741 = 0; i_129741 < + 
sdiv_up32(sext_i64_i32(sdiv_up64(m_70861, + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727))) - + phys_group_id_129740, sext_i64_i32(num_groups_89969)); + i_129741++) { + int32_t virt_group_id_129742 = phys_group_id_129740 + i_129741 * + sext_i64_i32(num_groups_89969); + int64_t gtid_89819 = squot64(sext_i32_i64(local_tid_129730), + segment_sizze_nonzzero_129727) + + sext_i32_i64(virt_group_id_129742) * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727); + int64_t gtid_89827 = srem64(sext_i32_i64(local_tid_129730), + iota_arg_72752); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, iota_arg_72752) && (slt64(gtid_89819, + m_70861) && + slt64(sext_i32_i64(local_tid_129730), + iota_arg_72752 * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727)))) { + int64_t y_89989 = ((__global int64_t *) mem_125093)[gtid_89819]; + double y_89990 = ((__global double *) mem_125100)[gtid_89819]; + int64_t binop_x_115392 = iota_arg_72752 * gtid_89819; + int64_t binop_x_115393 = gtid_89827 + binop_x_115392; + int64_t new_index_115394 = squot64(binop_x_115393, + iota_arg_72776); + int64_t binop_y_115400 = iota_arg_72776 * new_index_115394; + int64_t new_index_115401 = binop_x_115393 - binop_y_115400; + double x_89992 = ((__global + double *) mem_125097)[new_index_115394 * + iota_arg_72776 + + new_index_115401]; + double x_89993 = ((__global double *) mem_124976)[gtid_89827]; + double defunc_0_f_res_89995 = x_89992 / y_89990; + bool cond_89996 = slt64(gtid_89827, y_89989); + bool isnan_res_89997; + + isnan_res_89997 = futrts_isnan64(defunc_0_f_res_89995); + + bool cond_t_res_89998 = !isnan_res_89997; + bool x_89999 = cond_89996 && cond_t_res_89998; + double abs_res_90000 = fabs(defunc_0_f_res_89995); + bool defunc_2_f_res_t_res_90001 = x_89993 < abs_res_90000; + bool x_90002 = x_89999 && defunc_2_f_res_t_res_90001; + double defunc_1_f_res_90003; + + if (cond_89996) { + defunc_1_f_res_90003 = defunc_0_f_res_89995; + } 
else { + defunc_1_f_res_90003 = 0.0; + } + // save map-out results + { + ((__global double *) mem_125110)[gtid_89819 * + iota_arg_72752 + + gtid_89827] = + defunc_0_f_res_89995; + } + // save results to be reduced + { + ((__local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + x_90002; + ((__local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + gtid_89827; + ((__local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + defunc_1_f_res_90003; + } + } else { + ((__local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + 0; + ((__local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + (int64_t) -1; + ((__local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, iota_arg_72752)) { + // perform segmented scan to imitate reduction + { + bool x_89975; + int64_t x_89976; + double x_89977; + bool x_89978; + int64_t x_89979; + double x_89980; + bool x_129743; + int64_t x_129744; + double x_129745; + bool x_129746; + int64_t x_129747; + double x_129748; + bool ltid_in_bounds_129757; + + ltid_in_bounds_129757 = slt64(sext_i32_i64(local_tid_129730), + iota_arg_72752 * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727)); + + int32_t skip_threads_129758; + + // read input for in-block scan + { + if (ltid_in_bounds_129757) { + x_89978 = ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)]; + x_89979 = ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)]; + x_89980 = ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)]; + if ((local_tid_129730 - squot32(local_tid_129730, 32) * + 32) == 0) { + x_89975 = x_89978; + x_89976 = x_89979; + x_89977 = x_89980; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129758 = 1; + while (slt32(skip_threads_129758, 32)) { + if 
(sle32(skip_threads_129758, local_tid_129730 - + squot32(local_tid_129730, 32) * 32) && + ltid_in_bounds_129757) { + // read operands + { + x_89975 = ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129758)]; + x_89976 = ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129758)]; + x_89977 = ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129758)]; + } + // perform operation + { + bool inactive_129759 = + slt64(srem64(sext_i32_i64(local_tid_129730), + iota_arg_72752), + sext_i32_i64(local_tid_129730) - + sext_i32_i64(local_tid_129730 - + skip_threads_129758)); + + if (inactive_129759) { + x_89975 = x_89978; + x_89976 = x_89979; + x_89977 = x_89980; + } + if (!inactive_129759) { + bool defunc_1_op_res_89981; + int64_t defunc_1_op_res_89982; + + if (x_89975) { + defunc_1_op_res_89981 = x_89975; + defunc_1_op_res_89982 = x_89976; + } else { + bool x_89983 = x_89978 && x_89978; + bool x_89984 = !x_89978; + bool y_89985 = x_89975 && x_89984; + bool defunc_1_op_res_f_res_89986 = + x_89983 || y_89985; + int64_t defunc_1_op_res_f_res_89987; + + if (x_89978) { + defunc_1_op_res_f_res_89987 = + x_89979; + } else { + defunc_1_op_res_f_res_89987 = + x_89976; + } + defunc_1_op_res_89981 = + defunc_1_op_res_f_res_89986; + defunc_1_op_res_89982 = + defunc_1_op_res_f_res_89987; + } + + double defunc_1_op_res_89988 = x_89977 + + x_89980; + + x_89975 = defunc_1_op_res_89981; + x_89976 = defunc_1_op_res_89982; + x_89977 = defunc_1_op_res_89988; + } + } + } + if (sle32(wave_sizze_129732, skip_threads_129758)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129758, local_tid_129730 - + squot32(local_tid_129730, 32) * 32) && + ltid_in_bounds_129757) { + // write result + { + ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + x_89975; + x_89978 = x_89975; 
+ ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + x_89976; + x_89979 = x_89976; + ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + x_89977; + x_89980 = x_89977; + } + } + if (sle32(wave_sizze_129732, skip_threads_129758)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129758 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129730 - squot32(local_tid_129730, 32) * + 32) == 31 && ltid_in_bounds_129757) { + ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(squot32(local_tid_129730, + 32))] = + x_89975; + ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(squot32(local_tid_129730, + 32))] = + x_89976; + ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(squot32(local_tid_129730, + 32))] = + x_89977; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129760; + + // read input for in-block scan + { + if (squot32(local_tid_129730, 32) == 0 && + ltid_in_bounds_129757) { + x_129746 = ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)]; + x_129747 = ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)]; + x_129748 = ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)]; + if ((local_tid_129730 - squot32(local_tid_129730, + 32) * 32) == 0) { + x_129743 = x_129746; + x_129744 = x_129747; + x_129745 = x_129748; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129760 = 1; + while (slt32(skip_threads_129760, 32)) { + if (sle32(skip_threads_129760, local_tid_129730 - + squot32(local_tid_129730, 32) * 32) && + (squot32(local_tid_129730, 32) == 0 && + ltid_in_bounds_129757)) { + // read operands + { + x_129743 = ((volatile __local + bool *) 
red_arr_mem_129734)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129760)]; + x_129744 = ((volatile __local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129760)]; + x_129745 = ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730) - + sext_i32_i64(skip_threads_129760)]; + } + // perform operation + { + bool inactive_129761 = + slt64(srem64(sext_i32_i64(local_tid_129730 * + 32 + 32 - 1), + iota_arg_72752), + sext_i32_i64(local_tid_129730 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129730 - + skip_threads_129760) * + 32 + 32 - 1)); + + if (inactive_129761) { + x_129743 = x_129746; + x_129744 = x_129747; + x_129745 = x_129748; + } + if (!inactive_129761) { + bool defunc_1_op_res_129749; + int64_t defunc_1_op_res_129750; + + if (x_129743) { + defunc_1_op_res_129749 = x_129743; + defunc_1_op_res_129750 = x_129744; + } else { + bool x_129751 = x_129746 && + x_129746; + bool x_129752 = !x_129746; + bool y_129753 = x_129743 && + x_129752; + bool defunc_1_op_res_f_res_129754 = + x_129751 || y_129753; + int64_t + defunc_1_op_res_f_res_129755; + + if (x_129746) { + defunc_1_op_res_f_res_129755 = + x_129747; + } else { + defunc_1_op_res_f_res_129755 = + x_129744; + } + defunc_1_op_res_129749 = + defunc_1_op_res_f_res_129754; + defunc_1_op_res_129750 = + defunc_1_op_res_f_res_129755; + } + + double defunc_1_op_res_129756 = + x_129745 + x_129748; + + x_129743 = defunc_1_op_res_129749; + x_129744 = defunc_1_op_res_129750; + x_129745 = defunc_1_op_res_129756; + } + } + } + if (sle32(wave_sizze_129732, skip_threads_129760)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129760, local_tid_129730 - + squot32(local_tid_129730, 32) * 32) && + (squot32(local_tid_129730, 32) == 0 && + ltid_in_bounds_129757)) { + // write result + { + ((volatile __local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + x_129743; + x_129746 = x_129743; + ((volatile __local + int64_t 
*) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + x_129744; + x_129747 = x_129744; + ((volatile __local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + x_129745; + x_129748 = x_129745; + } + } + if (sle32(wave_sizze_129732, skip_threads_129760)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129760 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129730, 32) == 0 || + !ltid_in_bounds_129757)) { + // read operands + { + x_89978 = x_89975; + x_89979 = x_89976; + x_89980 = x_89977; + x_89975 = ((__local + bool *) red_arr_mem_129734)[sext_i32_i64(squot32(local_tid_129730, + 32)) - + (int64_t) 1]; + x_89976 = ((__local + int64_t *) red_arr_mem_129736)[sext_i32_i64(squot32(local_tid_129730, + 32)) - + (int64_t) 1]; + x_89977 = ((__local + double *) red_arr_mem_129738)[sext_i32_i64(squot32(local_tid_129730, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129762 = + slt64(srem64(sext_i32_i64(local_tid_129730), + iota_arg_72752), + sext_i32_i64(local_tid_129730) - + sext_i32_i64(squot32(local_tid_129730, + 32) * 32 - 1)); + + if (inactive_129762) { + x_89975 = x_89978; + x_89976 = x_89979; + x_89977 = x_89980; + } + if (!inactive_129762) { + bool defunc_1_op_res_89981; + int64_t defunc_1_op_res_89982; + + if (x_89975) { + defunc_1_op_res_89981 = x_89975; + defunc_1_op_res_89982 = x_89976; + } else { + bool x_89983 = x_89978 && x_89978; + bool x_89984 = !x_89978; + bool y_89985 = x_89975 && x_89984; + bool defunc_1_op_res_f_res_89986 = + x_89983 || y_89985; + int64_t defunc_1_op_res_f_res_89987; + + if (x_89978) { + defunc_1_op_res_f_res_89987 = x_89979; + } else { + defunc_1_op_res_f_res_89987 = x_89976; + } + defunc_1_op_res_89981 = + defunc_1_op_res_f_res_89986; + defunc_1_op_res_89982 = + defunc_1_op_res_f_res_89987; + } + + double defunc_1_op_res_89988 = x_89977 + + x_89980; + + x_89975 = defunc_1_op_res_89981; + x_89976 = 
defunc_1_op_res_89982; + x_89977 = defunc_1_op_res_89988; + } + } + // write final result + { + ((__local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + x_89975; + ((__local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + x_89976; + ((__local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + x_89977; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129730, 32) == 0) { + ((__local + bool *) red_arr_mem_129734)[sext_i32_i64(local_tid_129730)] = + x_89978; + ((__local + int64_t *) red_arr_mem_129736)[sext_i32_i64(local_tid_129730)] = + x_89979; + ((__local + double *) red_arr_mem_129738)[sext_i32_i64(local_tid_129730)] = + x_89980; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129742) * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727) + + sext_i32_i64(local_tid_129730), m_70861) && + slt64(sext_i32_i64(local_tid_129730), + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727))) { + ((__global + bool *) mem_125103)[sext_i32_i64(virt_group_id_129742) * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727) + + sext_i32_i64(local_tid_129730)] = + ((__local + bool *) red_arr_mem_129734)[(sext_i32_i64(local_tid_129730) + + (int64_t) 1) * + segment_sizze_nonzzero_129727 - + (int64_t) 1]; + ((__global + int64_t *) mem_125105)[sext_i32_i64(virt_group_id_129742) * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727) + + sext_i32_i64(local_tid_129730)] = + ((__local + int64_t *) red_arr_mem_129736)[(sext_i32_i64(local_tid_129730) + + (int64_t) 1) * + segment_sizze_nonzzero_129727 - + (int64_t) 1]; + ((__global + double *) mem_125107)[sext_i32_i64(virt_group_id_129742) * + squot64(segred_group_sizze_89968, + segment_sizze_nonzzero_129727) + + sext_i32_i64(local_tid_129730)] = + 
((__local + double *) red_arr_mem_129738)[(sext_i32_i64(local_tid_129730) + + (int64_t) 1) * + segment_sizze_nonzzero_129727 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_89968 +} +__kernel void mainMagnitudezicopy_126384(int64_t m_73008, int64_t n_73011, + __global unsigned char *mem_120177, + __global unsigned char *mem_120224) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126384; + int32_t copy_ltid_126385; + int32_t copy_gid_126386; + + copy_gtid_126384 = get_global_id(0); + copy_ltid_126385 = get_local_id(0); + copy_gid_126386 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126384), m_73008 * n_73011)) { + ((__global double *) mem_120224)[squot64(sext_i32_i64(copy_gtid_126384), + n_73011) * n_73011 + + (sext_i32_i64(copy_gtid_126384) - + squot64(sext_i32_i64(copy_gtid_126384), + n_73011) * n_73011)] = + ((__global double *) mem_120177)[(sext_i32_i64(copy_gtid_126384) - + squot64(sext_i32_i64(copy_gtid_126384), + n_73011) * n_73011) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_126384), + n_73011)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126389(int64_t m_73008, int64_t n_73011, + __global unsigned char *mem_120180, + __global unsigned char *mem_120228) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126389; + int32_t copy_ltid_126390; + int32_t copy_gid_126391; + + copy_gtid_126389 = get_global_id(0); + copy_ltid_126390 = get_local_id(0); + copy_gid_126391 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126389), m_73008 * n_73011)) { + ((__global + int64_t *) mem_120228)[squot64(sext_i32_i64(copy_gtid_126389), + n_73011) * n_73011 + + (sext_i32_i64(copy_gtid_126389) - + squot64(sext_i32_i64(copy_gtid_126389), + n_73011) * n_73011)] = ((__global + int64_t *) 
mem_120180)[(sext_i32_i64(copy_gtid_126389) - + squot64(sext_i32_i64(copy_gtid_126389), + n_73011) * + n_73011) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_126389), + n_73011)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126467(int64_t m_73008, int64_t n_73011, + int64_t m_73103, __global + unsigned char *mem_120201, __global + unsigned char *mem_120203) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126467; + int32_t copy_ltid_126468; + int32_t copy_gid_126469; + + copy_gtid_126467 = get_global_id(0); + copy_ltid_126468 = get_local_id(0); + copy_gid_126469 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126467), m_73008)) { + ((__global int64_t *) mem_120203)[sext_i32_i64(copy_gtid_126467)] = + ((__global int64_t *) mem_120201)[m_73103 + + sext_i32_i64(copy_gtid_126467) * + n_73011]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126563(int64_t m_73008, int64_t n_73011, + int64_t k2p2zq_73023, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120257) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126563; + int32_t copy_ltid_126564; + int32_t copy_gid_126565; + + copy_gtid_126563 = get_global_id(0); + copy_ltid_126564 = get_local_id(0); + copy_gid_126565 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126563), m_73008 * k2p2zq_73023)) { + ((__global double *) mem_120257)[(sext_i32_i64(copy_gtid_126563) - + squot64(sext_i32_i64(copy_gtid_126563), + k2p2zq_73023) * + k2p2zq_73023) * m_73008 + + squot64(sext_i32_i64(copy_gtid_126563), + k2p2zq_73023)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_126563), + k2p2zq_73023) * + n_73011 + + (sext_i32_i64(copy_gtid_126563) - + squot64(sext_i32_i64(copy_gtid_126563), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void 
mainMagnitudezicopy_126568(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + __global unsigned char *mem_120246, + __global unsigned char *mem_120261) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126568; + int32_t copy_ltid_126569; + int32_t copy_gid_126570; + + copy_gtid_126568 = get_global_id(0); + copy_ltid_126569 = get_local_id(0); + copy_gid_126570 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126568), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global + double *) mem_120261)[squot64(sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * (m_73008 * + k2p2zq_73023) + + (sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + (defunc_2_reduce_res_73132 * m_73008) + + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126568) - + squot64(sext_i32_i64(copy_gtid_126568), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126573(int64_t m_73008, int64_t k2p2zq_73023, 
+ int64_t defunc_2_reduce_res_73132, + __global unsigned char *mem_120246, + __global unsigned char *mem_120265) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126573; + int32_t copy_ltid_126574; + int32_t copy_gid_126575; + + copy_gtid_126573 = get_global_id(0); + copy_ltid_126574 = get_local_id(0); + copy_gid_126575 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126573), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global double *) mem_120265)[(sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023) * (k2p2zq_73023 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * k2p2zq_73023) * + k2p2zq_73023 + + squot64(sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126573) - + squot64(sext_i32_i64(copy_gtid_126573), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126683(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + 
__global unsigned char *mem_120246, + __global unsigned char *mem_120894) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126683; + int32_t copy_ltid_126684; + int32_t copy_gid_126685; + + copy_gtid_126683 = get_global_id(0); + copy_ltid_126684 = get_local_id(0); + copy_gid_126685 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126683), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global double *) mem_120894)[(sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023) * (k2p2zq_73023 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * k2p2zq_73023) * + k2p2zq_73023 + + squot64(sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126683) - + squot64(sext_i32_i64(copy_gtid_126683), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126787(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + __global unsigned char *mem_120246, + 
__global unsigned char *mem_121001) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_126787; + int32_t copy_ltid_126788; + int32_t copy_gid_126789; + + copy_gtid_126787 = get_global_id(0); + copy_ltid_126788 = get_local_id(0); + copy_gid_126789 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126787), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global + double *) mem_121001)[squot64(sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * (m_73008 * + k2p2zq_73023) + + (sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023)] = + ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + (defunc_2_reduce_res_73132 * m_73008) + + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126787) - + squot64(sext_i32_i64(copy_gtid_126787), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_126871(int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_121351, + __global unsigned char *mem_121363) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const 
int block_dim2 = 2; + int32_t copy_gtid_126871; + int32_t copy_ltid_126872; + int32_t copy_gid_126873; + + copy_gtid_126871 = get_global_id(0); + copy_ltid_126872 = get_local_id(0); + copy_gid_126873 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_126871), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global + double *) mem_121363)[squot64(sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * (k2p2zq_73023 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * k2p2zq_73023) * + k2p2zq_73023 + (sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)] = ((__global + double *) mem_121351)[squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023 + + (sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_126871) - + squot64(sext_i32_i64(copy_gtid_126871), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127135(int64_t m_73008, int64_t n_73011, + int64_t k2p2zq_73023, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_121850) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127135; + int32_t 
copy_ltid_127136; + int32_t copy_gid_127137; + + copy_gtid_127135 = get_global_id(0); + copy_ltid_127136 = get_local_id(0); + copy_gid_127137 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127135), m_73008 * k2p2zq_73023)) { + ((__global double *) mem_121850)[(sext_i32_i64(copy_gtid_127135) - + squot64(sext_i32_i64(copy_gtid_127135), + k2p2zq_73023) * + k2p2zq_73023) * m_73008 + + squot64(sext_i32_i64(copy_gtid_127135), + k2p2zq_73023)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127135), + k2p2zq_73023) * + n_73011 + + (sext_i32_i64(copy_gtid_127135) - + squot64(sext_i32_i64(copy_gtid_127135), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127140(int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_121854, + __global unsigned char *mem_121858) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127140; + int32_t copy_ltid_127141; + int32_t copy_gid_127142; + + copy_gtid_127140 = get_global_id(0); + copy_ltid_127141 = get_local_id(0); + copy_gid_127142 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127140), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global double *) mem_121858)[(sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023) * (m_73008 * + k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * m_73008 + + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * k2p2zq_73023)] = + ((__global + double *) mem_121854)[squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * 
k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_127140) - + squot64(sext_i32_i64(copy_gtid_127140), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127208(int64_t m_73008, int64_t n_73011, + int64_t rp1_73709, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122017) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127208; + int32_t copy_ltid_127209; + int32_t copy_gid_127210; + + copy_gtid_127208 = get_global_id(0); + copy_ltid_127209 = get_local_id(0); + copy_gid_127210 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127208), m_73008 * rp1_73709)) { + ((__global double *) mem_122017)[(sext_i32_i64(copy_gtid_127208) - + squot64(sext_i32_i64(copy_gtid_127208), + rp1_73709) * rp1_73709) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_127208), + rp1_73709)] = ((__global + double *) defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127208), + rp1_73709) * + n_73011 + + (sext_i32_i64(copy_gtid_127208) - + squot64(sext_i32_i64(copy_gtid_127208), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127213(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, __global + unsigned char *mem_120246, __global + unsigned char *mem_122021) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127213; + int32_t copy_ltid_127214; + int32_t copy_gid_127215; + + 
copy_gtid_127213 = get_global_id(0); + copy_ltid_127214 = get_local_id(0); + copy_gid_127215 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127213), m_73008 * k2p2zq_73023 * + rp1_73709)) { + ((__global + double *) mem_122021)[squot64(sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), rp1_73709) * + (m_73008 * rp1_73709) + + (sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709) * m_73008 + + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * rp1_73709)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * + rp1_73709) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127213) - + squot64(sext_i32_i64(copy_gtid_127213), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127218(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, __global + unsigned char *mem_120246, __global + unsigned char *mem_122025) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127218; + int32_t copy_ltid_127219; + int32_t copy_gid_127220; + + copy_gtid_127218 = get_global_id(0); + copy_ltid_127219 = get_local_id(0); + copy_gid_127220 = 
get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127218), m_73008 * k2p2zq_73023 * + rp1_73709)) { + ((__global double *) mem_122025)[(sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709) * + (k2p2zq_73023 * m_73008) + + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * rp1_73709) * + k2p2zq_73023 + + squot64(sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127218) - + squot64(sext_i32_i64(copy_gtid_127218), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127467(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, __global + unsigned char *mem_120246, __global + unsigned char *mem_122686) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127467; + int32_t copy_ltid_127468; + int32_t copy_gid_127469; + + copy_gtid_127467 = get_global_id(0); + copy_ltid_127468 = get_local_id(0); + copy_gid_127469 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127467), m_73008 * k2p2zq_73023 * + 
rp1_73709)) { + ((__global double *) mem_122686)[(sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709) * + (k2p2zq_73023 * m_73008) + + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * rp1_73709) * + k2p2zq_73023 + + squot64(sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127467) - + squot64(sext_i32_i64(copy_gtid_127467), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127571(int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, __global + unsigned char *mem_120246, __global + unsigned char *mem_122793) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127571; + int32_t copy_ltid_127572; + int32_t copy_gid_127573; + + copy_gtid_127571 = get_global_id(0); + copy_ltid_127572 = get_local_id(0); + copy_gid_127573 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127571), m_73008 * k2p2zq_73023 * + rp1_73709)) { + ((__global + double *) 
mem_122793)[squot64(sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), rp1_73709) * + (m_73008 * rp1_73709) + + (sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709) * m_73008 + + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * rp1_73709)] = ((__global + double *) mem_120246)[squot64(sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + (defunc_2_reduce_res_73132 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * + rp1_73709) * + defunc_2_reduce_res_73132 + + (sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127571) - + squot64(sext_i32_i64(copy_gtid_127571), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * + rp1_73709), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127655(int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_123143, + __global unsigned char *mem_123155) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127655; + int32_t copy_ltid_127656; + int32_t copy_gid_127657; + + copy_gtid_127655 = get_global_id(0); + copy_ltid_127656 = get_local_id(0); + copy_gid_127657 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127655), m_73008 * k2p2zq_73023 * + k2p2zq_73023)) { + ((__global + double *) mem_123155)[squot64(sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * 
k2p2zq_73023), + k2p2zq_73023) * (k2p2zq_73023 * + m_73008) + + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * k2p2zq_73023) * + k2p2zq_73023 + (sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)] = ((__global + double *) mem_123143)[squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023 + + (sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023) - + squot64(sext_i32_i64(copy_gtid_127655) - + squot64(sext_i32_i64(copy_gtid_127655), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127919(int64_t m_73008, int64_t n_73011, + int64_t rp1_73709, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123633) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127919; + int32_t copy_ltid_127920; + int32_t copy_gid_127921; + + copy_gtid_127919 = get_global_id(0); + copy_ltid_127920 = get_local_id(0); + copy_gid_127921 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127919), m_73008 * rp1_73709)) { + ((__global double *) mem_123633)[(sext_i32_i64(copy_gtid_127919) - + squot64(sext_i32_i64(copy_gtid_127919), + rp1_73709) * rp1_73709) * + m_73008 + + squot64(sext_i32_i64(copy_gtid_127919), + rp1_73709)] = ((__global + double *) 
defunc_3_map_res_mem_120231)[squot64(sext_i32_i64(copy_gtid_127919), + rp1_73709) * + n_73011 + + (sext_i32_i64(copy_gtid_127919) - + squot64(sext_i32_i64(copy_gtid_127919), + rp1_73709) * + rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_127924(int64_t m_73008, int64_t k2p2zq_73023, + int64_t rp1_73709, __global + unsigned char *mem_123637, __global + unsigned char *mem_123641) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_127924; + int32_t copy_ltid_127925; + int32_t copy_gid_127926; + + copy_gtid_127924 = get_global_id(0); + copy_ltid_127925 = get_local_id(0); + copy_gid_127926 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_127924), m_73008 * k2p2zq_73023 * + rp1_73709)) { + ((__global double *) mem_123641)[(sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709) * + (m_73008 * k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * + rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * m_73008 + + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709)] = + ((__global + double *) mem_123637)[squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709) * + (rp1_73709 * k2p2zq_73023) + + squot64(sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), + rp1_73709) * rp1_73709 + + (sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709) - + squot64(sext_i32_i64(copy_gtid_127924) - + squot64(sext_i32_i64(copy_gtid_127924), + k2p2zq_73023 * rp1_73709) * + (k2p2zq_73023 * rp1_73709), + 
rp1_73709) * rp1_73709)]; + } + + error_0: + return; +} +__kernel void mainMagnitudezicopy_129313(int64_t N_73007, int64_t m_73008, + int64_t i_74783, __global + unsigned char *mem_124906, __global + unsigned char *mem_124911) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + int32_t copy_gtid_129313; + int32_t copy_ltid_129314; + int32_t copy_gid_129315; + + copy_gtid_129313 = get_global_id(0); + copy_ltid_129314 = get_local_id(0); + copy_gid_129315 = get_group_id(0); + if (slt64(sext_i32_i64(copy_gtid_129313), m_73008)) { + ((__global int64_t *) mem_124911)[sext_i32_i64(copy_gtid_129313)] = + ((__global int64_t *) mem_124906)[i_74783 + + sext_i32_i64(copy_gtid_129313) * + N_73007]; + } + + error_0: + return; +} +__kernel void mainMagnitudeziscan_stage1_101483(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129265_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int32_t num_threads_129259, + __global + unsigned char *mem_124142, + __global + unsigned char *defunc_3_map_res_mem_124883, + __global + unsigned char *mem_124906, + __global + unsigned char *mem_124909) +{ + #define segscan_group_sizze_101500 (mainMagnitudezisegscan_group_sizze_101477) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129265_backing_0 = + (__local volatile + char *) scan_arr_mem_129265_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129260; + int32_t local_tid_129261; + int64_t group_sizze_129264; + int32_t wave_sizze_129263; + int32_t group_tid_129262; + + global_tid_129260 = get_global_id(0); + local_tid_129261 = get_local_id(0); + group_sizze_129264 = get_local_size(0); + wave_sizze_129263 = LOCKSTEP_WIDTH; + group_tid_129262 = get_group_id(0); + + int32_t phys_tid_101483; + + phys_tid_101483 = global_tid_129260; + + __local char *scan_arr_mem_129265; + + scan_arr_mem_129265 = (__local char *) 
scan_arr_mem_129265_backing_0; + + int64_t x_101505; + int64_t x_101506; + + x_101505 = (int64_t) 0; + for (int64_t j_129267 = 0; j_129267 < sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259)); + j_129267++) { + int64_t chunk_offset_129268 = segscan_group_sizze_101500 * j_129267 + + sext_i32_i64(group_tid_129262) * (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))); + int64_t flat_idx_129269 = chunk_offset_129268 + + sext_i32_i64(local_tid_129261); + int64_t gtid_101474 = squot64(flat_idx_129269, N_73007); + int64_t gtid_101482 = flat_idx_129269 - squot64(flat_idx_129269, + N_73007) * N_73007; + + // threads in bounds read input + { + if (slt64(gtid_101474, m_73008) && slt64(gtid_101482, N_73007)) { + double x_101510 = ((__global double *) mem_124142)[gtid_101474 * + N_73007 + + gtid_101482]; + bool isnan_res_101512; + + isnan_res_101512 = futrts_isnan64(x_101510); + + bool cond_101513 = !isnan_res_101512; + double defunc_1_f_res_101514; + + if (cond_101513) { + double x_101511 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_101474 * + N_73007 + + gtid_101482]; + double defunc_1_f_res_t_res_101515 = x_101510 - x_101511; + + defunc_1_f_res_101514 = defunc_1_f_res_t_res_101515; + } else { + defunc_1_f_res_101514 = NAN; + } + + bool isnan_res_101516; + + isnan_res_101516 = futrts_isnan64(defunc_1_f_res_101514); + + bool defunc_0_p_res_101517 = !isnan_res_101516; + int64_t defunc_0_f_res_101518 = + btoi_bool_i64(defunc_0_p_res_101517); + + // write to-scan values to parameters + { + x_101506 = defunc_0_f_res_101518; + } + // write mapped values results to global memory + { + ((__global double *) mem_124909)[gtid_101474 * N_73007 + + gtid_101482] = + defunc_1_f_res_101514; + } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_101474, m_73008) && slt64(gtid_101482, + N_73007))) { + x_101506 = (int64_t) 0; + } 
+ } + // combine with carry and write to local memory + { + int64_t defunc_1_op_res_101507 = add64(x_101505, x_101506); + + ((__local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)] = + defunc_1_op_res_101507; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_129270; + int64_t x_129271; + int64_t x_129273; + int64_t x_129274; + bool ltid_in_bounds_129276; + + ltid_in_bounds_129276 = slt64(sext_i32_i64(local_tid_129261), + segscan_group_sizze_101500); + + int32_t skip_threads_129277; + + // read input for in-block scan + { + if (ltid_in_bounds_129276) { + x_129271 = ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)]; + if ((local_tid_129261 - squot32(local_tid_129261, 32) * + 32) == 0) { + x_129270 = x_129271; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129277 = 1; + while (slt32(skip_threads_129277, 32)) { + if (sle32(skip_threads_129277, local_tid_129261 - + squot32(local_tid_129261, 32) * 32) && + ltid_in_bounds_129276) { + // read operands + { + x_129270 = ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261) - + sext_i32_i64(skip_threads_129277)]; + } + // perform operation + { + bool inactive_129278 = + slt64(srem64(sext_i32_i64(local_tid_129261) + + chunk_offset_129268, N_73007), + sext_i32_i64(local_tid_129261) + + chunk_offset_129268 - + (sext_i32_i64(local_tid_129261 - + skip_threads_129277) + + chunk_offset_129268)); + + if (inactive_129278) { + x_129270 = x_129271; + } + if (!inactive_129278) { + int64_t defunc_1_op_res_129272 = add64(x_129270, + x_129271); + + x_129270 = defunc_1_op_res_129272; + } + } + } + if (sle32(wave_sizze_129263, skip_threads_129277)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129277, local_tid_129261 - + squot32(local_tid_129261, 32) * 32) && + ltid_in_bounds_129276) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)] = + x_129270; + 
x_129271 = x_129270; + } + } + if (sle32(wave_sizze_129263, skip_threads_129277)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129277 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129261 - squot32(local_tid_129261, 32) * 32) == + 31 && ltid_in_bounds_129276) { + ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(squot32(local_tid_129261, + 32))] = + x_129270; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129279; + + // read input for in-block scan + { + if (squot32(local_tid_129261, 32) == 0 && + ltid_in_bounds_129276) { + x_129274 = ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)]; + if ((local_tid_129261 - squot32(local_tid_129261, 32) * + 32) == 0) { + x_129273 = x_129274; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129279 = 1; + while (slt32(skip_threads_129279, 32)) { + if (sle32(skip_threads_129279, local_tid_129261 - + squot32(local_tid_129261, 32) * 32) && + (squot32(local_tid_129261, 32) == 0 && + ltid_in_bounds_129276)) { + // read operands + { + x_129273 = ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261) - + sext_i32_i64(skip_threads_129279)]; + } + // perform operation + { + bool inactive_129280 = + slt64(srem64(sext_i32_i64(local_tid_129261 * + 32 + 32 - 1) + + chunk_offset_129268, N_73007), + sext_i32_i64(local_tid_129261 * 32 + + 32 - 1) + chunk_offset_129268 - + (sext_i32_i64((local_tid_129261 - + skip_threads_129279) * + 32 + 32 - 1) + + chunk_offset_129268)); + + if (inactive_129280) { + x_129273 = x_129274; + } + if (!inactive_129280) { + int64_t defunc_1_op_res_129275 = + add64(x_129273, x_129274); + + x_129273 = defunc_1_op_res_129275; + } + } + } + if (sle32(wave_sizze_129263, skip_threads_129279)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129279, local_tid_129261 - + squot32(local_tid_129261, 32) * 32) && + (squot32(local_tid_129261, 32) == 0 && + ltid_in_bounds_129276)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)] = + x_129273; + x_129274 = x_129273; + } + } + if (sle32(wave_sizze_129263, skip_threads_129279)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129279 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129261, 32) == 0 || + !ltid_in_bounds_129276)) { + // read operands + { + x_129271 = x_129270; + x_129270 = ((__local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(squot32(local_tid_129261, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129281 = + slt64(srem64(sext_i32_i64(local_tid_129261) + + chunk_offset_129268, N_73007), + sext_i32_i64(local_tid_129261) + + chunk_offset_129268 - + (sext_i32_i64(squot32(local_tid_129261, 32) * + 32 - 1) + chunk_offset_129268)); + + if (inactive_129281) { + x_129270 = x_129271; + } + if (!inactive_129281) { + int64_t defunc_1_op_res_129272 = add64(x_129270, + x_129271); + + x_129270 = defunc_1_op_res_129272; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)] = + x_129270; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129261, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)] = + x_129271; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_101474, m_73008) && slt64(gtid_101482, + N_73007)) { + ((__global int64_t *) mem_124906)[gtid_101474 * N_73007 + + gtid_101482] = ((__local + int64_t *) scan_arr_mem_129265)[sext_i32_i64(local_tid_129261)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread 
reads last element as carry-in for next iteration + { + bool crosses_segment_129282 = slt64(srem64(chunk_offset_129268 + + segscan_group_sizze_101500, + N_73007), + chunk_offset_129268 + + segscan_group_sizze_101500 - + (chunk_offset_129268 + + segscan_group_sizze_101500 - + (int64_t) 1)); + bool should_load_carry_129283 = local_tid_129261 == 0 && + !crosses_segment_129282; + + if (should_load_carry_129283) { + x_101505 = ((__local + int64_t *) scan_arr_mem_129265)[segscan_group_sizze_101500 - + (int64_t) 1]; + } + if (!should_load_carry_129283) { + x_101505 = (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_101500 +} +__kernel void mainMagnitudeziscan_stage1_102464(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *scan_arr_mem_129642_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t iota_arg_74896, + int32_t num_threads_129636, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_0_f_res_mem_124970, + __global + unsigned char *mem_125028, + __global + unsigned char *mem_125032) +{ + #define segscan_group_sizze_102548 (mainMagnitudezisegscan_group_sizze_102458) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129642_backing_0 = + (__local volatile + char *) scan_arr_mem_129642_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129637; + int32_t local_tid_129638; + int64_t group_sizze_129641; + int32_t wave_sizze_129640; + int32_t group_tid_129639; + + global_tid_129637 = get_global_id(0); + 
local_tid_129638 = get_local_id(0); + group_sizze_129641 = get_local_size(0); + wave_sizze_129640 = LOCKSTEP_WIDTH; + group_tid_129639 = get_group_id(0); + + int32_t phys_tid_102464; + + phys_tid_102464 = global_tid_129637; + + __local char *scan_arr_mem_129642; + + scan_arr_mem_129642 = (__local char *) scan_arr_mem_129642_backing_0; + + double x_102552; + double x_102553; + + x_102552 = 0.0; + for (int64_t j_129644 = 0; j_129644 < sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636)); + j_129644++) { + int64_t chunk_offset_129645 = segscan_group_sizze_102548 * j_129644 + + sext_i32_i64(group_tid_129639) * (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))); + int64_t flat_idx_129646 = chunk_offset_129645 + + sext_i32_i64(local_tid_129638); + int64_t gtid_102455 = squot64(flat_idx_129646, iota_arg_74896); + int64_t gtid_102463 = flat_idx_129646 - squot64(flat_idx_129646, + iota_arg_74896) * + iota_arg_74896; + + // threads in bounds read input + { + if (slt64(gtid_102455, m_73008) && slt64(gtid_102463, + iota_arg_74896)) { + int64_t y_102559 = ((__global + int64_t *) mem_125028)[gtid_102455]; + bool cond_102562 = sle64(y_102559, gtid_102463); + double defunc_0_f_res_102563; + + if (cond_102562) { + defunc_0_f_res_102563 = 0.0; + } else { + int64_t x_102555 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102455]; + int64_t x_102556 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_102455]; + double x_102557 = ((__global + double *) defunc_0_f_res_mem_124970)[gtid_102455]; + bool cond_102564 = gtid_102463 == (int64_t) 0; + double defunc_0_f_res_f_res_102565; + + if (cond_102564) { + defunc_0_f_res_f_res_102565 = x_102557; + } else { + int64_t i_102566 = add64(gtid_102463, x_102555); + bool x_102567 = sle64((int64_t) 0, i_102566); + bool y_102568 = slt64(i_102566, N_73007); + bool bounds_check_102569 = x_102567 && y_102568; + bool index_certs_102570; + + if 
(!bounds_check_102569) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 652) == -1) { + global_failure_args[0] = i_102566; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_102571 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102455 * + N_73007 + + i_102566]; + int64_t x_102572 = sub64(x_102555, x_102556); + int64_t i_102573 = add64(gtid_102463, x_102572); + bool x_102574 = sle64((int64_t) 0, i_102573); + bool y_102575 = slt64(i_102573, N_73007); + bool bounds_check_102576 = x_102574 && y_102575; + bool index_certs_102577; + + if (!bounds_check_102576) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 653) == -1) { + global_failure_args[0] = i_102573; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_102578 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102455 * + N_73007 + + i_102573]; + double defunc_0_f_res_f_res_f_res_102579 = x_102571 - + y_102578; + + defunc_0_f_res_f_res_102565 = + defunc_0_f_res_f_res_f_res_102579; + } + defunc_0_f_res_102563 = defunc_0_f_res_f_res_102565; + } + // write to-scan values to parameters + { + x_102553 = defunc_0_f_res_102563; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_102455, m_73008) && slt64(gtid_102463, + iota_arg_74896))) { + x_102553 = 0.0; + } + } + // combine with carry and write to local memory + { + double defunc_1_op_res_102554 = x_102552 + x_102553; + + ((__local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)] = + defunc_1_op_res_102554; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double x_129647; + double x_129648; + double x_129650; + double x_129651; + bool ltid_in_bounds_129653; + + ltid_in_bounds_129653 = 
slt64(sext_i32_i64(local_tid_129638), + segscan_group_sizze_102548); + + int32_t skip_threads_129654; + + // read input for in-block scan + { + if (ltid_in_bounds_129653) { + x_129648 = ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)]; + if ((local_tid_129638 - squot32(local_tid_129638, 32) * + 32) == 0) { + x_129647 = x_129648; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129654 = 1; + while (slt32(skip_threads_129654, 32)) { + if (sle32(skip_threads_129654, local_tid_129638 - + squot32(local_tid_129638, 32) * 32) && + ltid_in_bounds_129653) { + // read operands + { + x_129647 = ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638) - + sext_i32_i64(skip_threads_129654)]; + } + // perform operation + { + bool inactive_129655 = + slt64(srem64(sext_i32_i64(local_tid_129638) + + chunk_offset_129645, + iota_arg_74896), + sext_i32_i64(local_tid_129638) + + chunk_offset_129645 - + (sext_i32_i64(local_tid_129638 - + skip_threads_129654) + + chunk_offset_129645)); + + if (inactive_129655) { + x_129647 = x_129648; + } + if (!inactive_129655) { + double defunc_1_op_res_129649 = x_129647 + + x_129648; + + x_129647 = defunc_1_op_res_129649; + } + } + } + if (sle32(wave_sizze_129640, skip_threads_129654)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129654, local_tid_129638 - + squot32(local_tid_129638, 32) * 32) && + ltid_in_bounds_129653) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)] = + x_129647; + x_129648 = x_129647; + } + } + if (sle32(wave_sizze_129640, skip_threads_129654)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129654 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129638 - squot32(local_tid_129638, 32) * 32) == + 31 && ltid_in_bounds_129653) { + ((volatile __local + double *) 
scan_arr_mem_129642)[sext_i32_i64(squot32(local_tid_129638, + 32))] = + x_129647; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129656; + + // read input for in-block scan + { + if (squot32(local_tid_129638, 32) == 0 && + ltid_in_bounds_129653) { + x_129651 = ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)]; + if ((local_tid_129638 - squot32(local_tid_129638, 32) * + 32) == 0) { + x_129650 = x_129651; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129656 = 1; + while (slt32(skip_threads_129656, 32)) { + if (sle32(skip_threads_129656, local_tid_129638 - + squot32(local_tid_129638, 32) * 32) && + (squot32(local_tid_129638, 32) == 0 && + ltid_in_bounds_129653)) { + // read operands + { + x_129650 = ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638) - + sext_i32_i64(skip_threads_129656)]; + } + // perform operation + { + bool inactive_129657 = + slt64(srem64(sext_i32_i64(local_tid_129638 * + 32 + 32 - 1) + + chunk_offset_129645, + iota_arg_74896), + sext_i32_i64(local_tid_129638 * 32 + + 32 - 1) + chunk_offset_129645 - + (sext_i32_i64((local_tid_129638 - + skip_threads_129656) * + 32 + 32 - 1) + + chunk_offset_129645)); + + if (inactive_129657) { + x_129650 = x_129651; + } + if (!inactive_129657) { + double defunc_1_op_res_129652 = x_129650 + + x_129651; + + x_129650 = defunc_1_op_res_129652; + } + } + } + if (sle32(wave_sizze_129640, skip_threads_129656)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129656, local_tid_129638 - + squot32(local_tid_129638, 32) * 32) && + (squot32(local_tid_129638, 32) == 0 && + ltid_in_bounds_129653)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)] = + x_129650; + x_129651 = x_129650; + } + } + if (sle32(wave_sizze_129640, skip_threads_129656)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129656 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129638, 32) == 0 || + !ltid_in_bounds_129653)) { + // read operands + { + x_129648 = x_129647; + x_129647 = ((__local + double *) scan_arr_mem_129642)[sext_i32_i64(squot32(local_tid_129638, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129658 = + slt64(srem64(sext_i32_i64(local_tid_129638) + + chunk_offset_129645, iota_arg_74896), + sext_i32_i64(local_tid_129638) + + chunk_offset_129645 - + (sext_i32_i64(squot32(local_tid_129638, 32) * + 32 - 1) + chunk_offset_129645)); + + if (inactive_129658) { + x_129647 = x_129648; + } + if (!inactive_129658) { + double defunc_1_op_res_129649 = x_129647 + x_129648; + + x_129647 = defunc_1_op_res_129649; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)] = + x_129647; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129638, 32) == 0) { + ((__local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)] = + x_129648; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_102455, m_73008) && slt64(gtid_102463, + iota_arg_74896)) { + ((__global double *) mem_125032)[gtid_102455 * + iota_arg_74896 + + gtid_102463] = ((__local + double *) scan_arr_mem_129642)[sext_i32_i64(local_tid_129638)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_129659 = slt64(srem64(chunk_offset_129645 + + segscan_group_sizze_102548, + iota_arg_74896), + chunk_offset_129645 + + segscan_group_sizze_102548 - + (chunk_offset_129645 + + segscan_group_sizze_102548 - + (int64_t) 1)); + bool should_load_carry_129660 = local_tid_129638 == 0 && + !crosses_segment_129659; + + if 
(should_load_carry_129660) { + x_102552 = ((__local + double *) scan_arr_mem_129642)[segscan_group_sizze_102548 - + (int64_t) 1]; + } + if (!should_load_carry_129660) { + x_102552 = 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_102548 +} +__kernel void mainMagnitudeziscan_stage1_90482(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126419_backing_aligned_0, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, int64_t m_73103, + int32_t num_threads_126413, + __global + unsigned char *images_mem_120108, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_90596 (mainMagnitudezisegscan_group_sizze_90476) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_126419_backing_0 = + (__local volatile + char *) scan_arr_mem_126419_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126414; + int32_t local_tid_126415; + int64_t group_sizze_126418; + int32_t wave_sizze_126417; + int32_t group_tid_126416; + + global_tid_126414 = get_global_id(0); + local_tid_126415 = get_local_id(0); + group_sizze_126418 = get_local_size(0); + wave_sizze_126417 = LOCKSTEP_WIDTH; + group_tid_126416 = get_group_id(0); + + int32_t phys_tid_90482; + + phys_tid_90482 = global_tid_126414; + + __local char *scan_arr_mem_126419; + + scan_arr_mem_126419 = (__local char *) scan_arr_mem_126419_backing_0; + + int64_t x_90600; + int64_t x_90601; + + x_90600 = (int64_t) 0; + for (int64_t j_126421 = 0; j_126421 < sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413)); + j_126421++) { + int64_t chunk_offset_126422 = segscan_group_sizze_90596 * j_126421 + + sext_i32_i64(group_tid_126416) * (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))); + int64_t flat_idx_126423 = chunk_offset_126422 + + sext_i32_i64(local_tid_126415); + int64_t gtid_90473 
= squot64(flat_idx_126423, n_73011); + int64_t gtid_90481 = flat_idx_126423 - squot64(flat_idx_126423, + n_73011) * n_73011; + + // threads in bounds read input + { + if (slt64(gtid_90473, m_73008) && slt64(gtid_90481, n_73011)) { + int64_t binop_y_115026 = (int64_t) -1 * gtid_90481; + int64_t slice_115027 = m_73103 + binop_y_115026; + double x_90604 = ((__global + double *) images_mem_120108)[gtid_90473 * + N_73007 + + slice_115027]; + bool defunc_0_f_res_90605; + + defunc_0_f_res_90605 = futrts_isnan64(x_90604); + + bool defunc_0_g_res_90606 = !defunc_0_f_res_90605; + int64_t defunc_0_f_res_90607 = + btoi_bool_i64(defunc_0_g_res_90606); + + // write to-scan values to parameters + { + x_90601 = defunc_0_f_res_90607; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_90473, m_73008) && slt64(gtid_90481, + n_73011))) { + x_90601 = (int64_t) 0; + } + } + // combine with carry and write to local memory + { + int64_t defunc_1_op_res_90602 = add64(x_90600, x_90601); + + ((__local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)] = + defunc_1_op_res_90602; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_126424; + int64_t x_126425; + int64_t x_126427; + int64_t x_126428; + bool ltid_in_bounds_126430; + + ltid_in_bounds_126430 = slt64(sext_i32_i64(local_tid_126415), + segscan_group_sizze_90596); + + int32_t skip_threads_126431; + + // read input for in-block scan + { + if (ltid_in_bounds_126430) { + x_126425 = ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)]; + if ((local_tid_126415 - squot32(local_tid_126415, 32) * + 32) == 0) { + x_126424 = x_126425; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126431 = 1; + while (slt32(skip_threads_126431, 32)) { + if (sle32(skip_threads_126431, local_tid_126415 - + squot32(local_tid_126415, 32) * 
32) && + ltid_in_bounds_126430) { + // read operands + { + x_126424 = ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415) - + sext_i32_i64(skip_threads_126431)]; + } + // perform operation + { + bool inactive_126432 = + slt64(srem64(sext_i32_i64(local_tid_126415) + + chunk_offset_126422, n_73011), + sext_i32_i64(local_tid_126415) + + chunk_offset_126422 - + (sext_i32_i64(local_tid_126415 - + skip_threads_126431) + + chunk_offset_126422)); + + if (inactive_126432) { + x_126424 = x_126425; + } + if (!inactive_126432) { + int64_t defunc_1_op_res_126426 = add64(x_126424, + x_126425); + + x_126424 = defunc_1_op_res_126426; + } + } + } + if (sle32(wave_sizze_126417, skip_threads_126431)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126431, local_tid_126415 - + squot32(local_tid_126415, 32) * 32) && + ltid_in_bounds_126430) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)] = + x_126424; + x_126425 = x_126424; + } + } + if (sle32(wave_sizze_126417, skip_threads_126431)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126431 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126415 - squot32(local_tid_126415, 32) * 32) == + 31 && ltid_in_bounds_126430) { + ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(squot32(local_tid_126415, + 32))] = + x_126424; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126433; + + // read input for in-block scan + { + if (squot32(local_tid_126415, 32) == 0 && + ltid_in_bounds_126430) { + x_126428 = ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)]; + if ((local_tid_126415 - squot32(local_tid_126415, 32) * + 32) == 0) { + x_126427 = x_126428; + } + } + } + // in-block scan (hopefully no barriers needed) 
+ { + skip_threads_126433 = 1; + while (slt32(skip_threads_126433, 32)) { + if (sle32(skip_threads_126433, local_tid_126415 - + squot32(local_tid_126415, 32) * 32) && + (squot32(local_tid_126415, 32) == 0 && + ltid_in_bounds_126430)) { + // read operands + { + x_126427 = ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415) - + sext_i32_i64(skip_threads_126433)]; + } + // perform operation + { + bool inactive_126434 = + slt64(srem64(sext_i32_i64(local_tid_126415 * + 32 + 32 - 1) + + chunk_offset_126422, n_73011), + sext_i32_i64(local_tid_126415 * 32 + + 32 - 1) + chunk_offset_126422 - + (sext_i32_i64((local_tid_126415 - + skip_threads_126433) * + 32 + 32 - 1) + + chunk_offset_126422)); + + if (inactive_126434) { + x_126427 = x_126428; + } + if (!inactive_126434) { + int64_t defunc_1_op_res_126429 = + add64(x_126427, x_126428); + + x_126427 = defunc_1_op_res_126429; + } + } + } + if (sle32(wave_sizze_126417, skip_threads_126433)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126433, local_tid_126415 - + squot32(local_tid_126415, 32) * 32) && + (squot32(local_tid_126415, 32) == 0 && + ltid_in_bounds_126430)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)] = + x_126427; + x_126428 = x_126427; + } + } + if (sle32(wave_sizze_126417, skip_threads_126433)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126433 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126415, 32) == 0 || + !ltid_in_bounds_126430)) { + // read operands + { + x_126425 = x_126424; + x_126424 = ((__local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(squot32(local_tid_126415, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126435 = + slt64(srem64(sext_i32_i64(local_tid_126415) + + chunk_offset_126422, n_73011), + sext_i32_i64(local_tid_126415) + + chunk_offset_126422 - + 
(sext_i32_i64(squot32(local_tid_126415, 32) * + 32 - 1) + chunk_offset_126422)); + + if (inactive_126435) { + x_126424 = x_126425; + } + if (!inactive_126435) { + int64_t defunc_1_op_res_126426 = add64(x_126424, + x_126425); + + x_126424 = defunc_1_op_res_126426; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)] = + x_126424; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126415, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)] = + x_126425; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_90473, m_73008) && slt64(gtid_90481, n_73011)) { + ((__global int64_t *) mem_120201)[gtid_90473 * n_73011 + + gtid_90481] = ((__local + int64_t *) scan_arr_mem_126419)[sext_i32_i64(local_tid_126415)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_126436 = slt64(srem64(chunk_offset_126422 + + segscan_group_sizze_90596, + n_73011), + chunk_offset_126422 + + segscan_group_sizze_90596 - + (chunk_offset_126422 + + segscan_group_sizze_90596 - + (int64_t) 1)); + bool should_load_carry_126437 = local_tid_126415 == 0 && + !crosses_segment_126436; + + if (should_load_carry_126437) { + x_90600 = ((__local + int64_t *) scan_arr_mem_126419)[segscan_group_sizze_90596 - + (int64_t) 1]; + } + if (!should_load_carry_126437) { + x_90600 = (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_90596 +} +__kernel void mainMagnitudeziscan_stage1_98961(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *scan_arr_mem_128476_backing_aligned_0, + int64_t m_73008, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int32_t num_threads_128470, + 
__global + unsigned char *mem_124045, + __global + unsigned char *mem_124057, + __global + unsigned char *mem_124061) +{ + #define segscan_group_sizze_99110 (mainMagnitudezisegscan_group_sizze_98955) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_128476_backing_0 = + (__local volatile + char *) scan_arr_mem_128476_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128471; + int32_t local_tid_128472; + int64_t group_sizze_128475; + int32_t wave_sizze_128474; + int32_t group_tid_128473; + + global_tid_128471 = get_global_id(0); + local_tid_128472 = get_local_id(0); + group_sizze_128475 = get_local_size(0); + wave_sizze_128474 = LOCKSTEP_WIDTH; + group_tid_128473 = get_group_id(0); + + int32_t phys_tid_98961; + + phys_tid_98961 = global_tid_128471; + + __local char *scan_arr_mem_128476; + + scan_arr_mem_128476 = (__local char *) scan_arr_mem_128476_backing_0; + + double x_99114; + double x_99115; + + x_99114 = 0.0; + for (int64_t j_128478 = 0; j_128478 < sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470)); + j_128478++) { + int64_t chunk_offset_128479 = segscan_group_sizze_99110 * j_128478 + + sext_i32_i64(group_tid_128473) * (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))); + int64_t flat_idx_128480 = chunk_offset_128479 + + sext_i32_i64(local_tid_128472); + int64_t gtid_98952 = squot64(flat_idx_128480, Nmk_74408); + int64_t gtid_98960 = flat_idx_128480 - squot64(flat_idx_128480, + Nmk_74408) * Nmk_74408; + + // threads in bounds read input + { + if (slt64(gtid_98952, m_73008) && slt64(gtid_98960, Nmk_74408)) { + bool cond_99120 = gtid_98960 == (int64_t) 0; + double defunc_0_f_res_99121; + + if (cond_99120) { + defunc_0_f_res_99121 = 
0.0; + } else { + double fr_99118 = ((__global + double *) mem_124057)[gtid_98952]; + int64_t i_99122 = sub64(gtid_98960, (int64_t) 1); + bool x_99123 = sle64((int64_t) 0, i_99122); + bool y_99124 = slt64(i_99122, num_recresids_padded_73681); + bool bounds_check_99125 = x_99123 && y_99124; + bool index_certs_99126; + + if (!bounds_check_99125) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 614) == -1) { + global_failure_args[0] = i_99122; + global_failure_args[1] = + num_recresids_padded_73681; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_99127 = ((__global + double *) mem_124045)[gtid_98952 * + num_recresids_padded_73681 + + i_99122]; + double defunc_0_f_res_f_res_99128 = x_99127 / fr_99118; + + defunc_0_f_res_99121 = defunc_0_f_res_f_res_99128; + } + // write to-scan values to parameters + { + x_99115 = defunc_0_f_res_99121; + } + // write mapped values results to global memory + { } + } + } + // do one intra-group scan operation + { + // maybe restore some to-scan values to parameters, or read neutral + { + if (!(slt64(gtid_98952, m_73008) && slt64(gtid_98960, + Nmk_74408))) { + x_99115 = 0.0; + } + } + // combine with carry and write to local memory + { + double defunc_1_op_res_99116 = x_99114 + x_99115; + + ((__local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)] = + defunc_1_op_res_99116; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double x_128481; + double x_128482; + double x_128484; + double x_128485; + bool ltid_in_bounds_128487; + + ltid_in_bounds_128487 = slt64(sext_i32_i64(local_tid_128472), + segscan_group_sizze_99110); + + int32_t skip_threads_128488; + + // read input for in-block scan + { + if (ltid_in_bounds_128487) { + x_128482 = ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)]; + if ((local_tid_128472 - squot32(local_tid_128472, 32) * + 32) == 0) { + x_128481 = x_128482; + } + } + } + 
// in-block scan (hopefully no barriers needed) + { + skip_threads_128488 = 1; + while (slt32(skip_threads_128488, 32)) { + if (sle32(skip_threads_128488, local_tid_128472 - + squot32(local_tid_128472, 32) * 32) && + ltid_in_bounds_128487) { + // read operands + { + x_128481 = ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472) - + sext_i32_i64(skip_threads_128488)]; + } + // perform operation + { + bool inactive_128489 = + slt64(srem64(sext_i32_i64(local_tid_128472) + + chunk_offset_128479, Nmk_74408), + sext_i32_i64(local_tid_128472) + + chunk_offset_128479 - + (sext_i32_i64(local_tid_128472 - + skip_threads_128488) + + chunk_offset_128479)); + + if (inactive_128489) { + x_128481 = x_128482; + } + if (!inactive_128489) { + double defunc_1_op_res_128483 = x_128481 + + x_128482; + + x_128481 = defunc_1_op_res_128483; + } + } + } + if (sle32(wave_sizze_128474, skip_threads_128488)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128488, local_tid_128472 - + squot32(local_tid_128472, 32) * 32) && + ltid_in_bounds_128487) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)] = + x_128481; + x_128482 = x_128481; + } + } + if (sle32(wave_sizze_128474, skip_threads_128488)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128488 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128472 - squot32(local_tid_128472, 32) * 32) == + 31 && ltid_in_bounds_128487) { + ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(squot32(local_tid_128472, + 32))] = + x_128481; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128490; + + // read input for in-block scan + { + if (squot32(local_tid_128472, 32) == 0 && + ltid_in_bounds_128487) { + x_128485 = ((volatile __local + double *) 
scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)]; + if ((local_tid_128472 - squot32(local_tid_128472, 32) * + 32) == 0) { + x_128484 = x_128485; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128490 = 1; + while (slt32(skip_threads_128490, 32)) { + if (sle32(skip_threads_128490, local_tid_128472 - + squot32(local_tid_128472, 32) * 32) && + (squot32(local_tid_128472, 32) == 0 && + ltid_in_bounds_128487)) { + // read operands + { + x_128484 = ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472) - + sext_i32_i64(skip_threads_128490)]; + } + // perform operation + { + bool inactive_128491 = + slt64(srem64(sext_i32_i64(local_tid_128472 * + 32 + 32 - 1) + + chunk_offset_128479, + Nmk_74408), + sext_i32_i64(local_tid_128472 * 32 + + 32 - 1) + chunk_offset_128479 - + (sext_i32_i64((local_tid_128472 - + skip_threads_128490) * + 32 + 32 - 1) + + chunk_offset_128479)); + + if (inactive_128491) { + x_128484 = x_128485; + } + if (!inactive_128491) { + double defunc_1_op_res_128486 = x_128484 + + x_128485; + + x_128484 = defunc_1_op_res_128486; + } + } + } + if (sle32(wave_sizze_128474, skip_threads_128490)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128490, local_tid_128472 - + squot32(local_tid_128472, 32) * 32) && + (squot32(local_tid_128472, 32) == 0 && + ltid_in_bounds_128487)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)] = + x_128484; + x_128485 = x_128484; + } + } + if (sle32(wave_sizze_128474, skip_threads_128490)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128490 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128472, 32) == 0 || + !ltid_in_bounds_128487)) { + // read operands + { + x_128482 = x_128481; + x_128481 = ((__local + double *) scan_arr_mem_128476)[sext_i32_i64(squot32(local_tid_128472, + 32)) - + (int64_t) 1]; + } + // 
perform operation + { + bool inactive_128492 = + slt64(srem64(sext_i32_i64(local_tid_128472) + + chunk_offset_128479, Nmk_74408), + sext_i32_i64(local_tid_128472) + + chunk_offset_128479 - + (sext_i32_i64(squot32(local_tid_128472, 32) * + 32 - 1) + chunk_offset_128479)); + + if (inactive_128492) { + x_128481 = x_128482; + } + if (!inactive_128492) { + double defunc_1_op_res_128483 = x_128481 + x_128482; + + x_128481 = defunc_1_op_res_128483; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)] = + x_128481; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128472, 32) == 0) { + ((__local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)] = + x_128482; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write partial scan result + { + if (slt64(gtid_98952, m_73008) && slt64(gtid_98960, + Nmk_74408)) { + ((__global double *) mem_124061)[gtid_98952 * Nmk_74408 + + gtid_98960] = ((__local + double *) scan_arr_mem_128476)[sext_i32_i64(local_tid_128472)]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread reads last element as carry-in for next iteration + { + bool crosses_segment_128493 = slt64(srem64(chunk_offset_128479 + + segscan_group_sizze_99110, + Nmk_74408), + chunk_offset_128479 + + segscan_group_sizze_99110 - + (chunk_offset_128479 + + segscan_group_sizze_99110 - + (int64_t) 1)); + bool should_load_carry_128494 = local_tid_128472 == 0 && + !crosses_segment_128493; + + if (should_load_carry_128494) { + x_99114 = ((__local + double *) scan_arr_mem_128476)[segscan_group_sizze_99110 - + (int64_t) 1]; + } + if (!should_load_carry_128494) { + x_99114 = 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + error_1: + return; + #undef segscan_group_sizze_99110 +} +__kernel void mainMagnitudeziscan_stage2_101483(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129289_backing_aligned_0, + 
int64_t N_73007, + int64_t m_73008, + int64_t stage1_num_groups_129258, + int32_t num_threads_129259, + __global + unsigned char *mem_124906) +{ + #define segscan_group_sizze_101500 (mainMagnitudezisegscan_group_sizze_101477) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129289_backing_0 = + (__local volatile + char *) scan_arr_mem_129289_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129284; + int32_t local_tid_129285; + int64_t group_sizze_129288; + int32_t wave_sizze_129287; + int32_t group_tid_129286; + + global_tid_129284 = get_global_id(0); + local_tid_129285 = get_local_id(0); + group_sizze_129288 = get_local_size(0); + wave_sizze_129287 = LOCKSTEP_WIDTH; + group_tid_129286 = get_group_id(0); + + int32_t phys_tid_101483; + + phys_tid_101483 = global_tid_129284; + + __local char *scan_arr_mem_129289; + + scan_arr_mem_129289 = (__local char *) scan_arr_mem_129289_backing_0; + + int64_t flat_idx_129291; + + flat_idx_129291 = (sext_i32_i64(local_tid_129285) + (int64_t) 1) * + (segscan_group_sizze_101500 * sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1; + + int64_t gtid_101474; + + gtid_101474 = squot64(flat_idx_129291, N_73007); + + int64_t gtid_101482; + + gtid_101482 = flat_idx_129291 - squot64(flat_idx_129291, N_73007) * N_73007; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_101474, m_73008) && slt64(gtid_101482, N_73007)) { + ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + ((__global int64_t *) mem_124906)[gtid_101474 * N_73007 + + gtid_101482]; + } else { + ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_101505; + int64_t x_101506; + int64_t x_129292; + int64_t x_129293; + bool ltid_in_bounds_129295; + + ltid_in_bounds_129295 = 
slt64(sext_i32_i64(local_tid_129285), + stage1_num_groups_129258); + + int32_t skip_threads_129296; + + // read input for in-block scan + { + if (ltid_in_bounds_129295) { + x_101506 = ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)]; + if ((local_tid_129285 - squot32(local_tid_129285, 32) * 32) == 0) { + x_101505 = x_101506; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129296 = 1; + while (slt32(skip_threads_129296, 32)) { + if (sle32(skip_threads_129296, local_tid_129285 - + squot32(local_tid_129285, 32) * 32) && + ltid_in_bounds_129295) { + // read operands + { + x_101505 = ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285) - + sext_i32_i64(skip_threads_129296)]; + } + // perform operation + { + bool inactive_129297 = + slt64(srem64((sext_i32_i64(local_tid_129285) + + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1, N_73007), + (sext_i32_i64(local_tid_129285) + (int64_t) 1) * + (segscan_group_sizze_101500 * sdiv_up64(m_73008 * + N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_129285 - + skip_threads_129296) + + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1)); + + if (inactive_129297) { + x_101505 = x_101506; + } + if (!inactive_129297) { + int64_t defunc_1_op_res_101507 = add64(x_101505, + x_101506); + + x_101505 = defunc_1_op_res_101507; + } + } + } + if (sle32(wave_sizze_129287, skip_threads_129296)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129296, local_tid_129285 - + squot32(local_tid_129285, 32) * 32) && + ltid_in_bounds_129295) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + x_101505; + x_101506 = x_101505; + } + } + if (sle32(wave_sizze_129287, 
skip_threads_129296)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129296 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129285 - squot32(local_tid_129285, 32) * 32) == 31 && + ltid_in_bounds_129295) { + ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(squot32(local_tid_129285, + 32))] = + x_101505; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129298; + + // read input for in-block scan + { + if (squot32(local_tid_129285, 32) == 0 && ltid_in_bounds_129295) { + x_129293 = ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)]; + if ((local_tid_129285 - squot32(local_tid_129285, 32) * 32) == + 0) { + x_129292 = x_129293; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129298 = 1; + while (slt32(skip_threads_129298, 32)) { + if (sle32(skip_threads_129298, local_tid_129285 - + squot32(local_tid_129285, 32) * 32) && + (squot32(local_tid_129285, 32) == 0 && + ltid_in_bounds_129295)) { + // read operands + { + x_129292 = ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285) - + sext_i32_i64(skip_threads_129298)]; + } + // perform operation + { + bool inactive_129299 = + slt64(srem64((sext_i32_i64(local_tid_129285 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1, N_73007), + (sext_i32_i64(local_tid_129285 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129285 - + skip_threads_129298) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1)); + + if 
(inactive_129299) { + x_129292 = x_129293; + } + if (!inactive_129299) { + int64_t defunc_1_op_res_129294 = add64(x_129292, + x_129293); + + x_129292 = defunc_1_op_res_129294; + } + } + } + if (sle32(wave_sizze_129287, skip_threads_129298)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129298, local_tid_129285 - + squot32(local_tid_129285, 32) * 32) && + (squot32(local_tid_129285, 32) == 0 && + ltid_in_bounds_129295)) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + x_129292; + x_129293 = x_129292; + } + } + if (sle32(wave_sizze_129287, skip_threads_129298)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129298 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129285, 32) == 0 || !ltid_in_bounds_129295)) { + // read operands + { + x_101506 = x_101505; + x_101505 = ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(squot32(local_tid_129285, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129300 = + slt64(srem64((sext_i32_i64(local_tid_129285) + + (int64_t) 1) * (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1, N_73007), + (sext_i32_i64(local_tid_129285) + (int64_t) 1) * + (segscan_group_sizze_101500 * sdiv_up64(m_73008 * + N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129285, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1)); + + if (inactive_129300) { + x_101505 = x_101506; + } + if (!inactive_129300) { + int64_t defunc_1_op_res_101507 = add64(x_101505, x_101506); + + x_101505 = defunc_1_op_res_101507; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + x_101505; + } + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129285, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)] = + x_101506; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_101474, m_73008) && slt64(gtid_101482, N_73007)) { + ((__global int64_t *) mem_124906)[gtid_101474 * N_73007 + + gtid_101482] = ((__local + int64_t *) scan_arr_mem_129289)[sext_i32_i64(local_tid_129285)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_101500 +} +__kernel void mainMagnitudeziscan_stage2_102464(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_129666_backing_aligned_0, + int64_t m_73008, + int64_t iota_arg_74896, + int64_t stage1_num_groups_129635, + int32_t num_threads_129636, + __global + unsigned char *mem_125032) +{ + #define segscan_group_sizze_102548 (mainMagnitudezisegscan_group_sizze_102458) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_129666_backing_0 = + (__local volatile + char *) scan_arr_mem_129666_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129661; + int32_t local_tid_129662; + int64_t group_sizze_129665; + int32_t wave_sizze_129664; + int32_t group_tid_129663; + + global_tid_129661 = get_global_id(0); + local_tid_129662 = get_local_id(0); + group_sizze_129665 = get_local_size(0); + wave_sizze_129664 = LOCKSTEP_WIDTH; + group_tid_129663 = get_group_id(0); + + int32_t phys_tid_102464; + + phys_tid_102464 = global_tid_129661; + + __local char *scan_arr_mem_129666; + + scan_arr_mem_129666 = (__local char *) scan_arr_mem_129666_backing_0; + + int64_t flat_idx_129668; + + flat_idx_129668 = (sext_i32_i64(local_tid_129662) + (int64_t) 1) * + (segscan_group_sizze_102548 * sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1; + + int64_t 
gtid_102455; + + gtid_102455 = squot64(flat_idx_129668, iota_arg_74896); + + int64_t gtid_102463; + + gtid_102463 = flat_idx_129668 - squot64(flat_idx_129668, iota_arg_74896) * + iota_arg_74896; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_102455, m_73008) && slt64(gtid_102463, iota_arg_74896)) { + ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + ((__global double *) mem_125032)[gtid_102455 * iota_arg_74896 + + gtid_102463]; + } else { + ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double x_102552; + double x_102553; + double x_129669; + double x_129670; + bool ltid_in_bounds_129672; + + ltid_in_bounds_129672 = slt64(sext_i32_i64(local_tid_129662), + stage1_num_groups_129635); + + int32_t skip_threads_129673; + + // read input for in-block scan + { + if (ltid_in_bounds_129672) { + x_102553 = ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)]; + if ((local_tid_129662 - squot32(local_tid_129662, 32) * 32) == 0) { + x_102552 = x_102553; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129673 = 1; + while (slt32(skip_threads_129673, 32)) { + if (sle32(skip_threads_129673, local_tid_129662 - + squot32(local_tid_129662, 32) * 32) && + ltid_in_bounds_129672) { + // read operands + { + x_102552 = ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662) - + sext_i32_i64(skip_threads_129673)]; + } + // perform operation + { + bool inactive_129674 = + slt64(srem64((sext_i32_i64(local_tid_129662) + + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1, iota_arg_74896), + (sext_i32_i64(local_tid_129662) + (int64_t) 1) * + (segscan_group_sizze_102548 * sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1 - 
((sext_i32_i64(local_tid_129662 - + skip_threads_129673) + + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1)); + + if (inactive_129674) { + x_102552 = x_102553; + } + if (!inactive_129674) { + double defunc_1_op_res_102554 = x_102552 + x_102553; + + x_102552 = defunc_1_op_res_102554; + } + } + } + if (sle32(wave_sizze_129664, skip_threads_129673)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129673, local_tid_129662 - + squot32(local_tid_129662, 32) * 32) && + ltid_in_bounds_129672) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + x_102552; + x_102553 = x_102552; + } + } + if (sle32(wave_sizze_129664, skip_threads_129673)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129673 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129662 - squot32(local_tid_129662, 32) * 32) == 31 && + ltid_in_bounds_129672) { + ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(squot32(local_tid_129662, + 32))] = + x_102552; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129675; + + // read input for in-block scan + { + if (squot32(local_tid_129662, 32) == 0 && ltid_in_bounds_129672) { + x_129670 = ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)]; + if ((local_tid_129662 - squot32(local_tid_129662, 32) * 32) == + 0) { + x_129669 = x_129670; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129675 = 1; + while (slt32(skip_threads_129675, 32)) { + if (sle32(skip_threads_129675, local_tid_129662 - + squot32(local_tid_129662, 32) * 32) && + (squot32(local_tid_129662, 32) == 0 && + ltid_in_bounds_129672)) { + // read operands + { + x_129669 = ((volatile 
__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662) - + sext_i32_i64(skip_threads_129675)]; + } + // perform operation + { + bool inactive_129676 = + slt64(srem64((sext_i32_i64(local_tid_129662 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1, iota_arg_74896), + (sext_i32_i64(local_tid_129662 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_129662 - + skip_threads_129675) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1)); + + if (inactive_129676) { + x_129669 = x_129670; + } + if (!inactive_129676) { + double defunc_1_op_res_129671 = x_129669 + x_129670; + + x_129669 = defunc_1_op_res_129671; + } + } + } + if (sle32(wave_sizze_129664, skip_threads_129675)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129675, local_tid_129662 - + squot32(local_tid_129662, 32) * 32) && + (squot32(local_tid_129662, 32) == 0 && + ltid_in_bounds_129672)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + x_129669; + x_129670 = x_129669; + } + } + if (sle32(wave_sizze_129664, skip_threads_129675)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129675 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129662, 32) == 0 || !ltid_in_bounds_129672)) { + // read operands + { + x_102553 = x_102552; + x_102552 = ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(squot32(local_tid_129662, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129677 = + slt64(srem64((sext_i32_i64(local_tid_129662) + + (int64_t) 1) * (segscan_group_sizze_102548 * + 
sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1, iota_arg_74896), + (sext_i32_i64(local_tid_129662) + (int64_t) 1) * + (segscan_group_sizze_102548 * sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_129662, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1)); + + if (inactive_129677) { + x_102552 = x_102553; + } + if (!inactive_129677) { + double defunc_1_op_res_102554 = x_102552 + x_102553; + + x_102552 = defunc_1_op_res_102554; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + x_102552; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129662, 32) == 0) { + ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)] = + x_102553; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_102455, m_73008) && slt64(gtid_102463, iota_arg_74896)) { + ((__global double *) mem_125032)[gtid_102455 * iota_arg_74896 + + gtid_102463] = ((__local + double *) scan_arr_mem_129666)[sext_i32_i64(local_tid_129662)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_102548 +} +__kernel void mainMagnitudeziscan_stage2_90482(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_126443_backing_aligned_0, + int64_t m_73008, int64_t n_73011, + int64_t stage1_num_groups_126412, + int32_t num_threads_126413, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_90596 (mainMagnitudezisegscan_group_sizze_90476) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_126443_backing_0 = + (__local volatile + char *) scan_arr_mem_126443_backing_aligned_0; 
+ + if (*global_failure >= 0) + return; + + int32_t global_tid_126438; + int32_t local_tid_126439; + int64_t group_sizze_126442; + int32_t wave_sizze_126441; + int32_t group_tid_126440; + + global_tid_126438 = get_global_id(0); + local_tid_126439 = get_local_id(0); + group_sizze_126442 = get_local_size(0); + wave_sizze_126441 = LOCKSTEP_WIDTH; + group_tid_126440 = get_group_id(0); + + int32_t phys_tid_90482; + + phys_tid_90482 = global_tid_126438; + + __local char *scan_arr_mem_126443; + + scan_arr_mem_126443 = (__local char *) scan_arr_mem_126443_backing_0; + + int64_t flat_idx_126445; + + flat_idx_126445 = (sext_i32_i64(local_tid_126439) + (int64_t) 1) * + (segscan_group_sizze_90596 * sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1; + + int64_t gtid_90473; + + gtid_90473 = squot64(flat_idx_126445, n_73011); + + int64_t gtid_90481; + + gtid_90481 = flat_idx_126445 - squot64(flat_idx_126445, n_73011) * n_73011; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_90473, m_73008) && slt64(gtid_90481, n_73011)) { + ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + ((__global int64_t *) mem_120201)[gtid_90473 * n_73011 + + gtid_90481]; + } else { + ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t x_90600; + int64_t x_90601; + int64_t x_126446; + int64_t x_126447; + bool ltid_in_bounds_126449; + + ltid_in_bounds_126449 = slt64(sext_i32_i64(local_tid_126439), + stage1_num_groups_126412); + + int32_t skip_threads_126450; + + // read input for in-block scan + { + if (ltid_in_bounds_126449) { + x_90601 = ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)]; + if ((local_tid_126439 - squot32(local_tid_126439, 32) * 32) == 0) { + x_90600 = x_90601; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126450 = 1; + while 
(slt32(skip_threads_126450, 32)) { + if (sle32(skip_threads_126450, local_tid_126439 - + squot32(local_tid_126439, 32) * 32) && + ltid_in_bounds_126449) { + // read operands + { + x_90600 = ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439) - + sext_i32_i64(skip_threads_126450)]; + } + // perform operation + { + bool inactive_126451 = + slt64(srem64((sext_i32_i64(local_tid_126439) + + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1, n_73011), + (sext_i32_i64(local_tid_126439) + (int64_t) 1) * + (segscan_group_sizze_90596 * sdiv_up64(m_73008 * + n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_126439 - + skip_threads_126450) + + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1)); + + if (inactive_126451) { + x_90600 = x_90601; + } + if (!inactive_126451) { + int64_t defunc_1_op_res_90602 = add64(x_90600, x_90601); + + x_90600 = defunc_1_op_res_90602; + } + } + } + if (sle32(wave_sizze_126441, skip_threads_126450)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126450, local_tid_126439 - + squot32(local_tid_126439, 32) * 32) && + ltid_in_bounds_126449) { + // write result + { + ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + x_90600; + x_90601 = x_90600; + } + } + if (sle32(wave_sizze_126441, skip_threads_126450)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126450 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126439 - squot32(local_tid_126439, 32) * 32) == 31 && + ltid_in_bounds_126449) { + ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(squot32(local_tid_126439, + 32))] = + x_90600; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' 
contains carry-in for block 'i+1' + { + int32_t skip_threads_126452; + + // read input for in-block scan + { + if (squot32(local_tid_126439, 32) == 0 && ltid_in_bounds_126449) { + x_126447 = ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)]; + if ((local_tid_126439 - squot32(local_tid_126439, 32) * 32) == + 0) { + x_126446 = x_126447; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126452 = 1; + while (slt32(skip_threads_126452, 32)) { + if (sle32(skip_threads_126452, local_tid_126439 - + squot32(local_tid_126439, 32) * 32) && + (squot32(local_tid_126439, 32) == 0 && + ltid_in_bounds_126449)) { + // read operands + { + x_126446 = ((volatile __local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439) - + sext_i32_i64(skip_threads_126452)]; + } + // perform operation + { + bool inactive_126453 = + slt64(srem64((sext_i32_i64(local_tid_126439 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1, n_73011), + (sext_i32_i64(local_tid_126439 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1 - + ((sext_i32_i64((local_tid_126439 - + skip_threads_126452) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1)); + + if (inactive_126453) { + x_126446 = x_126447; + } + if (!inactive_126453) { + int64_t defunc_1_op_res_126448 = add64(x_126446, + x_126447); + + x_126446 = defunc_1_op_res_126448; + } + } + } + if (sle32(wave_sizze_126441, skip_threads_126452)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126452, local_tid_126439 - + squot32(local_tid_126439, 32) * 32) && + (squot32(local_tid_126439, 32) == 0 && + ltid_in_bounds_126449)) { + // write result + { + ((volatile __local + int64_t *) 
scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + x_126446; + x_126447 = x_126446; + } + } + if (sle32(wave_sizze_126441, skip_threads_126452)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126452 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126439, 32) == 0 || !ltid_in_bounds_126449)) { + // read operands + { + x_90601 = x_90600; + x_90600 = ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(squot32(local_tid_126439, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126454 = + slt64(srem64((sext_i32_i64(local_tid_126439) + + (int64_t) 1) * (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1, n_73011), + (sext_i32_i64(local_tid_126439) + (int64_t) 1) * + (segscan_group_sizze_90596 * sdiv_up64(m_73008 * + n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_126439, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1)); + + if (inactive_126454) { + x_90600 = x_90601; + } + if (!inactive_126454) { + int64_t defunc_1_op_res_90602 = add64(x_90600, x_90601); + + x_90600 = defunc_1_op_res_90602; + } + } + // write final result + { + ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + x_90600; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126439, 32) == 0) { + ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)] = + x_90601; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_90473, m_73008) && slt64(gtid_90481, n_73011)) { + ((__global int64_t *) mem_120201)[gtid_90473 * n_73011 + + gtid_90481] = ((__local + int64_t *) scan_arr_mem_126443)[sext_i32_i64(local_tid_126439)]; + } + } + 
+ error_0: + return; + #undef segscan_group_sizze_90596 +} +__kernel void mainMagnitudeziscan_stage2_98961(__global int *global_failure, + __local volatile + int64_t *scan_arr_mem_128500_backing_aligned_0, + int64_t m_73008, + int64_t Nmk_74408, + int64_t stage1_num_groups_128469, + int32_t num_threads_128470, + __global + unsigned char *mem_124061) +{ + #define segscan_group_sizze_99110 (mainMagnitudezisegscan_group_sizze_98955) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict scan_arr_mem_128500_backing_0 = + (__local volatile + char *) scan_arr_mem_128500_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128495; + int32_t local_tid_128496; + int64_t group_sizze_128499; + int32_t wave_sizze_128498; + int32_t group_tid_128497; + + global_tid_128495 = get_global_id(0); + local_tid_128496 = get_local_id(0); + group_sizze_128499 = get_local_size(0); + wave_sizze_128498 = LOCKSTEP_WIDTH; + group_tid_128497 = get_group_id(0); + + int32_t phys_tid_98961; + + phys_tid_98961 = global_tid_128495; + + __local char *scan_arr_mem_128500; + + scan_arr_mem_128500 = (__local char *) scan_arr_mem_128500_backing_0; + + int64_t flat_idx_128502; + + flat_idx_128502 = (sext_i32_i64(local_tid_128496) + (int64_t) 1) * + (segscan_group_sizze_99110 * sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1; + + int64_t gtid_98952; + + gtid_98952 = squot64(flat_idx_128502, Nmk_74408); + + int64_t gtid_98960; + + gtid_98960 = flat_idx_128502 - squot64(flat_idx_128502, Nmk_74408) * + Nmk_74408; + // threads in bound read carries; others get neutral element + { + if (slt64(gtid_98952, m_73008) && slt64(gtid_98960, Nmk_74408)) { + ((__local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + ((__global double *) mem_124061)[gtid_98952 * Nmk_74408 + + gtid_98960]; + } else { + ((__local + double *) 
scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double x_99114; + double x_99115; + double x_128503; + double x_128504; + bool ltid_in_bounds_128506; + + ltid_in_bounds_128506 = slt64(sext_i32_i64(local_tid_128496), + stage1_num_groups_128469); + + int32_t skip_threads_128507; + + // read input for in-block scan + { + if (ltid_in_bounds_128506) { + x_99115 = ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)]; + if ((local_tid_128496 - squot32(local_tid_128496, 32) * 32) == 0) { + x_99114 = x_99115; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128507 = 1; + while (slt32(skip_threads_128507, 32)) { + if (sle32(skip_threads_128507, local_tid_128496 - + squot32(local_tid_128496, 32) * 32) && + ltid_in_bounds_128506) { + // read operands + { + x_99114 = ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496) - + sext_i32_i64(skip_threads_128507)]; + } + // perform operation + { + bool inactive_128508 = + slt64(srem64((sext_i32_i64(local_tid_128496) + + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1, Nmk_74408), + (sext_i32_i64(local_tid_128496) + (int64_t) 1) * + (segscan_group_sizze_99110 * sdiv_up64(m_73008 * + Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1 - ((sext_i32_i64(local_tid_128496 - + skip_threads_128507) + + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1)); + + if (inactive_128508) { + x_99114 = x_99115; + } + if (!inactive_128508) { + double defunc_1_op_res_99116 = x_99114 + x_99115; + + x_99114 = defunc_1_op_res_99116; + } + } + } + if (sle32(wave_sizze_128498, skip_threads_128507)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128507, local_tid_128496 - + squot32(local_tid_128496, 32) * 32) && + 
ltid_in_bounds_128506) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + x_99114; + x_99115 = x_99114; + } + } + if (sle32(wave_sizze_128498, skip_threads_128507)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128507 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128496 - squot32(local_tid_128496, 32) * 32) == 31 && + ltid_in_bounds_128506) { + ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(squot32(local_tid_128496, + 32))] = + x_99114; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128509; + + // read input for in-block scan + { + if (squot32(local_tid_128496, 32) == 0 && ltid_in_bounds_128506) { + x_128504 = ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)]; + if ((local_tid_128496 - squot32(local_tid_128496, 32) * 32) == + 0) { + x_128503 = x_128504; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128509 = 1; + while (slt32(skip_threads_128509, 32)) { + if (sle32(skip_threads_128509, local_tid_128496 - + squot32(local_tid_128496, 32) * 32) && + (squot32(local_tid_128496, 32) == 0 && + ltid_in_bounds_128506)) { + // read operands + { + x_128503 = ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496) - + sext_i32_i64(skip_threads_128509)]; + } + // perform operation + { + bool inactive_128510 = + slt64(srem64((sext_i32_i64(local_tid_128496 * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1, Nmk_74408), + (sext_i32_i64(local_tid_128496 * 32 + 32 - + 1) + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1 - + 
((sext_i32_i64((local_tid_128496 - + skip_threads_128509) * 32 + + 32 - 1) + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1)); + + if (inactive_128510) { + x_128503 = x_128504; + } + if (!inactive_128510) { + double defunc_1_op_res_128505 = x_128503 + x_128504; + + x_128503 = defunc_1_op_res_128505; + } + } + } + if (sle32(wave_sizze_128498, skip_threads_128509)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128509, local_tid_128496 - + squot32(local_tid_128496, 32) * 32) && + (squot32(local_tid_128496, 32) == 0 && + ltid_in_bounds_128506)) { + // write result + { + ((volatile __local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + x_128503; + x_128504 = x_128503; + } + } + if (sle32(wave_sizze_128498, skip_threads_128509)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128509 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128496, 32) == 0 || !ltid_in_bounds_128506)) { + // read operands + { + x_99115 = x_99114; + x_99114 = ((__local + double *) scan_arr_mem_128500)[sext_i32_i64(squot32(local_tid_128496, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128511 = + slt64(srem64((sext_i32_i64(local_tid_128496) + + (int64_t) 1) * (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * + Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1, Nmk_74408), + (sext_i32_i64(local_tid_128496) + (int64_t) 1) * + (segscan_group_sizze_99110 * sdiv_up64(m_73008 * + Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1 - + ((sext_i32_i64(squot32(local_tid_128496, 32) * 32 - + 1) + (int64_t) 1) * (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1)); + + if (inactive_128511) { + x_99114 = x_99115; + } + if (!inactive_128511) { + double defunc_1_op_res_99116 = x_99114 + x_99115; 
+ + x_99114 = defunc_1_op_res_99116; + } + } + // write final result + { + ((__local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + x_99114; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128496, 32) == 0) { + ((__local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)] = + x_99115; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // threads in bounds write scanned carries + { + if (slt64(gtid_98952, m_73008) && slt64(gtid_98960, Nmk_74408)) { + ((__global double *) mem_124061)[gtid_98952 * Nmk_74408 + + gtid_98960] = ((__local + double *) scan_arr_mem_128500)[sext_i32_i64(local_tid_128496)]; + } + } + + error_0: + return; + #undef segscan_group_sizze_99110 +} +__kernel void mainMagnitudeziscan_stage3_101483(__global int *global_failure, + int64_t N_73007, + int64_t m_73008, + int64_t num_groups_101501, + int32_t num_threads_129259, + int32_t required_groups_129301, + __global + unsigned char *mem_124906) +{ + #define segscan_group_sizze_101500 (mainMagnitudezisegscan_group_sizze_101477) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129302; + int32_t local_tid_129303; + int64_t group_sizze_129306; + int32_t wave_sizze_129305; + int32_t group_tid_129304; + + global_tid_129302 = get_global_id(0); + local_tid_129303 = get_local_id(0); + group_sizze_129306 = get_local_size(0); + wave_sizze_129305 = LOCKSTEP_WIDTH; + group_tid_129304 = get_group_id(0); + + int32_t phys_tid_101483; + + phys_tid_101483 = global_tid_129302; + + int32_t phys_group_id_129307; + + phys_group_id_129307 = get_group_id(0); + for (int32_t i_129308 = 0; i_129308 < sdiv_up32(required_groups_129301 - + phys_group_id_129307, + sext_i64_i32(num_groups_101501)); + i_129308++) { + int32_t virt_group_id_129309 = phys_group_id_129307 + i_129308 * + sext_i64_i32(num_groups_101501); + int64_t flat_idx_129310 = 
sext_i32_i64(virt_group_id_129309) * + segscan_group_sizze_101500 + sext_i32_i64(local_tid_129303); + int64_t gtid_101474 = squot64(flat_idx_129310, N_73007); + int64_t gtid_101482 = flat_idx_129310 - squot64(flat_idx_129310, + N_73007) * N_73007; + int64_t orig_group_129311 = squot64(flat_idx_129310, + segscan_group_sizze_101500 * + sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))); + int64_t carry_in_flat_idx_129312 = orig_group_129311 * + (segscan_group_sizze_101500 * sdiv_up64(m_73008 * N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1; + + if (slt64(gtid_101474, m_73008) && slt64(gtid_101482, N_73007)) { + if (!(orig_group_129311 == (int64_t) 0 || (flat_idx_129310 == + (orig_group_129311 + + (int64_t) 1) * + (segscan_group_sizze_101500 * + sdiv_up64(m_73008 * + N_73007, + sext_i32_i64(num_threads_129259))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129310, + N_73007), + flat_idx_129310 - + carry_in_flat_idx_129312)))) { + int64_t x_101505; + int64_t x_101506; + + x_101505 = ((__global + int64_t *) mem_124906)[squot64(carry_in_flat_idx_129312, + N_73007) * N_73007 + + (carry_in_flat_idx_129312 - + squot64(carry_in_flat_idx_129312, + N_73007) * + N_73007)]; + x_101506 = ((__global int64_t *) mem_124906)[gtid_101474 * + N_73007 + + gtid_101482]; + + int64_t defunc_1_op_res_101507; + + defunc_1_op_res_101507 = add64(x_101505, x_101506); + x_101505 = defunc_1_op_res_101507; + ((__global int64_t *) mem_124906)[gtid_101474 * N_73007 + + gtid_101482] = x_101505; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_101500 +} +__kernel void mainMagnitudeziscan_stage3_102464(__global int *global_failure, + int64_t m_73008, + int64_t iota_arg_74896, + int64_t num_groups_102549, + int32_t num_threads_129636, + int32_t required_groups_129678, + __global + unsigned char *mem_125032) +{ + #define segscan_group_sizze_102548 (mainMagnitudezisegscan_group_sizze_102458) + + const int 
block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129679; + int32_t local_tid_129680; + int64_t group_sizze_129683; + int32_t wave_sizze_129682; + int32_t group_tid_129681; + + global_tid_129679 = get_global_id(0); + local_tid_129680 = get_local_id(0); + group_sizze_129683 = get_local_size(0); + wave_sizze_129682 = LOCKSTEP_WIDTH; + group_tid_129681 = get_group_id(0); + + int32_t phys_tid_102464; + + phys_tid_102464 = global_tid_129679; + + int32_t phys_group_id_129684; + + phys_group_id_129684 = get_group_id(0); + for (int32_t i_129685 = 0; i_129685 < sdiv_up32(required_groups_129678 - + phys_group_id_129684, + sext_i64_i32(num_groups_102549)); + i_129685++) { + int32_t virt_group_id_129686 = phys_group_id_129684 + i_129685 * + sext_i64_i32(num_groups_102549); + int64_t flat_idx_129687 = sext_i32_i64(virt_group_id_129686) * + segscan_group_sizze_102548 + sext_i32_i64(local_tid_129680); + int64_t gtid_102455 = squot64(flat_idx_129687, iota_arg_74896); + int64_t gtid_102463 = flat_idx_129687 - squot64(flat_idx_129687, + iota_arg_74896) * + iota_arg_74896; + int64_t orig_group_129688 = squot64(flat_idx_129687, + segscan_group_sizze_102548 * + sdiv_up64(m_73008 * iota_arg_74896, + sext_i32_i64(num_threads_129636))); + int64_t carry_in_flat_idx_129689 = orig_group_129688 * + (segscan_group_sizze_102548 * sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1; + + if (slt64(gtid_102455, m_73008) && slt64(gtid_102463, iota_arg_74896)) { + if (!(orig_group_129688 == (int64_t) 0 || (flat_idx_129687 == + (orig_group_129688 + + (int64_t) 1) * + (segscan_group_sizze_102548 * + sdiv_up64(m_73008 * + iota_arg_74896, + sext_i32_i64(num_threads_129636))) - + (int64_t) 1 || + slt64(srem64(flat_idx_129687, + iota_arg_74896), + flat_idx_129687 - + carry_in_flat_idx_129689)))) { + double x_102552; + double x_102553; + + x_102552 = ((__global + double *) 
mem_125032)[squot64(carry_in_flat_idx_129689, + iota_arg_74896) * + iota_arg_74896 + + (carry_in_flat_idx_129689 - + squot64(carry_in_flat_idx_129689, + iota_arg_74896) * + iota_arg_74896)]; + x_102553 = ((__global double *) mem_125032)[gtid_102455 * + iota_arg_74896 + + gtid_102463]; + + double defunc_1_op_res_102554; + + defunc_1_op_res_102554 = x_102552 + x_102553; + x_102552 = defunc_1_op_res_102554; + ((__global double *) mem_125032)[gtid_102455 * iota_arg_74896 + + gtid_102463] = x_102552; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_102548 +} +__kernel void mainMagnitudeziscan_stage3_90482(__global int *global_failure, + int64_t m_73008, int64_t n_73011, + int64_t num_groups_90597, + int32_t num_threads_126413, + int32_t required_groups_126455, + __global + unsigned char *mem_120201) +{ + #define segscan_group_sizze_90596 (mainMagnitudezisegscan_group_sizze_90476) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126456; + int32_t local_tid_126457; + int64_t group_sizze_126460; + int32_t wave_sizze_126459; + int32_t group_tid_126458; + + global_tid_126456 = get_global_id(0); + local_tid_126457 = get_local_id(0); + group_sizze_126460 = get_local_size(0); + wave_sizze_126459 = LOCKSTEP_WIDTH; + group_tid_126458 = get_group_id(0); + + int32_t phys_tid_90482; + + phys_tid_90482 = global_tid_126456; + + int32_t phys_group_id_126461; + + phys_group_id_126461 = get_group_id(0); + for (int32_t i_126462 = 0; i_126462 < sdiv_up32(required_groups_126455 - + phys_group_id_126461, + sext_i64_i32(num_groups_90597)); + i_126462++) { + int32_t virt_group_id_126463 = phys_group_id_126461 + i_126462 * + sext_i64_i32(num_groups_90597); + int64_t flat_idx_126464 = sext_i32_i64(virt_group_id_126463) * + segscan_group_sizze_90596 + sext_i32_i64(local_tid_126457); + int64_t gtid_90473 = squot64(flat_idx_126464, 
n_73011); + int64_t gtid_90481 = flat_idx_126464 - squot64(flat_idx_126464, + n_73011) * n_73011; + int64_t orig_group_126465 = squot64(flat_idx_126464, + segscan_group_sizze_90596 * + sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))); + int64_t carry_in_flat_idx_126466 = orig_group_126465 * + (segscan_group_sizze_90596 * sdiv_up64(m_73008 * n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1; + + if (slt64(gtid_90473, m_73008) && slt64(gtid_90481, n_73011)) { + if (!(orig_group_126465 == (int64_t) 0 || (flat_idx_126464 == + (orig_group_126465 + + (int64_t) 1) * + (segscan_group_sizze_90596 * + sdiv_up64(m_73008 * + n_73011, + sext_i32_i64(num_threads_126413))) - + (int64_t) 1 || + slt64(srem64(flat_idx_126464, + n_73011), + flat_idx_126464 - + carry_in_flat_idx_126466)))) { + int64_t x_90600; + int64_t x_90601; + + x_90600 = ((__global + int64_t *) mem_120201)[squot64(carry_in_flat_idx_126466, + n_73011) * n_73011 + + (carry_in_flat_idx_126466 - + squot64(carry_in_flat_idx_126466, + n_73011) * + n_73011)]; + x_90601 = ((__global int64_t *) mem_120201)[gtid_90473 * + n_73011 + + gtid_90481]; + + int64_t defunc_1_op_res_90602; + + defunc_1_op_res_90602 = add64(x_90600, x_90601); + x_90600 = defunc_1_op_res_90602; + ((__global int64_t *) mem_120201)[gtid_90473 * n_73011 + + gtid_90481] = x_90600; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_90596 +} +__kernel void mainMagnitudeziscan_stage3_98961(__global int *global_failure, + int64_t m_73008, + int64_t Nmk_74408, + int64_t num_groups_99111, + int32_t num_threads_128470, + int32_t required_groups_128512, + __global + unsigned char *mem_124061) +{ + #define segscan_group_sizze_99110 (mainMagnitudezisegscan_group_sizze_98955) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128513; + int32_t local_tid_128514; + int64_t 
group_sizze_128517; + int32_t wave_sizze_128516; + int32_t group_tid_128515; + + global_tid_128513 = get_global_id(0); + local_tid_128514 = get_local_id(0); + group_sizze_128517 = get_local_size(0); + wave_sizze_128516 = LOCKSTEP_WIDTH; + group_tid_128515 = get_group_id(0); + + int32_t phys_tid_98961; + + phys_tid_98961 = global_tid_128513; + + int32_t phys_group_id_128518; + + phys_group_id_128518 = get_group_id(0); + for (int32_t i_128519 = 0; i_128519 < sdiv_up32(required_groups_128512 - + phys_group_id_128518, + sext_i64_i32(num_groups_99111)); + i_128519++) { + int32_t virt_group_id_128520 = phys_group_id_128518 + i_128519 * + sext_i64_i32(num_groups_99111); + int64_t flat_idx_128521 = sext_i32_i64(virt_group_id_128520) * + segscan_group_sizze_99110 + sext_i32_i64(local_tid_128514); + int64_t gtid_98952 = squot64(flat_idx_128521, Nmk_74408); + int64_t gtid_98960 = flat_idx_128521 - squot64(flat_idx_128521, + Nmk_74408) * Nmk_74408; + int64_t orig_group_128522 = squot64(flat_idx_128521, + segscan_group_sizze_99110 * + sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))); + int64_t carry_in_flat_idx_128523 = orig_group_128522 * + (segscan_group_sizze_99110 * sdiv_up64(m_73008 * Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1; + + if (slt64(gtid_98952, m_73008) && slt64(gtid_98960, Nmk_74408)) { + if (!(orig_group_128522 == (int64_t) 0 || (flat_idx_128521 == + (orig_group_128522 + + (int64_t) 1) * + (segscan_group_sizze_99110 * + sdiv_up64(m_73008 * + Nmk_74408, + sext_i32_i64(num_threads_128470))) - + (int64_t) 1 || + slt64(srem64(flat_idx_128521, + Nmk_74408), + flat_idx_128521 - + carry_in_flat_idx_128523)))) { + double x_99114; + double x_99115; + + x_99114 = ((__global + double *) mem_124061)[squot64(carry_in_flat_idx_128523, + Nmk_74408) * + Nmk_74408 + + (carry_in_flat_idx_128523 - + squot64(carry_in_flat_idx_128523, + Nmk_74408) * + Nmk_74408)]; + x_99115 = ((__global double *) mem_124061)[gtid_98952 * + Nmk_74408 + + 
gtid_98960]; + + double defunc_1_op_res_99116; + + defunc_1_op_res_99116 = x_99114 + x_99115; + x_99114 = defunc_1_op_res_99116; + ((__global double *) mem_124061)[gtid_98952 * Nmk_74408 + + gtid_98960] = x_99114; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segscan_group_sizze_99110 +} +__kernel void mainMagnitudezisegmap_100025(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_100050, + int64_t num_threads_126048, __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_120124, + __global unsigned char *mem_124145, + __global unsigned char *mem_124149, + __global unsigned char *mem_124191) +{ + #define segmap_group_sizze_100049 (mainMagnitudezisegmap_group_sizze_100027) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128708; + int32_t local_tid_128709; + int64_t group_sizze_128712; + int32_t wave_sizze_128711; + int32_t group_tid_128710; + + global_tid_128708 = get_global_id(0); + local_tid_128709 = get_local_id(0); + group_sizze_128712 = get_local_size(0); + wave_sizze_128711 = LOCKSTEP_WIDTH; + group_tid_128710 = get_group_id(0); + + int32_t phys_tid_100025; + + phys_tid_100025 = global_tid_128708; + + int32_t phys_group_id_128713; + + phys_group_id_128713 = get_group_id(0); + for (int32_t i_128714 = 0; i_128714 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_100049)) - + phys_group_id_128713, sext_i64_i32(num_groups_100050)); + i_128714++) { + int32_t virt_group_id_128715 = phys_group_id_128713 + i_128714 * + sext_i64_i32(num_groups_100050); + int64_t gtid_100024 = sext_i32_i64(virt_group_id_128715) * + segmap_group_sizze_100049 + sext_i32_i64(local_tid_128709); + + if (slt64(gtid_100024, m_73008)) { + for (int64_t i_119914 = 0; i_119914 < k2p2zq_73023; i_119914++) { + for (int64_t i_119918 = 
0; i_119918 < k2p2zq_73023; + i_119918++) { + double defunc_2_reduce_res_100058; + double redout_119920 = 0.0; + + for (int64_t i_119921 = 0; i_119921 < n_73011; i_119921++) { + double x_100062 = ((__global + double *) mem_124145)[i_119921 * + m_73008 + + gtid_100024]; + double x_100063 = ((__global + double *) binop_p_mem_120117)[i_119914 * + N_73007 + + i_119921]; + double x_100064 = ((__global + double *) mem_120124)[i_119921 * + k2p2zq_73023 + + i_119918]; + double x_100065 = x_100063 * x_100064; + bool isnan_res_100066; + + isnan_res_100066 = futrts_isnan64(x_100062); + + double y_100067; + + if (isnan_res_100066) { + y_100067 = 0.0; + } else { + y_100067 = 1.0; + } + + double defunc_2_f_res_100068 = x_100065 * y_100067; + double defunc_1_op_res_100061 = defunc_2_f_res_100068 + + redout_119920; + double redout_tmp_128718 = defunc_1_op_res_100061; + + redout_119920 = redout_tmp_128718; + } + defunc_2_reduce_res_100058 = redout_119920; + ((__global double *) mem_124149)[phys_tid_100025 + + (i_119914 * + (num_threads_126048 * + k2p2zq_73023) + + i_119918 * + num_threads_126048)] = + defunc_2_reduce_res_100058; + } + } + for (int64_t i_128719 = 0; i_128719 < k2p2zq_73023; i_128719++) { + for (int64_t i_128720 = 0; i_128720 < k2p2zq_73023; + i_128720++) { + ((__global double *) mem_124191)[i_128719 * (m_73008 * + k2p2zq_73023) + + i_128720 * m_73008 + + gtid_100024] = ((__global + double *) mem_124149)[phys_tid_100025 + + (i_128719 * + (num_threads_126048 * + k2p2zq_73023) + + i_128720 * + num_threads_126048)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_100049 +} +__kernel void mainMagnitudezisegmap_100071(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_100227, + int64_t num_threads_126050, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124, __global + unsigned char *mem_124142, 
__global + unsigned char *mem_124194, __global + unsigned char *mem_124210) +{ + #define segmap_group_sizze_100226 (mainMagnitudezisegmap_group_sizze_100074) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128721; + int32_t local_tid_128722; + int64_t group_sizze_128725; + int32_t wave_sizze_128724; + int32_t group_tid_128723; + + global_tid_128721 = get_global_id(0); + local_tid_128722 = get_local_id(0); + group_sizze_128725 = get_local_size(0); + wave_sizze_128724 = LOCKSTEP_WIDTH; + group_tid_128723 = get_group_id(0); + + int32_t phys_tid_100071; + + phys_tid_100071 = global_tid_128721; + + int32_t phys_group_id_128726; + + phys_group_id_128726 = get_group_id(0); + for (int32_t i_128727 = 0; i_128727 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + segmap_group_sizze_100226)) - + phys_group_id_128726, sext_i64_i32(num_groups_100227)); + i_128727++) { + int32_t virt_group_id_128728 = phys_group_id_128726 + i_128727 * + sext_i64_i32(num_groups_100227); + int64_t gtid_100069 = squot64(sext_i32_i64(virt_group_id_128728) * + segmap_group_sizze_100226 + + sext_i32_i64(local_tid_128722), + k2p2zq_73023); + int64_t gtid_100070 = sext_i32_i64(virt_group_id_128728) * + segmap_group_sizze_100226 + sext_i32_i64(local_tid_128722) - + squot64(sext_i32_i64(virt_group_id_128728) * + segmap_group_sizze_100226 + + sext_i32_i64(local_tid_128722), k2p2zq_73023) * + k2p2zq_73023; + + if (slt64(gtid_100069, m_73008) && slt64(gtid_100070, k2p2zq_73023)) { + for (int64_t i_119924 = 0; i_119924 < k2p2zq_73023; i_119924++) { + double defunc_2_reduce_res_100238; + double redout_119926 = 0.0; + + for (int64_t i_119927 = 0; i_119927 < n_73011; i_119927++) { + double x_100242 = ((__global + double *) mem_124142)[gtid_100069 * + N_73007 + + i_119927]; + double x_100243 = ((__global + double *) mem_120120)[i_119927 * + k2p2zq_73023 + + gtid_100070]; + double x_100244 = ((__global 
+ double *) mem_120124)[i_119927 * + k2p2zq_73023 + + i_119924]; + double x_100245 = x_100243 * x_100244; + bool isnan_res_100246; + + isnan_res_100246 = futrts_isnan64(x_100242); + + double y_100247; + + if (isnan_res_100246) { + y_100247 = 0.0; + } else { + y_100247 = 1.0; + } + + double defunc_2_f_res_100248 = x_100245 * y_100247; + double defunc_1_op_res_100241 = defunc_2_f_res_100248 + + redout_119926; + double redout_tmp_128730 = defunc_1_op_res_100241; + + redout_119926 = redout_tmp_128730; + } + defunc_2_reduce_res_100238 = redout_119926; + ((__global double *) mem_124194)[phys_tid_100071 + i_119924 * + num_threads_126050] = + defunc_2_reduce_res_100238; + } + for (int64_t i_128731 = 0; i_128731 < k2p2zq_73023; i_128731++) { + ((__global double *) mem_124210)[i_128731 * (k2p2zq_73023 * + m_73008) + + gtid_100069 * k2p2zq_73023 + + gtid_100070] = ((__global + double *) mem_124194)[phys_tid_100071 + + i_128731 * + num_threads_126050]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_100226 +} +__kernel void mainMagnitudezisegmap_100441(__global int *global_failure, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t m_74646, int64_t nm_74647, + int64_t gauss_jordan_res_r_ixfn_124361, + int64_t gauss_jordan_res_r_ixfn_124362, + int64_t gauss_jordan_res_r_ixfn_124364, + __global + unsigned char *gauss_jordan_res_r_mem_124366, + __global unsigned char *mem_124371) +{ + #define segmap_group_sizze_100944 (mainMagnitudezisegmap_group_sizze_100445) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128881; + int32_t local_tid_128882; + int64_t group_sizze_128885; + int32_t wave_sizze_128884; + int32_t group_tid_128883; + + global_tid_128881 = get_global_id(0); + local_tid_128882 = get_local_id(0); + group_sizze_128885 = get_local_size(0); + wave_sizze_128884 = LOCKSTEP_WIDTH; + group_tid_128883 = 
get_group_id(0); + + int32_t phys_tid_100441; + + phys_tid_100441 = global_tid_128881; + + int64_t gtid_100438; + + gtid_100438 = squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882), k2p2zq_73023 * + k2p2zq_73023); + + int64_t gtid_slice_100436; + + gtid_slice_100436 = squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882) - + squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), k2p2zq_73023); + + int64_t gtid_slice_100437; + + gtid_slice_100437 = sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + sext_i32_i64(local_tid_128882) - + squot64(sext_i32_i64(group_tid_128883) * segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882), k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882) - + squot64(sext_i32_i64(group_tid_128883) * + segmap_group_sizze_100944 + + sext_i32_i64(local_tid_128882), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + if ((slt64(gtid_100438, m_73008) && slt64(gtid_slice_100436, + k2p2zq_73023)) && + slt64(gtid_slice_100437, k2p2zq_73023)) { + int64_t slice_100948 = k2p2zq_73023 + gtid_slice_100437; + int64_t binop_x_115356 = nm_74647 * gtid_100438; + int64_t binop_y_115357 = m_74646 * gtid_slice_100436; + int64_t binop_x_115358 = binop_x_115356 + binop_y_115357; + int64_t binop_x_115359 = slice_100948 + binop_x_115358; + int64_t new_index_115360 = squot64(binop_x_115359, nm_74647); + int64_t binop_y_115372 = nm_74647 * new_index_115360; + int64_t new_index_115373 = binop_x_115359 - binop_y_115372; + double v_100949 = ((__global + double *) gauss_jordan_res_r_mem_124366)[gauss_jordan_res_r_ixfn_124361 + + (new_index_115360 * + 
gauss_jordan_res_r_ixfn_124362 + + new_index_115373 * + gauss_jordan_res_r_ixfn_124364)]; + + ((__global double *) mem_124371)[gtid_100438 * (k2p2zq_73023 * + k2p2zq_73023) + + gtid_slice_100436 * k2p2zq_73023 + + gtid_slice_100437] = v_100949; + } + + error_0: + return; + #undef segmap_group_sizze_100944 +} +__kernel void mainMagnitudezisegmap_100552(__global int *global_failure, + int64_t m_73008, int64_t nm_74647, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124349) +{ + #define segmap_group_sizze_100934 (mainMagnitudezisegmap_group_sizze_100555) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128875; + int32_t local_tid_128876; + int64_t group_sizze_128879; + int32_t wave_sizze_128878; + int32_t group_tid_128877; + + global_tid_128875 = get_global_id(0); + local_tid_128876 = get_local_id(0); + group_sizze_128879 = get_local_size(0); + wave_sizze_128878 = LOCKSTEP_WIDTH; + group_tid_128877 = get_group_id(0); + + int32_t phys_tid_100552; + + phys_tid_100552 = global_tid_128875; + + int64_t gtid_100550; + + gtid_100550 = squot64(sext_i32_i64(group_tid_128877) * + segmap_group_sizze_100934 + + sext_i32_i64(local_tid_128876), nm_74647); + + int64_t gtid_100551; + + gtid_100551 = sext_i32_i64(group_tid_128877) * segmap_group_sizze_100934 + + sext_i32_i64(local_tid_128876) - + squot64(sext_i32_i64(group_tid_128877) * segmap_group_sizze_100934 + + sext_i32_i64(local_tid_128876), nm_74647) * nm_74647; + if (slt64(gtid_100550, m_73008) && slt64(gtid_100551, nm_74647)) { + double write_value_100938 = ((__global + double *) mem_124349)[gtid_100550 * + nm_74647 + + gtid_100551]; + + if ((sle64((int64_t) 0, gtid_100550) && slt64(gtid_100550, m_73008)) && + (sle64((int64_t) 0, gtid_100551) && slt64(gtid_100551, nm_74647))) { + ((__global double *) 
mem_param_124330)[ctx_param_ext_124325 + + (gtid_100550 * + ctx_param_ext_124326 + + gtid_100551 * + ctx_param_ext_124328)] = + write_value_100938; + } + } + + error_0: + return; + #undef segmap_group_sizze_100934 +} +__kernel void mainMagnitudezisegmap_100572(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t m_74646, int64_t nm_74647, + int64_t i_100818, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124345, + __global unsigned char *mem_124349) +{ + #define segmap_group_sizze_100889 (mainMagnitudezisegmap_group_sizze_100575) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128870; + int32_t local_tid_128871; + int64_t group_sizze_128874; + int32_t wave_sizze_128873; + int32_t group_tid_128872; + + global_tid_128870 = get_global_id(0); + local_tid_128871 = get_local_id(0); + group_sizze_128874 = get_local_size(0); + wave_sizze_128873 = LOCKSTEP_WIDTH; + group_tid_128872 = get_group_id(0); + + int32_t phys_tid_100572; + + phys_tid_100572 = global_tid_128870; + + int64_t gtid_100570; + + gtid_100570 = squot64(sext_i32_i64(group_tid_128872) * + segmap_group_sizze_100889 + + sext_i32_i64(local_tid_128871), nm_74647); + + int64_t gtid_100571; + + gtid_100571 = sext_i32_i64(group_tid_128872) * segmap_group_sizze_100889 + + sext_i32_i64(local_tid_128871) - + squot64(sext_i32_i64(group_tid_128872) * segmap_group_sizze_100889 + + sext_i32_i64(local_tid_128871), nm_74647) * nm_74647; + if (slt64(gtid_100570, m_73008) && slt64(gtid_100571, nm_74647)) { + bool cond_100894 = ((__global bool *) mem_124345)[gtid_100570]; + int64_t defunc_0_f_res_100896 = sdiv64(gtid_100571, m_74646); + int64_t defunc_0_f_res_100897 = smod64(gtid_100571, m_74646); + double 
defunc_0_f_res_100898; + + if (cond_100894) { + int64_t x_100899 = mul64(m_74646, defunc_0_f_res_100896); + int64_t i_100900 = add64(defunc_0_f_res_100897, x_100899); + bool x_100901 = sle64((int64_t) 0, i_100900); + bool y_100902 = slt64(i_100900, nm_74647); + bool bounds_check_100903 = x_100901 && y_100902; + bool index_certs_100904; + + if (!bounds_check_100903) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 626) == + -1) { + global_failure_args[0] = i_100900; + global_failure_args[1] = nm_74647; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_100905 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100570 * + ctx_param_ext_124326 + + i_100900 * + ctx_param_ext_124328)]; + + defunc_0_f_res_100898 = defunc_0_f_res_t_res_100905; + } else { + double v1_100893 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100570 * + ctx_param_ext_124326 + + i_100818 * + ctx_param_ext_124328)]; + bool x_100906 = sle64((int64_t) 0, defunc_0_f_res_100897); + bool y_100907 = slt64(defunc_0_f_res_100897, nm_74647); + bool bounds_check_100908 = x_100906 && y_100907; + bool index_certs_100909; + + if (!bounds_check_100908) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 627) == + -1) { + global_failure_args[0] = defunc_0_f_res_100897; + global_failure_args[1] = nm_74647; + ; + } + return; + } + } + + double x_100910 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100570 * + ctx_param_ext_124326 + + defunc_0_f_res_100897 * + ctx_param_ext_124328)]; + double x_100911 = x_100910 / v1_100893; + int64_t y_100912 = sub64(k2p2zq_73023, (int64_t) 1); + bool cond_100913 = slt64(defunc_0_f_res_100896, y_100912); + double defunc_0_f_res_f_res_100914; + + if (cond_100913) { + int64_t x_100915 = add64((int64_t) 1, defunc_0_f_res_100896); + int64_t x_100916 = mul64(m_74646, x_100915); + int64_t i_100917 = add64(defunc_0_f_res_100897, x_100916); + bool x_100918 = sle64((int64_t) 0, i_100917); + 
bool y_100919 = slt64(i_100917, nm_74647); + bool bounds_check_100920 = x_100918 && y_100919; + bool index_certs_100921; + + if (!bounds_check_100920) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 628) == -1) { + global_failure_args[0] = i_100917; + global_failure_args[1] = nm_74647; + ; + } + return; + } + } + + double x_100922 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100570 * + ctx_param_ext_124326 + + i_100917 * + ctx_param_ext_124328)]; + int64_t i_100923 = add64(i_100818, x_100916); + bool x_100924 = sle64((int64_t) 0, i_100923); + bool y_100925 = slt64(i_100923, nm_74647); + bool bounds_check_100926 = x_100924 && y_100925; + bool index_certs_100927; + + if (!bounds_check_100926) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 629) == -1) { + global_failure_args[0] = i_100923; + global_failure_args[1] = nm_74647; + ; + } + return; + } + } + + double x_100928 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100570 * + ctx_param_ext_124326 + + i_100923 * + ctx_param_ext_124328)]; + double y_100929 = x_100911 * x_100928; + double defunc_0_f_res_f_res_t_res_100930 = x_100922 - y_100929; + + defunc_0_f_res_f_res_100914 = defunc_0_f_res_f_res_t_res_100930; + } else { + defunc_0_f_res_f_res_100914 = x_100911; + } + defunc_0_f_res_100898 = defunc_0_f_res_f_res_100914; + } + ((__global double *) mem_124349)[gtid_100570 * nm_74647 + gtid_100571] = + defunc_0_f_res_100898; + } + + error_0: + return; + #undef segmap_group_sizze_100889 +} +__kernel void mainMagnitudezisegmap_100662(__global int *global_failure, + int64_t m_73008, int64_t i_100818, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global unsigned char *mem_124345) +{ + #define segmap_group_sizze_100876 (mainMagnitudezisegmap_group_sizze_100664) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + 
+ if (*global_failure >= 0) + return; + + int32_t global_tid_128865; + int32_t local_tid_128866; + int64_t group_sizze_128869; + int32_t wave_sizze_128868; + int32_t group_tid_128867; + + global_tid_128865 = get_global_id(0); + local_tid_128866 = get_local_id(0); + group_sizze_128869 = get_local_size(0); + wave_sizze_128868 = LOCKSTEP_WIDTH; + group_tid_128867 = get_group_id(0); + + int32_t phys_tid_100662; + + phys_tid_100662 = global_tid_128865; + + int64_t gtid_100661; + + gtid_100661 = sext_i32_i64(group_tid_128867) * segmap_group_sizze_100876 + + sext_i32_i64(local_tid_128866); + if (slt64(gtid_100661, m_73008)) { + double v1_100881 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100661 * + ctx_param_ext_124326 + + i_100818 * + ctx_param_ext_124328)]; + bool cond_100882 = v1_100881 == 0.0; + + ((__global bool *) mem_124345)[gtid_100661] = cond_100882; + } + + error_0: + return; + #undef segmap_group_sizze_100876 +} +__kernel void mainMagnitudezisegmap_100745(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t m_74646, int64_t nm_74647, + __global + unsigned char *defunc_3_map_res_mem_124294, + __global unsigned char *mem_124322) +{ + #define segmap_group_sizze_100796 (mainMagnitudezisegmap_group_sizze_100748) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128838; + int32_t local_tid_128839; + int64_t group_sizze_128842; + int32_t wave_sizze_128841; + int32_t group_tid_128840; + + global_tid_128838 = get_global_id(0); + local_tid_128839 = get_local_id(0); + group_sizze_128842 = get_local_size(0); + wave_sizze_128841 = LOCKSTEP_WIDTH; + group_tid_128840 = get_group_id(0); + + int32_t phys_tid_100745; + + phys_tid_100745 = global_tid_128838; + + int64_t gtid_100743; + + gtid_100743 = squot64(sext_i32_i64(group_tid_128840) * + 
segmap_group_sizze_100796 + + sext_i32_i64(local_tid_128839), nm_74647); + + int64_t gtid_100744; + + gtid_100744 = sext_i32_i64(group_tid_128840) * segmap_group_sizze_100796 + + sext_i32_i64(local_tid_128839) - + squot64(sext_i32_i64(group_tid_128840) * segmap_group_sizze_100796 + + sext_i32_i64(local_tid_128839), nm_74647) * nm_74647; + if (slt64(gtid_100743, m_73008) && slt64(gtid_100744, nm_74647)) { + int64_t defunc_0_f_res_100801 = sdiv64(gtid_100744, m_74646); + int64_t defunc_0_f_res_100802 = smod64(gtid_100744, m_74646); + bool cond_100803 = slt64(defunc_0_f_res_100802, k2p2zq_73023); + double defunc_0_f_res_100804; + + if (cond_100803) { + bool x_100805 = sle64((int64_t) 0, defunc_0_f_res_100801); + bool y_100806 = slt64(defunc_0_f_res_100801, k2p2zq_73023); + bool bounds_check_100807 = x_100805 && y_100806; + bool x_100808 = sle64((int64_t) 0, defunc_0_f_res_100802); + bool bounds_check_100809 = cond_100803 && x_100808; + bool index_ok_100810 = bounds_check_100807 && bounds_check_100809; + bool index_certs_100811; + + if (!index_ok_100810) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 621) == + -1) { + global_failure_args[0] = defunc_0_f_res_100801; + global_failure_args[1] = defunc_0_f_res_100802; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_100812 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_100743 * + (k2p2zq_73023 * + k2p2zq_73023) + + defunc_0_f_res_100801 * + k2p2zq_73023 + + defunc_0_f_res_100802]; + + defunc_0_f_res_100804 = defunc_0_f_res_t_res_100812; + } else { + int64_t y_100813 = add64(k2p2zq_73023, defunc_0_f_res_100801); + bool cond_100814 = defunc_0_f_res_100802 == y_100813; + double defunc_0_f_res_f_res_100815; + + if (cond_100814) { + defunc_0_f_res_f_res_100815 = 1.0; + } else { + defunc_0_f_res_f_res_100815 = 0.0; + } + defunc_0_f_res_100804 = defunc_0_f_res_f_res_100815; + } + ((__global double *) 
mem_124322)[gtid_100743 * nm_74647 + gtid_100744] = + defunc_0_f_res_100804; + } + + error_0: + return; + #undef segmap_group_sizze_100796 +} +__kernel void mainMagnitudezisegmap_100954(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_100975, + int64_t num_threads_126061, __global + unsigned char *binop_p_mem_120117, + __global unsigned char *mem_124375, + __global unsigned char *mem_124378, + __global unsigned char *mem_124393) +{ + #define segmap_group_sizze_100974 (mainMagnitudezisegmap_group_sizze_100956) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128889; + int32_t local_tid_128890; + int64_t group_sizze_128893; + int32_t wave_sizze_128892; + int32_t group_tid_128891; + + global_tid_128889 = get_global_id(0); + local_tid_128890 = get_local_id(0); + group_sizze_128893 = get_local_size(0); + wave_sizze_128892 = LOCKSTEP_WIDTH; + group_tid_128891 = get_group_id(0); + + int32_t phys_tid_100954; + + phys_tid_100954 = global_tid_128889; + + int32_t phys_group_id_128894; + + phys_group_id_128894 = get_group_id(0); + for (int32_t i_128895 = 0; i_128895 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_100974)) - + phys_group_id_128894, sext_i64_i32(num_groups_100975)); + i_128895++) { + int32_t virt_group_id_128896 = phys_group_id_128894 + i_128895 * + sext_i64_i32(num_groups_100975); + int64_t gtid_100953 = sext_i32_i64(virt_group_id_128896) * + segmap_group_sizze_100974 + sext_i32_i64(local_tid_128890); + + if (slt64(gtid_100953, m_73008)) { + for (int64_t i_119930 = 0; i_119930 < k2p2zq_73023; i_119930++) { + double defunc_2_reduce_res_100981; + double redout_119932 = 0.0; + + for (int64_t i_119933 = 0; i_119933 < n_73011; i_119933++) { + double x_100986 = ((__global + double *) mem_124375)[i_119933 * + m_73008 + + gtid_100953]; + bool isnan_res_100987; + + 
isnan_res_100987 = futrts_isnan64(x_100986); + + double defunc_1_f_res_100988; + + if (isnan_res_100987) { + defunc_1_f_res_100988 = 0.0; + } else { + double x_100985 = ((__global + double *) binop_p_mem_120117)[i_119930 * + N_73007 + + i_119933]; + double defunc_1_f_res_f_res_100989 = x_100985 * + x_100986; + + defunc_1_f_res_100988 = defunc_1_f_res_f_res_100989; + } + + double defunc_1_op_res_100984 = defunc_1_f_res_100988 + + redout_119932; + double redout_tmp_128898 = defunc_1_op_res_100984; + + redout_119932 = redout_tmp_128898; + } + defunc_2_reduce_res_100981 = redout_119932; + ((__global double *) mem_124378)[phys_tid_100954 + i_119930 * + num_threads_126061] = + defunc_2_reduce_res_100981; + } + for (int64_t i_128899 = 0; i_128899 < k2p2zq_73023; i_128899++) { + ((__global double *) mem_124393)[i_128899 * m_73008 + + gtid_100953] = ((__global + double *) mem_124378)[phys_tid_100954 + + i_128899 * + num_threads_126061]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_100974 +} +__kernel void mainMagnitudezisegmap_101095(__global int *global_failure, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_101115, + int64_t num_threads_126065, __global + unsigned char *mem_124597, __global + unsigned char *mem_124600, __global + unsigned char *mem_124603, __global + unsigned char *mem_124618) +{ + #define segmap_group_sizze_101114 (mainMagnitudezisegmap_group_sizze_101097) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129021; + int32_t local_tid_129022; + int64_t group_sizze_129025; + int32_t wave_sizze_129024; + int32_t group_tid_129023; + + global_tid_129021 = get_global_id(0); + local_tid_129022 = get_local_id(0); + group_sizze_129025 = get_local_size(0); + wave_sizze_129024 = LOCKSTEP_WIDTH; + group_tid_129023 = get_group_id(0); + + int32_t phys_tid_101095; + + phys_tid_101095 
= global_tid_129021; + + int32_t phys_group_id_129026; + + phys_group_id_129026 = get_group_id(0); + for (int32_t i_129027 = 0; i_129027 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_101114)) - + phys_group_id_129026, sext_i64_i32(num_groups_101115)); + i_129027++) { + int32_t virt_group_id_129028 = phys_group_id_129026 + i_129027 * + sext_i64_i32(num_groups_101115); + int64_t gtid_101094 = sext_i32_i64(virt_group_id_129028) * + segmap_group_sizze_101114 + sext_i32_i64(local_tid_129022); + + if (slt64(gtid_101094, m_73008)) { + for (int64_t i_119936 = 0; i_119936 < k2p2zq_73023; i_119936++) { + double defunc_0_f_res_101122; + double redout_119938 = 0.0; + + for (int64_t i_119939 = 0; i_119939 < k2p2zq_73023; + i_119939++) { + double x_101126 = ((__global + double *) mem_124600)[i_119939 * + m_73008 + + gtid_101094]; + double x_101127 = ((__global + double *) mem_124597)[i_119936 * + (m_73008 * + k2p2zq_73023) + + i_119939 * + m_73008 + + gtid_101094]; + double defunc_1_f_res_101128 = x_101126 * x_101127; + double defunc_1_op_res_101125 = defunc_1_f_res_101128 + + redout_119938; + double redout_tmp_129030 = defunc_1_op_res_101125; + + redout_119938 = redout_tmp_129030; + } + defunc_0_f_res_101122 = redout_119938; + ((__global double *) mem_124603)[phys_tid_101095 + i_119936 * + num_threads_126065] = + defunc_0_f_res_101122; + } + for (int64_t i_129031 = 0; i_129031 < k2p2zq_73023; i_129031++) { + ((__global double *) mem_124618)[i_129031 * m_73008 + + gtid_101094] = ((__global + double *) mem_124603)[phys_tid_101095 + + i_129031 * + num_threads_126065]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_101114 +} +__kernel void mainMagnitudezisegmap_101227(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_101246, + int64_t num_threads_126069, __global + unsigned char *mem_120124, __global + unsigned char *mem_124662, 
__global + unsigned char *mem_124665, __global + unsigned char *mem_124680) +{ + #define segmap_group_sizze_101245 (mainMagnitudezisegmap_group_sizze_101229) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129109; + int32_t local_tid_129110; + int64_t group_sizze_129113; + int32_t wave_sizze_129112; + int32_t group_tid_129111; + + global_tid_129109 = get_global_id(0); + local_tid_129110 = get_local_id(0); + group_sizze_129113 = get_local_size(0); + wave_sizze_129112 = LOCKSTEP_WIDTH; + group_tid_129111 = get_group_id(0); + + int32_t phys_tid_101227; + + phys_tid_101227 = global_tid_129109; + + int32_t phys_group_id_129114; + + phys_group_id_129114 = get_group_id(0); + for (int32_t i_129115 = 0; i_129115 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_101245)) - + phys_group_id_129114, sext_i64_i32(num_groups_101246)); + i_129115++) { + int32_t virt_group_id_129116 = phys_group_id_129114 + i_129115 * + sext_i64_i32(num_groups_101246); + int64_t gtid_101226 = sext_i32_i64(virt_group_id_129116) * + segmap_group_sizze_101245 + sext_i32_i64(local_tid_129110); + + if (slt64(gtid_101226, m_73008)) { + for (int64_t i_119946 = 0; i_119946 < N_73007; i_119946++) { + double defunc_0_f_res_101252; + double redout_119948 = 0.0; + + for (int64_t i_119949 = 0; i_119949 < k2p2zq_73023; + i_119949++) { + double x_101256 = ((__global + double *) mem_124662)[i_119949 * + m_73008 + + gtid_101226]; + double x_101257 = ((__global + double *) mem_120124)[i_119946 * + k2p2zq_73023 + + i_119949]; + double defunc_1_f_res_101258 = x_101256 * x_101257; + double defunc_1_op_res_101255 = defunc_1_f_res_101258 + + redout_119948; + double redout_tmp_129118 = defunc_1_op_res_101255; + + redout_119948 = redout_tmp_129118; + } + defunc_0_f_res_101252 = redout_119948; + ((__global double *) mem_124665)[phys_tid_101227 + i_119946 * + num_threads_126069] = + defunc_0_f_res_101252; + } + 
for (int64_t i_129119 = 0; i_129119 < N_73007; i_129119++) { + ((__global double *) mem_124680)[i_129119 * m_73008 + + gtid_101226] = ((__global + double *) mem_124665)[phys_tid_101227 + + i_129119 * + num_threads_126069]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_101245 +} +__kernel void mainMagnitudezisegmap_101405(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + __global unsigned char *mem_124906, + __global unsigned char *mem_124909, + __global unsigned char *mem_124914, + __global unsigned char *mem_124917) +{ + #define segmap_group_sizze_101549 (mainMagnitudezisegmap_group_sizze_101408) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129318; + int32_t local_tid_129319; + int64_t group_sizze_129322; + int32_t wave_sizze_129321; + int32_t group_tid_129320; + + global_tid_129318 = get_global_id(0); + local_tid_129319 = get_local_id(0); + group_sizze_129322 = get_local_size(0); + wave_sizze_129321 = LOCKSTEP_WIDTH; + group_tid_129320 = get_group_id(0); + + int32_t phys_tid_101405; + + phys_tid_101405 = global_tid_129318; + + int64_t gtid_101403; + + gtid_101403 = squot64(sext_i32_i64(group_tid_129320) * + segmap_group_sizze_101549 + + sext_i32_i64(local_tid_129319), N_73007); + + int64_t gtid_101404; + + gtid_101404 = sext_i32_i64(group_tid_129320) * segmap_group_sizze_101549 + + sext_i32_i64(local_tid_129319) - + squot64(sext_i32_i64(group_tid_129320) * segmap_group_sizze_101549 + + sext_i32_i64(local_tid_129319), N_73007) * N_73007; + if (slt64(gtid_101403, m_73008) && slt64(gtid_101404, N_73007)) { + double x_101553 = ((__global double *) mem_124909)[gtid_101403 * + N_73007 + + gtid_101404]; + bool isnan_res_101556; + + isnan_res_101556 = futrts_isnan64(x_101553); + + bool defunc_0_p_res_101557 = !isnan_res_101556; + int64_t defunc_1_f_res_101558; + + if 
(defunc_0_p_res_101557) { + int64_t x_101554 = ((__global int64_t *) mem_124906)[gtid_101403 * + N_73007 + + gtid_101404]; + int64_t defunc_1_f_res_t_res_101559 = sub64(x_101554, (int64_t) 1); + + defunc_1_f_res_101558 = defunc_1_f_res_t_res_101559; + } else { + defunc_1_f_res_101558 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_101403) && slt64(gtid_101403, m_73008)) && + (sle64((int64_t) 0, defunc_1_f_res_101558) && + slt64(defunc_1_f_res_101558, N_73007))) { + ((__global int64_t *) mem_124917)[gtid_101403 * N_73007 + + defunc_1_f_res_101558] = + gtid_101404; + } + if ((sle64((int64_t) 0, gtid_101403) && slt64(gtid_101403, m_73008)) && + (sle64((int64_t) 0, defunc_1_f_res_101558) && + slt64(defunc_1_f_res_101558, N_73007))) { + ((__global double *) mem_124914)[gtid_101403 * N_73007 + + defunc_1_f_res_101558] = x_101553; + } + } + + error_0: + return; + #undef segmap_group_sizze_101549 +} +__kernel void mainMagnitudezisegmap_101576(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, double hfrac_73013, + int64_t k2p2_73021, __global + unsigned char *mem_124924, __global + unsigned char *mem_124927, __global + unsigned char *mem_124930, __global + unsigned char *mem_124932, __global + unsigned char *mem_124934) +{ + #define segmap_group_sizze_101612 (mainMagnitudezisegmap_group_sizze_101578) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129326; + int32_t local_tid_129327; + int64_t group_sizze_129330; + int32_t wave_sizze_129329; + int32_t group_tid_129328; + + global_tid_129326 = get_global_id(0); + local_tid_129327 = get_local_id(0); + group_sizze_129330 = get_local_size(0); + wave_sizze_129329 = LOCKSTEP_WIDTH; + group_tid_129328 = get_group_id(0); + + int32_t phys_tid_101576; + + phys_tid_101576 = global_tid_129326; + + int64_t gtid_101575; + + 
gtid_101575 = sext_i32_i64(group_tid_129328) * segmap_group_sizze_101612 + + sext_i32_i64(local_tid_129327); + if (slt64(gtid_101575, m_73008)) { + int64_t defunc_0_f_res_101619; + int64_t redout_119950 = (int64_t) 0; + + for (int64_t i_119951 = 0; i_119951 < n_73011; i_119951++) { + double x_101623 = ((__global double *) mem_124924)[i_119951 * + m_73008 + + gtid_101575]; + bool isnan_res_101624; + + isnan_res_101624 = futrts_isnan64(x_101623); + + bool cond_101625 = !isnan_res_101624; + int64_t defunc_0_f_res_101626 = btoi_bool_i64(cond_101625); + int64_t defunc_1_op_res_101622 = add64(defunc_0_f_res_101626, + redout_119950); + int64_t redout_tmp_129331 = defunc_1_op_res_101622; + + redout_119950 = redout_tmp_129331; + } + defunc_0_f_res_101619 = redout_119950; + + double defunc_0_f_res_101627; + double redout_115314 = 0.0; + + for (int64_t i_115315 = 0; i_115315 < n_73011; i_115315++) { + bool cond_101632 = slt64(i_115315, defunc_0_f_res_101619); + double defunc_0_f_res_101633; + + if (cond_101632) { + bool y_101635 = slt64(i_115315, N_73007); + bool index_certs_101637; + + if (!y_101635) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 630) == -1) { + global_failure_args[0] = i_115315; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_101638 = ((__global + double *) mem_124927)[i_115315 * + m_73008 + + gtid_101575]; + + defunc_0_f_res_101633 = defunc_0_f_res_t_res_101638; + } else { + defunc_0_f_res_101633 = 0.0; + } + + double defunc_0_f_res_101639 = defunc_0_f_res_101633 * + defunc_0_f_res_101633; + double defunc_1_op_res_101630 = defunc_0_f_res_101639 + + redout_115314; + double redout_tmp_129332 = defunc_1_op_res_101630; + + redout_115314 = redout_tmp_129332; + } + defunc_0_f_res_101627 = redout_115314; + + int64_t i64_arg_101640 = sub64(defunc_0_f_res_101619, k2p2_73021); + double i64_res_101641 = sitofp_i64_f64(i64_arg_101640); + double sqrt_arg_101642 = defunc_0_f_res_101627 / i64_res_101641; + double 
sqrt_res_101643; + + sqrt_res_101643 = futrts_sqrt64(sqrt_arg_101642); + + double i64_res_101644 = sitofp_i64_f64(defunc_0_f_res_101619); + double f64_arg_101645 = hfrac_73013 * i64_res_101644; + int64_t f64_res_101646 = fptosi_f64_i64(f64_arg_101645); + + ((__global int64_t *) mem_124930)[gtid_101575] = f64_res_101646; + ((__global int64_t *) mem_124932)[gtid_101575] = defunc_0_f_res_101619; + ((__global double *) mem_124934)[gtid_101575] = sqrt_res_101643; + } + + error_0: + return; + #undef segmap_group_sizze_101612 +} +__kernel void mainMagnitudezisegmap_101689(__global int *global_failure, + int64_t m_73008, double hfrac_73013, + int64_t k2p2_73021, __global + unsigned char *mem_124946, __global + unsigned char *mem_124949, __global + unsigned char *mem_124952, __global + unsigned char *mem_124954) +{ + #define segmap_group_sizze_101780 (mainMagnitudezisegmap_group_sizze_101691) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129467; + int32_t local_tid_129468; + int64_t group_sizze_129471; + int32_t wave_sizze_129470; + int32_t group_tid_129469; + + global_tid_129467 = get_global_id(0); + local_tid_129468 = get_local_id(0); + group_sizze_129471 = get_local_size(0); + wave_sizze_129470 = LOCKSTEP_WIDTH; + group_tid_129469 = get_group_id(0); + + int32_t phys_tid_101689; + + phys_tid_101689 = global_tid_129467; + + int64_t gtid_101688; + + gtid_101688 = sext_i32_i64(group_tid_129469) * segmap_group_sizze_101780 + + sext_i32_i64(local_tid_129468); + if (slt64(gtid_101688, m_73008)) { + int64_t defunc_0_f_res_101784 = ((__global + int64_t *) mem_124946)[gtid_101688]; + double defunc_0_f_res_101785 = ((__global + double *) mem_124949)[gtid_101688]; + int64_t i64_arg_101786 = sub64(defunc_0_f_res_101784, k2p2_73021); + double i64_res_101787 = sitofp_i64_f64(i64_arg_101786); + double sqrt_arg_101788 = defunc_0_f_res_101785 / i64_res_101787; + double sqrt_res_101789; + 
+ sqrt_res_101789 = futrts_sqrt64(sqrt_arg_101788); + + double i64_res_101790 = sitofp_i64_f64(defunc_0_f_res_101784); + double f64_arg_101791 = hfrac_73013 * i64_res_101790; + int64_t f64_res_101792 = fptosi_f64_i64(f64_arg_101791); + + ((__global int64_t *) mem_124952)[gtid_101688] = f64_res_101792; + ((__global double *) mem_124954)[gtid_101688] = sqrt_res_101789; + } + + error_0: + return; + #undef segmap_group_sizze_101780 +} +__kernel void mainMagnitudezisegmap_101812(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t m_73008, + int64_t defunc_2_reduce_comm_res_74867, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124966) +{ + #define segmap_group_sizze_101836 (mainMagnitudezisegmap_group_sizze_101814) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129506; + int32_t local_tid_129507; + int64_t group_sizze_129510; + int32_t wave_sizze_129509; + int32_t group_tid_129508; + + global_tid_129506 = get_global_id(0); + local_tid_129507 = get_local_id(0); + group_sizze_129510 = get_local_size(0); + wave_sizze_129509 = LOCKSTEP_WIDTH; + group_tid_129508 = get_group_id(0); + + int32_t phys_tid_101812; + + phys_tid_101812 = global_tid_129506; + + int64_t gtid_101811; + + gtid_101811 = sext_i32_i64(group_tid_129508) * segmap_group_sizze_101836 + + sext_i32_i64(local_tid_129507); + if (slt64(gtid_101811, m_73008)) { + int64_t x_101840 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_101811]; + int64_t x_101841 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_101811]; + double defunc_0_f_res_101842; + double redout_115320 = 0.0; + + for (int64_t i_115321 = 0; i_115321 < defunc_2_reduce_comm_res_74867; + i_115321++) { + bool 
cond_101847 = slt64(i_115321, x_101841); + double defunc_0_f_res_101848; + + if (cond_101847) { + int64_t x_101849 = add64(x_101840, i_115321); + int64_t x_101850 = sub64(x_101849, x_101841); + int64_t i_101851 = add64((int64_t) 1, x_101850); + bool x_101852 = sle64((int64_t) 0, i_101851); + bool y_101853 = slt64(i_101851, N_73007); + bool bounds_check_101854 = x_101852 && y_101853; + bool index_certs_101855; + + if (!bounds_check_101854) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 634) == -1) { + global_failure_args[0] = i_101851; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_101856 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101811 * + N_73007 + + i_101851]; + + defunc_0_f_res_101848 = defunc_0_f_res_t_res_101856; + } else { + defunc_0_f_res_101848 = 0.0; + } + + double defunc_1_op_res_101845 = defunc_0_f_res_101848 + + redout_115320; + double redout_tmp_129511 = defunc_1_op_res_101845; + + redout_115320 = redout_tmp_129511; + } + defunc_0_f_res_101842 = redout_115320; + ((__global double *) mem_124966)[gtid_101811] = defunc_0_f_res_101842; + } + + error_0: + return; + #undef segmap_group_sizze_101836 +} +__kernel void mainMagnitudezisegmap_101909(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t n_73011, + double lam_73015, + int64_t iota_arg_74896, + double i64_res_74903, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_124973) +{ + #define segmap_group_sizze_101929 (mainMagnitudezisegmap_group_sizze_101911) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129573; + int32_t local_tid_129574; + int64_t group_sizze_129577; + int32_t wave_sizze_129576; + int32_t group_tid_129575; + + global_tid_129573 = get_global_id(0); + local_tid_129574 = get_local_id(0); + group_sizze_129577 = 
get_local_size(0); + wave_sizze_129576 = LOCKSTEP_WIDTH; + group_tid_129575 = get_group_id(0); + + int32_t phys_tid_101909; + + phys_tid_101909 = global_tid_129573; + + int64_t gtid_101908; + + gtid_101908 = sext_i32_i64(group_tid_129575) * segmap_group_sizze_101929 + + sext_i32_i64(local_tid_129574); + if (slt64(gtid_101908, iota_arg_74896)) { + int64_t i_101933 = add64(n_73011, gtid_101908); + bool x_101934 = sle64((int64_t) 0, i_101933); + bool y_101935 = slt64(i_101933, N_73007); + bool bounds_check_101936 = x_101934 && y_101935; + bool index_certs_101937; + + if (!bounds_check_101936) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 637) == -1) { + global_failure_args[0] = i_101933; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + int64_t time_101938 = ((__global + int64_t *) mappingindices_mem_120107)[i_101933]; + double i64_res_101939 = sitofp_i64_f64(time_101938); + double logplus_arg_101940 = i64_res_101939 / i64_res_74903; + bool cond_101941 = 2.718281828459045 < logplus_arg_101940; + double logplus_res_101942; + + if (cond_101941) { + double log_res_101943; + + log_res_101943 = futrts_log64(logplus_arg_101940); + logplus_res_101942 = log_res_101943; + } else { + logplus_res_101942 = 1.0; + } + + double sqrt_res_101944; + + sqrt_res_101944 = futrts_sqrt64(logplus_res_101942); + + double defunc_0_f_res_101945 = lam_73015 * sqrt_res_101944; + + ((__global double *) mem_124973)[gtid_101908] = defunc_0_f_res_101945; + } + + error_0: + return; + #undef segmap_group_sizze_101929 +} +__kernel void mainMagnitudezisegmap_101949(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, + int64_t iota_arg_74896, + int64_t distance_74923, + int64_t num_threads_126087, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124976, + __global unsigned char *mem_124983, + __global unsigned char 
*mem_124994, + __global unsigned char *mem_125014) +{ + #define segmap_group_sizze_102140 (mainMagnitudezisegmap_group_sizze_101951) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129588; + int32_t local_tid_129589; + int64_t group_sizze_129592; + int32_t wave_sizze_129591; + int32_t group_tid_129590; + + global_tid_129588 = get_global_id(0); + local_tid_129589 = get_local_id(0); + group_sizze_129592 = get_local_size(0); + wave_sizze_129591 = LOCKSTEP_WIDTH; + group_tid_129590 = get_group_id(0); + + int32_t phys_tid_101949; + + phys_tid_101949 = global_tid_129588; + + int64_t gtid_101948; + + gtid_101948 = sext_i32_i64(group_tid_129590) * segmap_group_sizze_102140 + + sext_i32_i64(local_tid_129589); + if (slt64(gtid_101948, m_73008)) { + int64_t x_102143 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_101948]; + int64_t x_102144 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_101948]; + int64_t y_102145 = ((__global int64_t *) mem_124976)[gtid_101948]; + + for (int64_t i_129593 = 0; i_129593 < iota_arg_74896; i_129593++) { + ((__global double *) mem_124994)[phys_tid_101949 + i_129593 * + num_threads_126087] = ((__global + double *) mem_124983)[gtid_101948 + + i_129593 * + m_73008]; + } + for (int64_t i_102148 = 0; i_102148 < distance_74923; i_102148++) { + int64_t index_primexp_102150 = add64((int64_t) 1, i_102148); + bool cond_102151 = slt64((int64_t) 0, index_primexp_102150); + bool loop_cond_102152; + + if (cond_102151) { + bool x_102153 = sle64((int64_t) 0, index_primexp_102150); + bool y_102154 = slt64(index_primexp_102150, iota_arg_74896); + bool bounds_check_102155 = x_102153 && y_102154; + bool index_certs_102156; + + if (!bounds_check_102155) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 640) == -1) { + global_failure_args[0] = index_primexp_102150; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + 
+ double defunc_2_lifted_gt_arg_102157 = ((__global + double *) mem_124994)[phys_tid_101949 + + index_primexp_102150 * + num_threads_126087]; + bool y_102158 = slt64(i_102148, iota_arg_74896); + bool index_certs_102159; + + if (!y_102158) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 641) == -1) { + global_failure_args[0] = i_102148; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double defunc_1_lifted_gt_arg_102160 = ((__global + double *) mem_124994)[phys_tid_101949 + + i_102148 * + num_threads_126087]; + bool defunc_1_zlze_res_102161 = defunc_1_lifted_gt_arg_102160 <= + defunc_2_lifted_gt_arg_102157; + bool defunc_2_lifted_gt_res_102162 = !defunc_1_zlze_res_102161; + + loop_cond_102152 = defunc_2_lifted_gt_res_102162; + } else { + loop_cond_102152 = 0; + } + + bool xszq_102163; + int64_t xszq_102164; + bool loop_while_102166; + int64_t j_102167; + + loop_while_102166 = loop_cond_102152; + j_102167 = index_primexp_102150; + while (loop_while_102166) { + int64_t loopres_102169 = sub64(j_102167, (int64_t) 1); + bool x_102170 = sle64((int64_t) 0, j_102167); + bool y_102171 = slt64(j_102167, iota_arg_74896); + bool bounds_check_102172 = x_102170 && y_102171; + bool index_certs_102173; + + if (!bounds_check_102172) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 642) == -1) { + global_failure_args[0] = j_102167; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double copy_arg_102174 = ((__global + double *) mem_124994)[phys_tid_101949 + + j_102167 * + num_threads_126087]; + bool x_102175 = sle64((int64_t) 0, loopres_102169); + bool y_102176 = slt64(loopres_102169, iota_arg_74896); + bool bounds_check_102177 = x_102175 && y_102176; + bool index_certs_102178; + + if (!bounds_check_102177) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 643) == -1) { + global_failure_args[0] = loopres_102169; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double 
copy_arg_102179 = ((__global + double *) mem_124994)[phys_tid_101949 + + loopres_102169 * + num_threads_126087]; + + ((__global double *) mem_124994)[phys_tid_101949 + j_102167 * + num_threads_126087] = + copy_arg_102179; + ((__global double *) mem_124994)[phys_tid_101949 + + loopres_102169 * + num_threads_126087] = + copy_arg_102174; + + bool cond_102182 = slt64((int64_t) 0, loopres_102169); + bool loop_cond_102183; + + if (cond_102182) { + bool index_certs_102184; + + if (!bounds_check_102177) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 644) == -1) { + global_failure_args[0] = loopres_102169; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double defunc_2_lifted_gt_arg_102185 = ((__global + double *) mem_124994)[phys_tid_101949 + + loopres_102169 * + num_threads_126087]; + int64_t i_102186 = sub64(loopres_102169, (int64_t) 1); + bool x_102187 = sle64((int64_t) 0, i_102186); + bool y_102188 = slt64(i_102186, iota_arg_74896); + bool bounds_check_102189 = x_102187 && y_102188; + bool index_certs_102190; + + if (!bounds_check_102189) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 645) == -1) { + global_failure_args[0] = i_102186; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double defunc_1_lifted_gt_arg_102191 = ((__global + double *) mem_124994)[phys_tid_101949 + + i_102186 * + num_threads_126087]; + bool defunc_1_zlze_res_102192 = + defunc_1_lifted_gt_arg_102191 <= + defunc_2_lifted_gt_arg_102185; + bool defunc_2_lifted_gt_res_102193 = + !defunc_1_zlze_res_102192; + + loop_cond_102183 = defunc_2_lifted_gt_res_102193; + } else { + loop_cond_102183 = 0; + } + + bool loop_while_tmp_129595 = loop_cond_102183; + int64_t j_tmp_129596 = loopres_102169; + + loop_while_102166 = loop_while_tmp_129595; + j_102167 = j_tmp_129596; + } + xszq_102163 = loop_while_102166; + xszq_102164 = j_102167; + } + + int64_t i_102194 = sdiv64(y_102145, (int64_t) 2); + int64_t j_102195 = sub64(i_102194, (int64_t) 
1); + bool cond_102196 = x_102143 == x_102144; + double defunc_0_f_res_102197; + + if (cond_102196) { + defunc_0_f_res_102197 = 0.0; + } else { + int64_t x_102198 = smod64(y_102145, (int64_t) 2); + bool cond_102199 = x_102198 == (int64_t) 0; + double defunc_0_f_res_f_res_102200; + + if (cond_102199) { + bool x_102201 = sle64((int64_t) 0, j_102195); + bool y_102202 = slt64(j_102195, iota_arg_74896); + bool bounds_check_102203 = x_102201 && y_102202; + bool index_certs_102204; + + if (!bounds_check_102203) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 646) == -1) { + global_failure_args[0] = j_102195; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double x_102205 = ((__global + double *) mem_124994)[phys_tid_101949 + + j_102195 * + num_threads_126087]; + bool x_102206 = sle64((int64_t) 0, i_102194); + bool y_102207 = slt64(i_102194, iota_arg_74896); + bool bounds_check_102208 = x_102206 && y_102207; + bool index_certs_102209; + + if (!bounds_check_102208) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 647) == -1) { + global_failure_args[0] = i_102194; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double y_102210 = ((__global + double *) mem_124994)[phys_tid_101949 + + i_102194 * + num_threads_126087]; + double x_102211 = x_102205 + y_102210; + double defunc_0_f_res_f_res_t_res_102212 = x_102211 / 2.0; + + defunc_0_f_res_f_res_102200 = defunc_0_f_res_f_res_t_res_102212; + } else { + bool x_102213 = sle64((int64_t) 0, i_102194); + bool y_102214 = slt64(i_102194, iota_arg_74896); + bool bounds_check_102215 = x_102213 && y_102214; + bool index_certs_102216; + + if (!bounds_check_102215) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 648) == -1) { + global_failure_args[0] = i_102194; + global_failure_args[1] = iota_arg_74896; + ; + } + return; + } + } + + double defunc_0_f_res_f_res_f_res_102217 = ((__global + double *) mem_124994)[phys_tid_101949 + + i_102194 * + 
num_threads_126087]; + + defunc_0_f_res_f_res_102200 = defunc_0_f_res_f_res_f_res_102217; + } + defunc_0_f_res_102197 = defunc_0_f_res_f_res_102200; + } + ((__global double *) mem_125014)[gtid_101948] = defunc_0_f_res_102197; + } + + error_0: + return; + #undef segmap_group_sizze_102140 +} +__kernel void mainMagnitudezisegmap_102039(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t m_73008, + int64_t iota_arg_74896, __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124976, + __global unsigned char *mem_124980) +{ + #define segmap_group_sizze_102114 (mainMagnitudezisegmap_group_sizze_102042) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129583; + int32_t local_tid_129584; + int64_t group_sizze_129587; + int32_t wave_sizze_129586; + int32_t group_tid_129585; + + global_tid_129583 = get_global_id(0); + local_tid_129584 = get_local_id(0); + group_sizze_129587 = get_local_size(0); + wave_sizze_129586 = LOCKSTEP_WIDTH; + group_tid_129585 = get_group_id(0); + + int32_t phys_tid_102039; + + phys_tid_102039 = global_tid_129583; + + int64_t gtid_102037; + + gtid_102037 = squot64(sext_i32_i64(group_tid_129585) * + segmap_group_sizze_102114 + + sext_i32_i64(local_tid_129584), iota_arg_74896); + + int64_t gtid_102038; + + gtid_102038 = sext_i32_i64(group_tid_129585) * segmap_group_sizze_102114 + + sext_i32_i64(local_tid_129584) - + squot64(sext_i32_i64(group_tid_129585) * segmap_group_sizze_102114 + + sext_i32_i64(local_tid_129584), iota_arg_74896) * + iota_arg_74896; + if (slt64(gtid_102037, m_73008) && slt64(gtid_102038, iota_arg_74896)) { + int64_t x_102117 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102037]; + int64_t y_102119 = ((__global int64_t *) mem_124976)[gtid_102037]; + bool 
cond_102121 = slt64(gtid_102038, y_102119); + bool cond_102122; + + if (cond_102121) { + int64_t i_102123 = add64(gtid_102038, x_102117); + bool x_102124 = sle64((int64_t) 0, i_102123); + bool y_102125 = slt64(i_102123, N_73007); + bool bounds_check_102126 = x_102124 && y_102125; + bool index_certs_102127; + + if (!bounds_check_102126) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 638) == + -1) { + global_failure_args[0] = i_102123; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + double isnan_arg_102128 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102037 * + N_73007 + + i_102123]; + bool isnan_res_102129; + + isnan_res_102129 = futrts_isnan64(isnan_arg_102128); + + bool cond_t_res_102130 = !isnan_res_102129; + + cond_102122 = cond_t_res_102130; + } else { + cond_102122 = 0; + } + + double defunc_0_f_res_102131; + + if (cond_102122) { + int64_t i_102132 = add64(gtid_102038, x_102117); + bool x_102133 = sle64((int64_t) 0, i_102132); + bool y_102134 = slt64(i_102132, N_73007); + bool bounds_check_102135 = x_102133 && y_102134; + bool index_certs_102136; + + if (!bounds_check_102135) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 639) == + -1) { + global_failure_args[0] = i_102132; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + double defunc_0_f_res_t_res_102137 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102037 * + N_73007 + + i_102132]; + + defunc_0_f_res_102131 = defunc_0_f_res_t_res_102137; + } else { + defunc_0_f_res_102131 = INFINITY; + } + ((__global double *) mem_124980)[gtid_102037 * iota_arg_74896 + + gtid_102038] = defunc_0_f_res_102131; + } + + error_0: + return; + #undef segmap_group_sizze_102114 +} +__kernel void mainMagnitudezisegmap_102093(__global int *global_failure, + int64_t m_73008, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_124976) +{ + #define 
segmap_group_sizze_102102 (mainMagnitudezisegmap_group_sizze_102095) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129578; + int32_t local_tid_129579; + int64_t group_sizze_129582; + int32_t wave_sizze_129581; + int32_t group_tid_129580; + + global_tid_129578 = get_global_id(0); + local_tid_129579 = get_local_id(0); + group_sizze_129582 = get_local_size(0); + wave_sizze_129581 = LOCKSTEP_WIDTH; + group_tid_129580 = get_group_id(0); + + int32_t phys_tid_102093; + + phys_tid_102093 = global_tid_129578; + + int64_t gtid_102092; + + gtid_102092 = sext_i32_i64(group_tid_129580) * segmap_group_sizze_102102 + + sext_i32_i64(local_tid_129579); + if (slt64(gtid_102092, m_73008)) { + int64_t x_102105 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_102092]; + int64_t x_102106 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102092]; + int64_t y_102107 = sub64(x_102105, x_102106); + + ((__global int64_t *) mem_124976)[gtid_102092] = y_102107; + } + + error_0: + return; + #undef segmap_group_sizze_102102 +} +__kernel void mainMagnitudezisegmap_102352(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global unsigned char *mem_125028, + __global unsigned char *mem_125035, + __global unsigned char *mem_125037, + __global unsigned char *mem_125039, + __global unsigned char *mem_125042, + __global unsigned char *mem_125044) +{ + #define segmap_group_sizze_102631 (mainMagnitudezisegmap_group_sizze_102354) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129788; + int32_t local_tid_129789; + int64_t group_sizze_129792; + int32_t wave_sizze_129791; 
+ int32_t group_tid_129790; + + global_tid_129788 = get_global_id(0); + local_tid_129789 = get_local_id(0); + group_sizze_129792 = get_local_size(0); + wave_sizze_129791 = LOCKSTEP_WIDTH; + group_tid_129790 = get_group_id(0); + + int32_t phys_tid_102352; + + phys_tid_102352 = global_tid_129788; + + int64_t gtid_102351; + + gtid_102351 = sext_i32_i64(group_tid_129790) * segmap_group_sizze_102631 + + sext_i32_i64(local_tid_129789); + if (slt64(gtid_102351, m_73008)) { + int64_t x_102635 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102351]; + int64_t y_102637 = ((__global int64_t *) mem_125028)[gtid_102351]; + bool acc0_102639 = ((__global bool *) mem_125035)[gtid_102351]; + bool x_102644 = acc0_102639 && acc0_102639; + int64_t defunc_1_op_res_f_res_102648; + + if (acc0_102639) { + int64_t acc0_102640 = ((__global + int64_t *) mem_125037)[gtid_102351]; + + defunc_1_op_res_f_res_102648 = acc0_102640; + } else { + defunc_1_op_res_f_res_102648 = (int64_t) -1; + } + + bool cond_102654 = y_102637 == (int64_t) 0; + double defunc_0_f_res_102655; + + if (cond_102654) { + defunc_0_f_res_102655 = 0.0; + } else { + double acc0_102641 = ((__global double *) mem_125039)[gtid_102351]; + double i64_res_102656 = sitofp_i64_f64(y_102637); + double defunc_0_f_res_f_res_102657 = acc0_102641 / i64_res_102656; + + defunc_0_f_res_102655 = defunc_0_f_res_f_res_102657; + } + + bool cond_102658 = !x_102644; + int64_t fst_breakzq_102659; + + if (cond_102658) { + fst_breakzq_102659 = (int64_t) -1; + } else { + bool cond_102660 = slt64(defunc_1_op_res_f_res_102648, y_102637); + int64_t adjustValInds_res_102661; + + if (cond_102660) { + int64_t i_102662 = add64(x_102635, + defunc_1_op_res_f_res_102648); + bool x_102663 = sle64((int64_t) 0, i_102662); + bool y_102664 = slt64(i_102662, N_73007); + bool bounds_check_102665 = x_102663 && y_102664; + bool index_certs_102666; + + if (!bounds_check_102665) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 654) == -1) { + 
global_failure_args[0] = i_102662; + global_failure_args[1] = N_73007; + ; + } + return; + } + } + + int64_t x_102667 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_102351 * + N_73007 + + i_102662]; + int64_t adjustValInds_res_t_res_102668 = sub64(x_102667, + n_73011); + + adjustValInds_res_102661 = adjustValInds_res_t_res_102668; + } else { + adjustValInds_res_102661 = (int64_t) -1; + } + fst_breakzq_102659 = adjustValInds_res_102661; + } + + bool cond_102669 = sle64(x_102635, (int64_t) 5); + bool cond_f_res_102670 = sle64(y_102637, (int64_t) 5); + bool x_102671 = !cond_102669; + bool y_102672 = cond_f_res_102670 && x_102671; + bool cond_102673 = cond_102669 || y_102672; + int64_t fst_breakzq_102674; + + if (cond_102673) { + fst_breakzq_102674 = (int64_t) -2; + } else { + fst_breakzq_102674 = fst_breakzq_102659; + } + ((__global int64_t *) mem_125042)[gtid_102351] = fst_breakzq_102674; + ((__global double *) mem_125044)[gtid_102351] = defunc_0_f_res_102655; + } + + error_0: + return; + #undef segmap_group_sizze_102631 +} +__kernel void mainMagnitudezisegmap_102503(__global int *global_failure, + int64_t m_73008, + int64_t num_groups_102526, __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_3_map_res_mem_124960, + __global unsigned char *mem_125026, + __global unsigned char *mem_125028) +{ + #define segmap_group_sizze_102525 (mainMagnitudezisegmap_group_sizze_102505) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129626; + int32_t local_tid_129627; + int64_t group_sizze_129630; + int32_t wave_sizze_129629; + int32_t group_tid_129628; + + global_tid_129626 = get_global_id(0); + local_tid_129627 = get_local_id(0); + group_sizze_129630 = get_local_size(0); + wave_sizze_129629 = LOCKSTEP_WIDTH; + group_tid_129628 = get_group_id(0); + + int32_t phys_tid_102503; 
+ + phys_tid_102503 = global_tid_129626; + + int32_t phys_group_id_129631; + + phys_group_id_129631 = get_group_id(0); + for (int32_t i_129632 = 0; i_129632 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_102525)) - + phys_group_id_129631, sext_i64_i32(num_groups_102526)); + i_129632++) { + int32_t virt_group_id_129633 = phys_group_id_129631 + i_129632 * + sext_i64_i32(num_groups_102526); + int64_t gtid_102502 = sext_i32_i64(virt_group_id_129633) * + segmap_group_sizze_102525 + sext_i32_i64(local_tid_129627); + + if (slt64(gtid_102502, m_73008)) { + int64_t x_102532 = ((__global + int64_t *) defunc_4_map_res_mem_124919)[gtid_102502]; + int64_t x_102533 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102502]; + double x_102534 = ((__global + double *) defunc_3_map_res_mem_124960)[gtid_102502]; + int64_t y_102535 = sub64(x_102532, x_102533); + double i64_res_102536 = sitofp_i64_f64(x_102533); + double sqrt_res_102537; + + sqrt_res_102537 = futrts_sqrt64(i64_res_102536); + + double y_102538 = x_102534 * sqrt_res_102537; + + ((__global double *) mem_125026)[gtid_102502] = y_102538; + ((__global int64_t *) mem_125028)[gtid_102502] = y_102535; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_102525 +} +__kernel void mainMagnitudezisegmap_90087(__global int *global_failure, + int64_t N_73007, double freq_73012, + int64_t k2p2zq_73023, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120112) +{ + #define segmap_group_sizze_90154 (mainMagnitudezisegmap_group_sizze_90090) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126325; + int32_t local_tid_126326; + int64_t group_sizze_126329; + int32_t wave_sizze_126328; + int32_t group_tid_126327; + + global_tid_126325 = get_global_id(0); + local_tid_126326 = get_local_id(0); + group_sizze_126329 = 
get_local_size(0); + wave_sizze_126328 = LOCKSTEP_WIDTH; + group_tid_126327 = get_group_id(0); + + int32_t phys_tid_90087; + + phys_tid_90087 = global_tid_126325; + + int64_t gtid_90085; + + gtid_90085 = squot64(sext_i32_i64(group_tid_126327) * + segmap_group_sizze_90154 + + sext_i32_i64(local_tid_126326), N_73007); + + int64_t gtid_90086; + + gtid_90086 = sext_i32_i64(group_tid_126327) * segmap_group_sizze_90154 + + sext_i32_i64(local_tid_126326) - + squot64(sext_i32_i64(group_tid_126327) * segmap_group_sizze_90154 + + sext_i32_i64(local_tid_126326), N_73007) * N_73007; + if (slt64(gtid_90085, k2p2zq_73023) && slt64(gtid_90086, N_73007)) { + bool index_primexp_115010 = gtid_90085 == (int64_t) 0; + double defunc_0_f_res_90160; + + if (index_primexp_115010) { + defunc_0_f_res_90160 = 1.0; + } else { + int64_t x_90159 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_90086]; + bool cond_90161 = gtid_90085 == (int64_t) 1; + double defunc_0_f_res_f_res_90162; + + if (cond_90161) { + double i64_res_90163 = sitofp_i64_f64(x_90159); + + defunc_0_f_res_f_res_90162 = i64_res_90163; + } else { + int64_t i64_arg_90164 = sdiv64(gtid_90085, (int64_t) 2); + double i64_res_90165 = sitofp_i64_f64(i64_arg_90164); + double i64_res_90166 = sitofp_i64_f64(x_90159); + double x_90167 = 6.283185307179586 * i64_res_90165; + double x_90168 = i64_res_90166 * x_90167; + double angle_90169 = x_90168 / freq_73012; + int64_t x_90170 = smod64(gtid_90085, (int64_t) 2); + bool cond_90171 = x_90170 == (int64_t) 0; + double defunc_0_f_res_f_res_f_res_90172; + + if (cond_90171) { + double sin_res_90173; + + sin_res_90173 = futrts_sin64(angle_90169); + defunc_0_f_res_f_res_f_res_90172 = sin_res_90173; + } else { + double cos_res_90174; + + cos_res_90174 = futrts_cos64(angle_90169); + defunc_0_f_res_f_res_f_res_90172 = cos_res_90174; + } + defunc_0_f_res_f_res_90162 = defunc_0_f_res_f_res_f_res_90172; + } + defunc_0_f_res_90160 = defunc_0_f_res_f_res_90162; + } + ((__global double *) 
mem_120112)[gtid_90085 * N_73007 + gtid_90086] = + defunc_0_f_res_90160; + } + + error_0: + return; + #undef segmap_group_sizze_90154 +} +__kernel void mainMagnitudezisegmap_90187(__global int *global_failure, + int64_t N_73007, double freq_73012, + int64_t k2p2zq_73023, __global + unsigned char *mappingindices_mem_120107, + __global unsigned char *mem_120116) +{ + #define segmap_group_sizze_90250 (mainMagnitudezisegmap_group_sizze_90190) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126330; + int32_t local_tid_126331; + int64_t group_sizze_126334; + int32_t wave_sizze_126333; + int32_t group_tid_126332; + + global_tid_126330 = get_global_id(0); + local_tid_126331 = get_local_id(0); + group_sizze_126334 = get_local_size(0); + wave_sizze_126333 = LOCKSTEP_WIDTH; + group_tid_126332 = get_group_id(0); + + int32_t phys_tid_90187; + + phys_tid_90187 = global_tid_126330; + + int64_t gtid_90185; + + gtid_90185 = squot64(sext_i32_i64(group_tid_126332) * + segmap_group_sizze_90250 + + sext_i32_i64(local_tid_126331), N_73007); + + int64_t gtid_90186; + + gtid_90186 = sext_i32_i64(group_tid_126332) * segmap_group_sizze_90250 + + sext_i32_i64(local_tid_126331) - + squot64(sext_i32_i64(group_tid_126332) * segmap_group_sizze_90250 + + sext_i32_i64(local_tid_126331), N_73007) * N_73007; + if (slt64(gtid_90185, k2p2zq_73023) && slt64(gtid_90186, N_73007)) { + bool index_primexp_115017 = gtid_90185 == (int64_t) 0; + double defunc_0_f_res_90256; + + if (index_primexp_115017) { + defunc_0_f_res_90256 = 1.0; + } else { + int64_t x_90255 = ((__global + int64_t *) mappingindices_mem_120107)[gtid_90186]; + int64_t i_90257 = add64((int64_t) 1, gtid_90185); + int64_t i64_arg_90258 = sdiv64(i_90257, (int64_t) 2); + double i64_res_90259 = sitofp_i64_f64(i64_arg_90258); + double i64_res_90260 = sitofp_i64_f64(x_90255); + double x_90261 = 6.283185307179586 * i64_res_90259; + double x_90262 = 
i64_res_90260 * x_90261; + double angle_90263 = x_90262 / freq_73012; + int64_t x_90264 = smod64(i_90257, (int64_t) 2); + bool cond_90265 = x_90264 == (int64_t) 0; + double defunc_0_f_res_f_res_90266; + + if (cond_90265) { + double sin_res_90267; + + sin_res_90267 = futrts_sin64(angle_90263); + defunc_0_f_res_f_res_90266 = sin_res_90267; + } else { + double cos_res_90268; + + cos_res_90268 = futrts_cos64(angle_90263); + defunc_0_f_res_f_res_90266 = cos_res_90268; + } + defunc_0_f_res_90256 = defunc_0_f_res_f_res_90266; + } + ((__global double *) mem_120116)[gtid_90185 * N_73007 + gtid_90186] = + defunc_0_f_res_90256; + } + + error_0: + return; + #undef segmap_group_sizze_90250 +} +__kernel void mainMagnitudezisegmap_90280(__global int *global_failure, + int64_t N_73007, int64_t k2p2zq_73023, + double defunc_0_f_res_73086, __global + unsigned char *mem_120120, __global + unsigned char *mem_120124) +{ + #define segmap_group_sizze_90301 (mainMagnitudezisegmap_group_sizze_90283) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126335; + int32_t local_tid_126336; + int64_t group_sizze_126339; + int32_t wave_sizze_126338; + int32_t group_tid_126337; + + global_tid_126335 = get_global_id(0); + local_tid_126336 = get_local_id(0); + group_sizze_126339 = get_local_size(0); + wave_sizze_126338 = LOCKSTEP_WIDTH; + group_tid_126337 = get_group_id(0); + + int32_t phys_tid_90280; + + phys_tid_90280 = global_tid_126335; + + int64_t gtid_90278; + + gtid_90278 = squot64(sext_i32_i64(group_tid_126337) * + segmap_group_sizze_90301 + + sext_i32_i64(local_tid_126336), k2p2zq_73023); + + int64_t gtid_90279; + + gtid_90279 = sext_i32_i64(group_tid_126337) * segmap_group_sizze_90301 + + sext_i32_i64(local_tid_126336) - + squot64(sext_i32_i64(group_tid_126337) * segmap_group_sizze_90301 + + sext_i32_i64(local_tid_126336), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_90278, N_73007) && 
slt64(gtid_90279, k2p2zq_73023)) { + double x_90304 = ((__global double *) mem_120120)[gtid_90278 * + k2p2zq_73023 + + gtid_90279]; + double defunc_0_f_res_90305 = defunc_0_f_res_73086 + x_90304; + + ((__global double *) mem_120124)[gtid_90278 * k2p2zq_73023 + + gtid_90279] = defunc_0_f_res_90305; + } + + error_0: + return; + #undef segmap_group_sizze_90301 +} +__kernel void mainMagnitudezisegmap_90404(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, int64_t m_73103, + __global + unsigned char *images_mem_120108, + __global unsigned char *mem_120201, + __global unsigned char *mem_120206, + __global unsigned char *mem_120209) +{ + #define segmap_group_sizze_90638 (mainMagnitudezisegmap_group_sizze_90407) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126490; + int32_t local_tid_126491; + int64_t group_sizze_126494; + int32_t wave_sizze_126493; + int32_t group_tid_126492; + + global_tid_126490 = get_global_id(0); + local_tid_126491 = get_local_id(0); + group_sizze_126494 = get_local_size(0); + wave_sizze_126493 = LOCKSTEP_WIDTH; + group_tid_126492 = get_group_id(0); + + int32_t phys_tid_90404; + + phys_tid_90404 = global_tid_126490; + + int64_t gtid_90402; + + gtid_90402 = squot64(sext_i32_i64(group_tid_126492) * + segmap_group_sizze_90638 + + sext_i32_i64(local_tid_126491), n_73011); + + int64_t gtid_90403; + + gtid_90403 = sext_i32_i64(group_tid_126492) * segmap_group_sizze_90638 + + sext_i32_i64(local_tid_126491) - + squot64(sext_i32_i64(group_tid_126492) * segmap_group_sizze_90638 + + sext_i32_i64(local_tid_126491), n_73011) * n_73011; + if (slt64(gtid_90402, m_73008) && slt64(gtid_90403, n_73011)) { + int64_t binop_y_115030 = (int64_t) -1 * gtid_90403; + int64_t slice_115031 = m_73103 + binop_y_115030; + double x_90642 = ((__global double *) images_mem_120108)[gtid_90402 * + N_73007 + + slice_115031]; + bool 
defunc_0_f_res_90645; + + defunc_0_f_res_90645 = futrts_isnan64(x_90642); + + bool defunc_0_g_res_90646 = !defunc_0_f_res_90645; + int64_t defunc_1_f_res_90647; + + if (defunc_0_g_res_90646) { + int64_t x_90643 = ((__global int64_t *) mem_120201)[gtid_90402 * + n_73011 + + gtid_90403]; + int64_t defunc_1_f_res_t_res_90648 = sub64(x_90643, (int64_t) 1); + + defunc_1_f_res_90647 = defunc_1_f_res_t_res_90648; + } else { + defunc_1_f_res_90647 = (int64_t) -1; + } + if ((sle64((int64_t) 0, gtid_90402) && slt64(gtid_90402, m_73008)) && + (sle64((int64_t) 0, defunc_1_f_res_90647) && + slt64(defunc_1_f_res_90647, n_73011))) { + ((__global int64_t *) mem_120209)[gtid_90402 * n_73011 + + defunc_1_f_res_90647] = + gtid_90403; + } + if ((sle64((int64_t) 0, gtid_90402) && slt64(gtid_90402, m_73008)) && + (sle64((int64_t) 0, defunc_1_f_res_90647) && + slt64(defunc_1_f_res_90647, n_73011))) { + ((__global double *) mem_120206)[gtid_90402 * n_73011 + + defunc_1_f_res_90647] = x_90642; + } + } + + error_0: + return; + #undef segmap_group_sizze_90638 +} +__kernel void mainMagnitudezisegmap_90682(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t N_73007, int64_t m_73008, + int64_t n_73011, int64_t k2p2zq_73023, + int64_t m_73103, + int64_t defunc_2_reduce_res_73132, + int64_t num_groups_90785, + int64_t num_threads_125875, __global + unsigned char *defunc_3_map_res_mem_120232, + __global unsigned char *mem_120235, + __global unsigned char *mem_120238, + __global unsigned char *mem_120246, + __global unsigned char *mem_125145) +{ + #define segmap_group_sizze_90784 (mainMagnitudezisegmap_group_sizze_90685) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126528; + int32_t 
local_tid_126529; + int64_t group_sizze_126532; + int32_t wave_sizze_126531; + int32_t group_tid_126530; + + global_tid_126528 = get_global_id(0); + local_tid_126529 = get_local_id(0); + group_sizze_126532 = get_local_size(0); + wave_sizze_126531 = LOCKSTEP_WIDTH; + group_tid_126530 = get_group_id(0); + + int32_t phys_tid_90682; + + phys_tid_90682 = global_tid_126528; + + int32_t phys_group_id_126533; + + phys_group_id_126533 = get_group_id(0); + for (int32_t i_126534 = 0; i_126534 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * defunc_2_reduce_res_73132, + segmap_group_sizze_90784)) - + phys_group_id_126533, sext_i64_i32(num_groups_90785)); + i_126534++) { + int32_t virt_group_id_126535 = phys_group_id_126533 + i_126534 * + sext_i64_i32(num_groups_90785); + int64_t gtid_90680 = squot64(sext_i32_i64(virt_group_id_126535) * + segmap_group_sizze_90784 + + sext_i32_i64(local_tid_126529), + defunc_2_reduce_res_73132); + int64_t gtid_90681 = sext_i32_i64(virt_group_id_126535) * + segmap_group_sizze_90784 + sext_i32_i64(local_tid_126529) - + squot64(sext_i32_i64(virt_group_id_126535) * + segmap_group_sizze_90784 + + sext_i32_i64(local_tid_126529), + defunc_2_reduce_res_73132) * defunc_2_reduce_res_73132; + + if (slt64(gtid_90680, m_73008) && slt64(gtid_90681, + defunc_2_reduce_res_73132)) { + int64_t x_90788 = ((__global + int64_t *) defunc_3_map_res_mem_120232)[gtid_90680 * + n_73011 + + gtid_90681]; + bool cond_90789 = sle64((int64_t) 0, x_90788); + + if (cond_90789) { + bool y_90791 = slt64(x_90788, n_73011); + bool bounds_check_90792 = cond_90789 && y_90791; + bool empty_slice_90793 = k2p2zq_73023 == (int64_t) 0; + int64_t m_90794 = sub64(k2p2zq_73023, (int64_t) 1); + bool zzero_leq_i_p_m_t_s_90795 = sle64((int64_t) 0, m_90794); + bool i_p_m_t_s_leq_w_90796 = slt64(m_90794, k2p2zq_73023); + bool i_lte_j_90797 = sle64((int64_t) 0, k2p2zq_73023); + bool y_90798 = zzero_leq_i_p_m_t_s_90795 && + i_p_m_t_s_leq_w_90796; + bool y_90799 = i_lte_j_90797 && y_90798; + bool 
ok_or_empty_90800 = empty_slice_90793 || y_90799; + bool index_ok_90801 = bounds_check_90792 && ok_or_empty_90800; + bool index_certs_90802; + + if (!index_ok_90801) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 435) == -1) { + global_failure_args[0] = x_90788; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = n_73011; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t binop_y_90803 = (int64_t) -1 * x_90788; + int64_t slice_90804 = m_73103 + binop_y_90803; + + for (int64_t i_126536 = 0; i_126536 < k2p2zq_73023; + i_126536++) { + ((__global double *) mem_125145)[phys_tid_90682 + i_126536 * + num_threads_125875] = + ((__global double *) mem_120235)[slice_90804 + + i_126536 * N_73007]; + } + } else { + for (int64_t i_126537 = 0; i_126537 < k2p2zq_73023; + i_126537++) { + ((__global double *) mem_120238)[phys_tid_90682 + i_126537 * + num_threads_125875] = NAN; + } + for (int64_t i_126538 = 0; i_126538 < k2p2zq_73023; + i_126538++) { + ((__global double *) mem_125145)[phys_tid_90682 + i_126538 * + num_threads_125875] = + ((__global double *) mem_120238)[phys_tid_90682 + + i_126538 * + num_threads_125875]; + } + } + for (int64_t i_126539 = 0; i_126539 < k2p2zq_73023; i_126539++) { + ((__global double *) mem_120246)[i_126539 * + (defunc_2_reduce_res_73132 * + m_73008) + gtid_90680 * + defunc_2_reduce_res_73132 + + gtid_90681] = ((__global + double *) mem_125145)[phys_tid_90682 + + i_126539 * + num_threads_125875]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_90784 +} +__kernel void mainMagnitudezisegmap_90818(__global int *global_failure, + int64_t k2p2zq_73023, __global + unsigned char *mem_120252) +{ + #define segmap_group_sizze_90843 (mainMagnitudezisegmap_group_sizze_90821) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t 
global_tid_126550; + int32_t local_tid_126551; + int64_t group_sizze_126554; + int32_t wave_sizze_126553; + int32_t group_tid_126552; + + global_tid_126550 = get_global_id(0); + local_tid_126551 = get_local_id(0); + group_sizze_126554 = get_local_size(0); + wave_sizze_126553 = LOCKSTEP_WIDTH; + group_tid_126552 = get_group_id(0); + + int32_t phys_tid_90818; + + phys_tid_90818 = global_tid_126550; + + int64_t gtid_90816; + + gtid_90816 = squot64(sext_i32_i64(group_tid_126552) * + segmap_group_sizze_90843 + + sext_i32_i64(local_tid_126551), k2p2zq_73023); + + int64_t gtid_90817; + + gtid_90817 = sext_i32_i64(group_tid_126552) * segmap_group_sizze_90843 + + sext_i32_i64(local_tid_126551) - + squot64(sext_i32_i64(group_tid_126552) * segmap_group_sizze_90843 + + sext_i32_i64(local_tid_126551), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_90816, k2p2zq_73023) && slt64(gtid_90817, k2p2zq_73023)) { + bool cond_90848 = gtid_90817 == gtid_90816; + double defunc_0_f_res_90849; + + if (cond_90848) { + defunc_0_f_res_90849 = 1.0; + } else { + defunc_0_f_res_90849 = 0.0; + } + ((__global double *) mem_120252)[gtid_90816 * k2p2zq_73023 + + gtid_90817] = defunc_0_f_res_90849; + } + + error_0: + return; + #undef segmap_group_sizze_90843 +} +__kernel void mainMagnitudezisegmap_90949(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t m_73095, + unsigned char y_73099, + unsigned char ok_or_empty_73101, + int64_t min_res_73213, + int64_t k_73214, + int64_t num_groups_91405, + int64_t binop_x_120251, + int64_t num_threads_125879, __global + unsigned char *mem_120252, __global + unsigned char *mem_120257, __global + unsigned char *mem_120261, __global + unsigned char *mem_120265, __global + unsigned char *mem_120268, __global + unsigned char *mem_120271, __global + unsigned char *mem_120273, __global + unsigned char *mem_120608, __global + unsigned char *mem_120649, __global + unsigned 
char *mem_120661, __global + unsigned char *mem_120690, __global + unsigned char *mem_120763, __global + unsigned char *mem_120778, __global + unsigned char *mem_120790, __global + unsigned char *mem_120801, __global + unsigned char *mem_120821, __global + unsigned char *mem_120824, __global + unsigned char *mem_120878, __global + unsigned char *mem_120881, __global + unsigned char *mem_120883, __global + unsigned char *mem_125150, __global + unsigned char *mem_125152, __global + unsigned char *mem_125160, __global + unsigned char *mem_125421, __global + unsigned char *mem_125429, __global + unsigned char *mem_125431, __global + unsigned char *mem_125491, __global + unsigned char *double_buffer_mem_125535, + __global + unsigned char *double_buffer_mem_125536, + __global + unsigned char *double_buffer_mem_125537, + __global + unsigned char *double_buffer_mem_125548) +{ + #define segmap_group_sizze_91404 (mainMagnitudezisegmap_group_sizze_90951) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126578; + int32_t local_tid_126579; + int64_t group_sizze_126582; + int32_t wave_sizze_126581; + int32_t group_tid_126580; + + global_tid_126578 = get_global_id(0); + local_tid_126579 = get_local_id(0); + group_sizze_126582 = get_local_size(0); + wave_sizze_126581 = LOCKSTEP_WIDTH; + group_tid_126580 = get_group_id(0); + + int32_t phys_tid_90949; + + phys_tid_90949 = global_tid_126578; + + int32_t phys_group_id_126583; + + phys_group_id_126583 = get_group_id(0); + for (int32_t i_126584 = 0; i_126584 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_91404)) - + phys_group_id_126583, sext_i64_i32(num_groups_91405)); + i_126584++) { + int32_t virt_group_id_126585 = phys_group_id_126583 + i_126584 * + 
sext_i64_i32(num_groups_91405); + int64_t gtid_90948 = sext_i32_i64(virt_group_id_126585) * + segmap_group_sizze_91404 + sext_i32_i64(local_tid_126579); + + if (slt64(gtid_90948, m_73008)) { + for (int64_t i_126586 = 0; i_126586 < k2p2zq_73023; i_126586++) { + ((__global double *) mem_120268)[phys_tid_90949 + i_126586 * + num_threads_125879] = 0.0; + } + for (int64_t i_126587 = 0; i_126587 < (int64_t) 2; i_126587++) { + for (int64_t i_126588 = 0; i_126588 < k2p2zq_73023; + i_126588++) { + ((__global double *) mem_120271)[phys_tid_90949 + + (i_126587 * + (num_threads_125879 * + k2p2zq_73023) + + i_126588 * + num_threads_125879)] = + 0.0; + } + } + for (int64_t i_126589 = 0; i_126589 < k2p2zq_73023; i_126589++) { + int64_t x_126590 = (int64_t) 0 + i_126589 * (int64_t) 1; + + ((__global int64_t *) mem_120273)[phys_tid_90949 + i_126589 * + num_threads_125879] = + x_126590; + } + for (int64_t j_91417 = 0; j_91417 < k2p2zq_73023; j_91417++) { + bool index_certs_91420; + + if (!ok_or_empty_73101) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 436) == -1) { + global_failure_args[0] = j_91417; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_91422; + double redout_119641 = 0.0; + + for (int64_t i_119642 = 0; i_119642 < k2p2zq_73023; + i_119642++) { + double x_91426 = ((__global double *) mem_120265)[i_119642 * + (k2p2zq_73023 * + m_73008) + + gtid_90948 * + k2p2zq_73023 + + j_91417]; + double defunc_1_f_res_91427 = x_91426 * x_91426; + double defunc_1_op_res_91425 = defunc_1_f_res_91427 + + redout_119641; + double redout_tmp_126593 = defunc_1_op_res_91425; + + redout_119641 = redout_tmp_126593; + } + defunc_2_reduce_res_91422 = redout_119641; + + double sqrt_res_91428; + + sqrt_res_91428 = futrts_sqrt64(defunc_2_reduce_res_91422); + ((__global double *) mem_120268)[phys_tid_90949 + j_91417 * + 
num_threads_125879] = + sqrt_res_91428; + ((__global double *) mem_120271)[phys_tid_90949 + j_91417 * + num_threads_125879] = + sqrt_res_91428; + + bool zeze_res_91431 = sqrt_res_91428 == 0.0; + double lw_val_91432; + + if (zeze_res_91431) { + lw_val_91432 = 1.0; + } else { + lw_val_91432 = sqrt_res_91428; + } + ((__global double *) mem_120271)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + j_91417 * + num_threads_125879)] = + lw_val_91432; + } + for (int64_t i_126594 = 0; i_126594 < k2p2zq_73023; i_126594++) { + for (int64_t i_126595 = 0; i_126595 < k2p2zq_73023; + i_126595++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_126594 * + (num_threads_125879 * + k2p2zq_73023) + + i_126595 * + num_threads_125879)] = + ((__global double *) mem_120261)[gtid_90948 + + (i_126594 * (m_73008 * + k2p2zq_73023) + + i_126595 * m_73008)]; + } + } + for (int64_t i_126596 = 0; i_126596 < k2p2zq_73023; i_126596++) { + ((__global double *) double_buffer_mem_125536)[phys_tid_90949 + + i_126596 * + num_threads_125879] = + ((__global double *) mem_120268)[phys_tid_90949 + i_126596 * + num_threads_125879]; + } + for (int64_t i_126597 = 0; i_126597 < (int64_t) 2; i_126597++) { + for (int64_t i_126598 = 0; i_126598 < k2p2zq_73023; + i_126598++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (i_126597 * + (num_threads_125879 * + k2p2zq_73023) + + i_126598 * + num_threads_125879)] = + ((__global double *) mem_120271)[phys_tid_90949 + + (i_126597 * + (num_threads_125879 * + k2p2zq_73023) + + i_126598 * + num_threads_125879)]; + } + } + + int64_t dqrdc2_res_91438; + int64_t k_91444 = k_73214; + + for (int64_t l_91439 = 0; l_91439 < min_res_73213; l_91439++) { + int64_t x_91445 = add64((int64_t) 1, l_91439); + bool cond_91446 = slt64(x_91445, k_91444); + bool loop_cond_91447; + + if (cond_91446) { + bool y_91448 = slt64(l_91439, k2p2zq_73023); + bool index_certs_91449; + + if (!y_91448) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, -1, + 437) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_91439; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_91450 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + l_91439 * + num_threads_125879)]; + double zt_res_91451 = 1.0e-7 * zt_arg_91450; + bool index_certs_91452; + + if (!y_91448) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 438) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_91453 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + l_91439 * + num_threads_125879]; + bool zl_res_91454 = zl_arg_91453 < zt_res_91451; + + loop_cond_91447 = zl_res_91454; + } else { + loop_cond_91447 = 0; + } + + bool y_91455 = slt64(l_91439, k2p2zq_73023); + int64_t upper_bound_91456 = sub64(k2p2zq_73023, x_91445); + bool loop_not_taken_91457 = !loop_cond_91447; + bool protect_assert_disj_91458 = y_91455 || + loop_not_taken_91457; + bool index_certs_91459; + + if (!protect_assert_disj_91458) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 439) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_91460; + + if (!protect_assert_disj_91458) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 440) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_91439; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_91461; + + if (!protect_assert_disj_91458) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 441) == -1) { + 
global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_91439; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_91462 = y_73099 || + loop_not_taken_91457; + bool index_certs_91463; + + if (!protect_assert_disj_91462) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 442) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_91464; + + if (!protect_assert_disj_91462) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 443) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_91465; + + if (!protect_assert_disj_91462) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 444) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_91466; + int64_t loopres_91471; + bool loop_while_91472; + int64_t k_91477; + + loop_while_91472 = loop_cond_91447; + k_91477 = k_91444; + while (loop_while_91472) { + for (int64_t i_91479 = 0; i_91479 < k2p2zq_73023; + i_91479++) { + bool index_certs_91481; + + if (!y_91455) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 445) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = i_91479; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_91482 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + i_91479 * + 
num_threads_125879)]; + + for (int64_t j0_91484 = 0; j0_91484 < upper_bound_91456; + j0_91484++) { + int64_t j_91486 = add64(x_91445, j0_91484); + bool x_91487 = sle64((int64_t) 0, j_91486); + bool y_91488 = slt64(j_91486, k2p2zq_73023); + bool bounds_check_91489 = x_91487 && y_91488; + bool index_certs_91490; + + if (!bounds_check_91489) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 446) == + -1) { + global_failure_args[0] = j_91486; + global_failure_args[1] = i_91479; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_91491 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (j_91486 * + (num_threads_125879 * + k2p2zq_73023) + + i_91479 * + num_threads_125879)]; + int64_t i_91492 = sub64(j_91486, (int64_t) 1); + bool x_91493 = sle64((int64_t) 0, i_91492); + bool y_91494 = slt64(i_91492, k2p2zq_73023); + bool bounds_check_91495 = x_91493 && y_91494; + bool index_certs_91496; + + if (!bounds_check_91495) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 447) == + -1) { + global_failure_args[0] = i_91492; + global_failure_args[1] = i_91479; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_91492 * + (num_threads_125879 * + k2p2zq_73023) + + i_91479 * + num_threads_125879)] = + lw_val_91491; + } + + bool index_certs_91498; + + if (!y_73099) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 448) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = i_91479; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (m_73095 * + (num_threads_125879 * + k2p2zq_73023) + + i_91479 * + 
num_threads_125879)] = + t_91482; + } + + int64_t i_91500 = ((__global + int64_t *) mem_120273)[phys_tid_90949 + + l_91439 * + num_threads_125879]; + double t_91501 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + l_91439 * + num_threads_125879]; + double tt_91502 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + l_91439 * + num_threads_125879]; + double ttt_91503 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + l_91439 * + num_threads_125879)]; + + for (int64_t j0_91507 = 0; j0_91507 < upper_bound_91456; + j0_91507++) { + int64_t j_91511 = add64(x_91445, j0_91507); + bool x_91512 = sle64((int64_t) 0, j_91511); + bool y_91513 = slt64(j_91511, k2p2zq_73023); + bool bounds_check_91514 = x_91512 && y_91513; + bool index_certs_91515; + + if (!bounds_check_91514) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 449) == -1) { + global_failure_args[0] = j_91511; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_91516 = ((__global + int64_t *) mem_120273)[phys_tid_90949 + + j_91511 * + num_threads_125879]; + int64_t i_91517 = sub64(j_91511, (int64_t) 1); + bool x_91518 = sle64((int64_t) 0, i_91517); + bool y_91519 = slt64(i_91517, k2p2zq_73023); + bool bounds_check_91520 = x_91518 && y_91519; + bool index_certs_91521; + + if (!bounds_check_91520) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 450) == -1) { + global_failure_args[0] = i_91517; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_120273)[phys_tid_90949 + + i_91517 * + num_threads_125879] = + lw_val_91516; + + double lw_val_91523 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + j_91511 * + num_threads_125879]; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + i_91517 * + num_threads_125879] = + lw_val_91523; 
+ + bool index_certs_91525; + + if (!bounds_check_91514) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 451) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_91511; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_91526 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + j_91511 * + num_threads_125879]; + bool index_certs_91527; + + if (!bounds_check_91520) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 452) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_91517; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + i_91517 * + num_threads_125879] = + lw_val_91526; + + bool index_certs_91529; + + if (!bounds_check_91514) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 453) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_91511; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_91530 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + j_91511 * + num_threads_125879)]; + bool index_certs_91531; + + if (!bounds_check_91520) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 454) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_91517; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + i_91517 * + num_threads_125879)] = + lw_val_91530; + } + ((__global int64_t *) 
mem_120273)[phys_tid_90949 + m_73095 * + num_threads_125879] = + i_91500; + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + m_73095 * + num_threads_125879] = + t_91501; + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + m_73095 * + num_threads_125879] = + tt_91502; + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + m_73095 * + num_threads_125879)] = + ttt_91503; + + int64_t k_91537 = sub64(k_91477, (int64_t) 1); + bool cond_91538 = slt64(x_91445, k_91537); + bool loop_cond_91539; + + if (cond_91538) { + bool index_certs_91540; + + if (!y_91455) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 455) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_91439; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_91541 = ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (num_threads_125879 * + k2p2zq_73023 + + l_91439 * + num_threads_125879)]; + double zt_res_91542 = 1.0e-7 * zt_arg_91541; + bool index_certs_91543; + + if (!y_91455) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 456) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_91544 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + l_91439 * + num_threads_125879]; + bool zl_res_91545 = zl_arg_91544 < zt_res_91542; + + loop_cond_91539 = zl_res_91545; + } else { + loop_cond_91539 = 0; + } + + bool loop_while_tmp_126604 = loop_cond_91539; + int64_t k_tmp_126609 = k_91537; + + loop_while_91472 = loop_while_tmp_126604; + k_91477 = k_tmp_126609; + } + loopres_91466 = loop_while_91472; + loopres_91471 = k_91477; + + bool cond_91546 = x_91445 == k2p2zq_73023; + int64_t j_m_i_91547 = sub64(k2p2zq_73023, l_91439); + bool 
empty_slice_91551 = j_m_i_91547 == (int64_t) 0; + int64_t m_91552 = sub64(j_m_i_91547, (int64_t) 1); + int64_t i_p_m_t_s_91553 = add64(l_91439, m_91552); + bool zzero_leq_i_p_m_t_s_91554 = sle64((int64_t) 0, + i_p_m_t_s_91553); + bool i_p_m_t_s_leq_w_91555 = slt64(i_p_m_t_s_91553, + k2p2zq_73023); + bool i_lte_j_91556 = sle64(l_91439, k2p2zq_73023); + bool y_91557 = zzero_leq_i_p_m_t_s_91554 && + i_p_m_t_s_leq_w_91555; + bool y_91558 = i_lte_j_91556 && y_91557; + bool ok_or_empty_91559 = empty_slice_91551 || y_91558; + bool index_ok_91560 = y_91455 && ok_or_empty_91559; + + if (cond_91546) { + for (int64_t i_126615 = 0; i_126615 < k2p2zq_73023; + i_126615++) { + ((__global double *) mem_125431)[phys_tid_90949 + + i_126615 * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + i_126615 * + num_threads_125879]; + } + for (int64_t i_126616 = 0; i_126616 < (int64_t) 2; + i_126616++) { + for (int64_t i_126617 = 0; i_126617 < k2p2zq_73023; + i_126617++) { + ((__global double *) mem_125429)[phys_tid_90949 + + (i_126616 * + (num_threads_125879 * + k2p2zq_73023) + + i_126617 * + num_threads_125879)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (i_126616 * + (num_threads_125879 * + k2p2zq_73023) + + i_126617 * + num_threads_125879)]; + } + } + for (int64_t i_126618 = 0; i_126618 < k2p2zq_73023; + i_126618++) { + for (int64_t i_126619 = 0; i_126619 < k2p2zq_73023; + i_126619++) { + ((__global double *) mem_125491)[phys_tid_90949 + + (i_126618 * + (num_threads_125879 * + k2p2zq_73023) + + i_126619 * + num_threads_125879)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_126618 * + (num_threads_125879 * + k2p2zq_73023) + + i_126619 * + num_threads_125879)]; + } + } + } else { + bool index_certs_91561; + + if (!index_ok_91560) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 457) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = l_91439; + 
global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_91563; + double redout_119643 = 0.0; + + for (int64_t i_119644 = 0; i_119644 < j_m_i_91547; + i_119644++) { + int64_t slice_119985 = l_91439 + i_119644; + double x_91567 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + slice_119985 * + num_threads_125879)]; + double defunc_1_f_res_91568 = x_91567 * x_91567; + double defunc_1_op_res_91566 = defunc_1_f_res_91568 + + redout_119643; + double redout_tmp_126620 = defunc_1_op_res_91566; + + redout_119643 = redout_tmp_126620; + } + defunc_2_reduce_res_91563 = redout_119643; + + double sqrt_res_91569; + + sqrt_res_91569 = futrts_sqrt64(defunc_2_reduce_res_91563); + + bool zeze_res_91570 = sqrt_res_91569 == 0.0; + + if (zeze_res_91570) { + for (int64_t i_126621 = 0; i_126621 < k2p2zq_73023; + i_126621++) { + ((__global double *) mem_125152)[phys_tid_90949 + + i_126621 * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + i_126621 * + num_threads_125879]; + } + for (int64_t i_126622 = 0; i_126622 < (int64_t) 2; + i_126622++) { + for (int64_t i_126623 = 0; i_126623 < k2p2zq_73023; + i_126623++) { + ((__global + double *) mem_125150)[phys_tid_90949 + + (i_126622 * + (num_threads_125879 * + k2p2zq_73023) + + i_126623 * + num_threads_125879)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (i_126622 * + (num_threads_125879 * + k2p2zq_73023) + + i_126623 * + num_threads_125879)]; + } + } + for (int64_t i_126624 = 0; i_126624 < k2p2zq_73023; + i_126624++) { + for (int64_t i_126625 = 0; i_126625 < k2p2zq_73023; + i_126625++) { + ((__global + double *) mem_125421)[phys_tid_90949 + + (i_126624 * + (num_threads_125879 * + k2p2zq_73023) + + i_126625 * + num_threads_125879)] = + ((__global + double *) 
double_buffer_mem_125535)[phys_tid_90949 + + (i_126624 * + (num_threads_125879 * + k2p2zq_73023) + + i_126625 * + num_threads_125879)]; + } + } + } else { + bool index_ok_91574 = y_91455 && y_91455; + bool index_certs_91575; + + if (!index_ok_91574) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 458) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = l_91439; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_91576 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)]; + bool zeze_res_91577 = znze_arg_91576 == 0.0; + bool znze_res_91578 = !zeze_res_91577; + double nrmxl_91579; + + if (znze_res_91578) { + double abs_res_91580 = fabs(sqrt_res_91569); + double sgn_res_91581 = fsignum32(znze_arg_91576); + double zt_res_91582 = abs_res_91580 * sgn_res_91581; + + nrmxl_91579 = zt_res_91582; + } else { + nrmxl_91579 = sqrt_res_91569; + } + for (int64_t i0_91584 = 0; i0_91584 < j_m_i_91547; + i0_91584++) { + int64_t i_91586 = add64(l_91439, i0_91584); + bool x_91587 = sle64((int64_t) 0, i_91586); + bool y_91588 = slt64(i_91586, k2p2zq_73023); + bool bounds_check_91589 = x_91587 && y_91588; + bool index_ok_91590 = y_91455 && bounds_check_91589; + bool index_certs_91591; + + if (!index_ok_91590) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 459) == + -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = i_91586; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_91592 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + i_91586 * + num_threads_125879)]; + double lw_val_91593 = x_91592 / nrmxl_91579; + + ((__global + double *) 
double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + i_91586 * + num_threads_125879)] = + lw_val_91593; + } + + double zp_arg_91595 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)]; + double zp_res_91596 = 1.0 + zp_arg_91595; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)] = + zp_res_91596; + + bool bounds_invalid_upwards_91598 = slt64(k2p2zq_73023, + x_91445); + bool valid_91599 = !bounds_invalid_upwards_91598; + bool range_valid_c_91600; + + if (!valid_91599) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 460) == -1) { + global_failure_args[0] = x_91445; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_91601 = slt64((int64_t) 0, + upper_bound_91456); + bool loop_not_taken_91602 = !loop_nonempty_91601; + bool protect_assert_disj_91603 = index_ok_91574 || + loop_not_taken_91602; + bool index_certs_91604; + + if (!protect_assert_disj_91603) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 461) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = l_91439; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_91608 = 0; i_91608 < upper_bound_91456; + i_91608++) { + int64_t index_primexp_91612 = add64(x_91445, + i_91608); + bool x_91613 = sle64((int64_t) 0, + index_primexp_91612); + bool y_91614 = slt64(index_primexp_91612, + k2p2zq_73023); + bool bounds_check_91615 = x_91613 && y_91614; + double t_91616; + double t_91618 = 0.0; + + for (int64_t i0_91617 = 0; i0_91617 < j_m_i_91547; + i0_91617++) { + int64_t i_91619 = add64(l_91439, i0_91617); + bool x_91620 = sle64((int64_t) 0, i_91619); + bool 
y_91621 = slt64(i_91619, k2p2zq_73023); + bool bounds_check_91622 = x_91620 && y_91621; + bool index_ok_91623 = y_91455 && + bounds_check_91622; + bool index_certs_91624; + + if (!index_ok_91623) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 462) == + -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = i_91619; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_91625 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + i_91619 * + num_threads_125879)]; + bool index_ok_91626 = bounds_check_91615 && + bounds_check_91622; + bool index_certs_91627; + + if (!index_ok_91626) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 463) == + -1) { + global_failure_args[0] = + index_primexp_91612; + global_failure_args[1] = i_91619; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_91628 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (index_primexp_91612 * + (num_threads_125879 * + k2p2zq_73023) + + i_91619 * + num_threads_125879)]; + double y_91629 = x_91625 * y_91628; + double loopres_91630 = t_91618 - y_91629; + double t_tmp_126630 = loopres_91630; + + t_91618 = t_tmp_126630; + } + t_91616 = t_91618; + + double y_91631 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)]; + double t_91632 = t_91616 / y_91631; + + for (int64_t i0_91634 = 0; i0_91634 < j_m_i_91547; + i0_91634++) { + int64_t i_91636 = add64(l_91439, i0_91634); + bool x_91637 = sle64((int64_t) 0, i_91636); + bool y_91638 = slt64(i_91636, k2p2zq_73023); + bool bounds_check_91639 = x_91637 && y_91638; + bool index_ok_91640 = bounds_check_91615 && + bounds_check_91639; + 
bool index_certs_91641; + + if (!index_ok_91640) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 464) == + -1) { + global_failure_args[0] = + index_primexp_91612; + global_failure_args[1] = i_91636; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_91642 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (index_primexp_91612 * + (num_threads_125879 * + k2p2zq_73023) + + i_91636 * + num_threads_125879)]; + bool index_ok_91643 = y_91455 && + bounds_check_91639; + bool index_certs_91644; + + if (!index_ok_91643) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 465) == + -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = i_91636; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_91645 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + i_91636 * + num_threads_125879)]; + double y_91646 = t_91632 * y_91645; + double lw_val_91647 = x_91642 + y_91646; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (index_primexp_91612 * + (num_threads_125879 * + k2p2zq_73023) + + i_91636 * + num_threads_125879)] = + lw_val_91647; + } + + bool index_certs_91649; + + if (!bounds_check_91615) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 466) == + -1) { + global_failure_args[0] = + index_primexp_91612; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_91650 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + index_primexp_91612 * + num_threads_125879]; + bool zeze_res_91651 = zeze_arg_91650 == 0.0; + + if (!zeze_res_91651) { + bool index_ok_91654 = y_91455 && + bounds_check_91615; + bool index_certs_91655; + + if 
(!index_ok_91654) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 467) == + -1) { + global_failure_args[0] = + index_primexp_91612; + global_failure_args[1] = l_91439; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_91656 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (index_primexp_91612 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)]; + double abs_res_91657 = fabs(abs_arg_91656); + double zs_res_91658 = abs_res_91657 / + zeze_arg_91650; + double ztzt_res_91659 = fpow64(zs_res_91658, + 2.0); + double zm_res_91660 = 1.0 - ztzt_res_91659; + double max_res_91661 = fmax64(0.0, + zm_res_91660); + double abs_res_91662 = fabs(max_res_91661); + bool zgze_res_91663 = 1.0e-6 <= abs_res_91662; + + if (zgze_res_91663) { + double sqrt_res_91666; + + sqrt_res_91666 = + futrts_sqrt64(max_res_91661); + + double zt_res_91667 = zeze_arg_91650 * + sqrt_res_91666; + + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + index_primexp_91612 * + num_threads_125879] = + zt_res_91667; + } else { + bool empty_slice_91669 = + upper_bound_91456 == (int64_t) 0; + int64_t m_91670 = sub64(upper_bound_91456, + (int64_t) 1); + int64_t i_p_m_t_s_91671 = add64(x_91445, + m_91670); + bool zzero_leq_i_p_m_t_s_91672 = + sle64((int64_t) 0, i_p_m_t_s_91671); + bool i_p_m_t_s_leq_w_91673 = + slt64(i_p_m_t_s_91671, k2p2zq_73023); + bool zzero_lte_i_91674 = sle64((int64_t) 0, + x_91445); + bool i_lte_j_91675 = sle64(x_91445, + k2p2zq_73023); + bool y_91676 = i_p_m_t_s_leq_w_91673 && + zzero_lte_i_91674; + bool y_91677 = zzero_leq_i_p_m_t_s_91672 && + y_91676; + bool y_91678 = i_lte_j_91675 && y_91677; + bool forwards_ok_91679 = + zzero_lte_i_91674 && y_91678; + bool ok_or_empty_91680 = + empty_slice_91669 || forwards_ok_91679; + bool index_ok_91681 = bounds_check_91615 && + ok_or_empty_91680; + bool 
index_certs_91682; + + if (!index_ok_91681) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 468) == + -1) { + global_failure_args[0] = + index_primexp_91612; + global_failure_args[1] = + x_91445; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_91684; + double redout_119645 = 0.0; + + for (int64_t i_119646 = 0; i_119646 < + upper_bound_91456; i_119646++) { + int64_t slice_119986 = x_91445 + + i_119646; + double x_91688 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (index_primexp_91612 * + (num_threads_125879 * + k2p2zq_73023) + + slice_119986 * + num_threads_125879)]; + double defunc_1_f_res_91689 = x_91688 * + x_91688; + double defunc_1_op_res_91687 = + defunc_1_f_res_91689 + + redout_119645; + double redout_tmp_126632 = + defunc_1_op_res_91687; + + redout_119645 = redout_tmp_126632; + } + defunc_2_reduce_res_91684 = redout_119645; + + double sqrt_res_91690; + + sqrt_res_91690 = + futrts_sqrt64(defunc_2_reduce_res_91684); + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + index_primexp_91612 * + num_threads_125879] = + sqrt_res_91690; + + bool index_certs_91692; + + if (!bounds_check_91615) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 469) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_91612; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126633 = 0; i_126633 < + (int64_t) 1; i_126633++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (index_primexp_91612 + + i_126633) * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + num_threads_125879 * + index_primexp_91612 + + i_126633 * + num_threads_125879]; + } + } + } + } + + bool index_certs_91695; + + 
if (!y_91455) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 470) == -1) { + global_failure_args[0] = l_91439; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126634 = 0; i_126634 < (int64_t) 1; + i_126634++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + (l_91439 + + i_126634) * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + num_threads_125879 * + l_91439 + + i_126634 * + num_threads_125879]; + } + + double zt_res_91698 = -1.0 * nrmxl_91579; + + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (l_91439 * + (num_threads_125879 * + k2p2zq_73023) + + l_91439 * + num_threads_125879)] = + zt_res_91698; + for (int64_t i_126635 = 0; i_126635 < k2p2zq_73023; + i_126635++) { + ((__global double *) mem_125152)[phys_tid_90949 + + i_126635 * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + i_126635 * + num_threads_125879]; + } + for (int64_t i_126636 = 0; i_126636 < (int64_t) 2; + i_126636++) { + for (int64_t i_126637 = 0; i_126637 < k2p2zq_73023; + i_126637++) { + ((__global + double *) mem_125150)[phys_tid_90949 + + (i_126636 * + (num_threads_125879 * + k2p2zq_73023) + + i_126637 * + num_threads_125879)] = + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (i_126636 * + (num_threads_125879 * + k2p2zq_73023) + + i_126637 * + num_threads_125879)]; + } + } + for (int64_t i_126638 = 0; i_126638 < k2p2zq_73023; + i_126638++) { + for (int64_t i_126639 = 0; i_126639 < k2p2zq_73023; + i_126639++) { + ((__global + double *) mem_125421)[phys_tid_90949 + + (i_126638 * + (num_threads_125879 * + k2p2zq_73023) + + i_126639 * + num_threads_125879)] = + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_126638 * + (num_threads_125879 * + k2p2zq_73023) + + i_126639 * + 
num_threads_125879)]; + } + } + } + for (int64_t i_126640 = 0; i_126640 < k2p2zq_73023; + i_126640++) { + ((__global double *) mem_125431)[phys_tid_90949 + + i_126640 * + num_threads_125879] = + ((__global double *) mem_125152)[phys_tid_90949 + + i_126640 * + num_threads_125879]; + } + for (int64_t i_126641 = 0; i_126641 < (int64_t) 2; + i_126641++) { + for (int64_t i_126642 = 0; i_126642 < k2p2zq_73023; + i_126642++) { + ((__global double *) mem_125429)[phys_tid_90949 + + (i_126641 * + (num_threads_125879 * + k2p2zq_73023) + + i_126642 * + num_threads_125879)] = + ((__global + double *) mem_125150)[phys_tid_90949 + + (i_126641 * + (num_threads_125879 * + k2p2zq_73023) + + i_126642 * + num_threads_125879)]; + } + } + for (int64_t i_126643 = 0; i_126643 < k2p2zq_73023; + i_126643++) { + for (int64_t i_126644 = 0; i_126644 < k2p2zq_73023; + i_126644++) { + ((__global double *) mem_125491)[phys_tid_90949 + + (i_126643 * + (num_threads_125879 * + k2p2zq_73023) + + i_126644 * + num_threads_125879)] = + ((__global + double *) mem_125421)[phys_tid_90949 + + (i_126643 * + (num_threads_125879 * + k2p2zq_73023) + + i_126644 * + num_threads_125879)]; + } + } + } + for (int64_t i_126645 = 0; i_126645 < k2p2zq_73023; + i_126645++) { + for (int64_t i_126646 = 0; i_126646 < k2p2zq_73023; + i_126646++) { + ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_126645 * + (num_threads_125879 * + k2p2zq_73023) + + i_126646 * + num_threads_125879)] = + ((__global double *) mem_125491)[phys_tid_90949 + + (i_126645 * + (num_threads_125879 * + k2p2zq_73023) + + i_126646 * + num_threads_125879)]; + } + } + for (int64_t i_126647 = 0; i_126647 < k2p2zq_73023; + i_126647++) { + ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + i_126647 * + num_threads_125879] = + ((__global double *) mem_125431)[phys_tid_90949 + + i_126647 * + num_threads_125879]; + } + for (int64_t i_126648 = 0; i_126648 < (int64_t) 2; i_126648++) { + for (int64_t i_126649 = 0; i_126649 < 
k2p2zq_73023; + i_126649++) { + ((__global + double *) double_buffer_mem_125537)[phys_tid_90949 + + (i_126648 * + (num_threads_125879 * + k2p2zq_73023) + + i_126649 * + num_threads_125879)] = + ((__global double *) mem_125429)[phys_tid_90949 + + (i_126648 * + (num_threads_125879 * + k2p2zq_73023) + + i_126649 * + num_threads_125879)]; + } + } + + int64_t k_tmp_126603 = loopres_91471; + + k_91444 = k_tmp_126603; + } + dqrdc2_res_91438 = k_91444; + + int64_t min_arg_91700 = sub64(dqrdc2_res_91438, (int64_t) 1); + int64_t min_res_91701 = smin64(k2p2zq_73023, min_arg_91700); + + for (int64_t i_119649 = 0; i_119649 < k2p2zq_73023; i_119649++) { + int64_t x_91705 = add64((int64_t) 1, i_119649); + bool cond_f_res_91706 = slt64(min_res_91701, x_91705); + + for (int64_t i_119653 = 0; i_119653 < k2p2zq_73023; + i_119653++) { + int64_t x_91710 = add64((int64_t) 1, i_119653); + bool cond_91711 = slt64(min_res_91701, x_91710); + bool x_91712 = !cond_91711; + bool y_91713 = cond_f_res_91706 && x_91712; + bool cond_91714 = cond_91711 || y_91713; + double defunc_1_f_res_91715; + + if (cond_91714) { + defunc_1_f_res_91715 = NAN; + } else { + double x_91709 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (i_119649 * + (num_threads_125879 * + k2p2zq_73023) + + i_119653 * + num_threads_125879)]; + + defunc_1_f_res_91715 = x_91709; + } + ((__global double *) mem_120608)[phys_tid_90949 + + (i_119649 * + (num_threads_125879 * + k2p2zq_73023) + + i_119653 * + num_threads_125879)] = + defunc_1_f_res_91715; + } + } + for (int64_t i_126652 = 0; i_126652 < k2p2zq_73023; i_126652++) { + ((__global double *) mem_120661)[phys_tid_90949 + i_126652 * + num_threads_125879] = 0.0; + } + for (int64_t i_119657 = 0; i_119657 < k2p2zq_73023; i_119657++) { + for (int64_t i_126654 = 0; i_126654 < k2p2zq_73023; + i_126654++) { + ((__global double *) mem_120649)[phys_tid_90949 + + (i_119657 * + (num_threads_125879 * + k2p2zq_73023) + + i_126654 * + num_threads_125879)] = + ((__global 
double *) mem_120661)[phys_tid_90949 + + i_126654 * + num_threads_125879]; + } + for (int64_t i_91721 = 0; i_91721 < k2p2zq_73023; i_91721++) { + int64_t x_91723 = sub64(k2p2zq_73023, i_91721); + int64_t i_91724 = sub64(x_91723, (int64_t) 1); + bool x_91725 = sle64((int64_t) 0, i_91724); + bool y_91726 = slt64(i_91724, k2p2zq_73023); + bool bounds_check_91727 = x_91725 && y_91726; + int64_t j_m_i_91728 = sub64(k2p2zq_73023, x_91723); + bool empty_slice_91729 = j_m_i_91728 == (int64_t) 0; + int64_t m_91730 = sub64(j_m_i_91728, (int64_t) 1); + int64_t i_p_m_t_s_91731 = add64(x_91723, m_91730); + bool zzero_leq_i_p_m_t_s_91732 = sle64((int64_t) 0, + i_p_m_t_s_91731); + bool i_p_m_t_s_leq_w_91733 = slt64(i_p_m_t_s_91731, + k2p2zq_73023); + bool zzero_lte_i_91734 = sle64((int64_t) 0, x_91723); + bool i_lte_j_91735 = sle64(x_91723, k2p2zq_73023); + bool y_91736 = i_p_m_t_s_leq_w_91733 && zzero_lte_i_91734; + bool y_91737 = zzero_leq_i_p_m_t_s_91732 && y_91736; + bool y_91738 = i_lte_j_91735 && y_91737; + bool forwards_ok_91739 = zzero_lte_i_91734 && y_91738; + bool ok_or_empty_91740 = empty_slice_91729 || + forwards_ok_91739; + bool index_ok_91741 = bounds_check_91727 && + ok_or_empty_91740; + bool index_certs_91742; + + if (!index_ok_91741) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 471) == -1) { + global_failure_args[0] = i_91724; + global_failure_args[1] = x_91723; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + global_failure_args[4] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_91743; + + if (!ok_or_empty_91740) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 472) == -1) { + global_failure_args[0] = x_91723; + global_failure_args[1] = k2p2zq_73023; + global_failure_args[2] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_91746; + double redout_119659 = 0.0; + + for (int64_t i_119660 = 0; i_119660 < 
j_m_i_91728; + i_119660++) { + int64_t slice_119992 = x_91723 + i_119660; + double x_91751 = ((__global + double *) mem_120608)[phys_tid_90949 + + (slice_119992 * + (num_threads_125879 * + k2p2zq_73023) + + i_91724 * + num_threads_125879)]; + bool isnan_res_91752; + + isnan_res_91752 = futrts_isnan64(x_91751); + + double defunc_1_f_res_91753; + + if (isnan_res_91752) { + defunc_1_f_res_91753 = 0.0; + } else { + double x_91750 = ((__global + double *) mem_120649)[phys_tid_90949 + + (i_119657 * + (num_threads_125879 * + k2p2zq_73023) + + slice_119992 * + num_threads_125879)]; + double defunc_1_f_res_f_res_91754 = x_91750 * + x_91751; + + defunc_1_f_res_91753 = defunc_1_f_res_f_res_91754; + } + + double defunc_1_op_res_91749 = defunc_1_f_res_91753 + + redout_119659; + double redout_tmp_126656 = defunc_1_op_res_91749; + + redout_119659 = redout_tmp_126656; + } + defunc_2_reduce_res_91746 = redout_119659; + + bool index_ok_91755 = bounds_check_91727 && + bounds_check_91727; + bool index_certs_91756; + + if (!index_ok_91755) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 473) == -1) { + global_failure_args[0] = i_91724; + global_failure_args[1] = i_91724; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zs_arg_91757 = ((__global + double *) mem_120608)[phys_tid_90949 + + (i_91724 * + (num_threads_125879 * + k2p2zq_73023) + + i_91724 * + num_threads_125879)]; + bool index_certs_91758; + + if (!bounds_check_91727) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 474) == -1) { + global_failure_args[0] = i_91724; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zm_arg_91759 = ((__global + double *) mem_120252)[i_119657 * + k2p2zq_73023 + + i_91724]; + double zm_res_91760 = zm_arg_91759 - + defunc_2_reduce_res_91746; + double zs_res_91761 = zm_res_91760 / zs_arg_91757; + + ((__global double *) 
mem_120649)[phys_tid_90949 + + (i_119657 * + (num_threads_125879 * + k2p2zq_73023) + i_91724 * + num_threads_125879)] = + zs_res_91761; + } + } + for (int64_t i_119663 = 0; i_119663 < k2p2zq_73023; i_119663++) { + for (int64_t i_119667 = 0; i_119667 < k2p2zq_73023; + i_119667++) { + double defunc_2_reduce_res_91768; + double redout_119669 = 0.0; + + for (int64_t i_119670 = 0; i_119670 < k2p2zq_73023; + i_119670++) { + double x_91772 = ((__global + double *) mem_120649)[phys_tid_90949 + + (i_119670 * + (num_threads_125879 * + k2p2zq_73023) + + i_119663 * + num_threads_125879)]; + double x_91773 = ((__global + double *) mem_120649)[phys_tid_90949 + + (i_119670 * + (num_threads_125879 * + k2p2zq_73023) + + i_119667 * + num_threads_125879)]; + double defunc_1_f_res_91774 = x_91772 * x_91773; + double defunc_1_op_res_91771 = defunc_1_f_res_91774 + + redout_119669; + double redout_tmp_126659 = defunc_1_op_res_91771; + + redout_119669 = redout_tmp_126659; + } + defunc_2_reduce_res_91768 = redout_119669; + ((__global double *) mem_120690)[phys_tid_90949 + + (i_119663 * + (num_threads_125879 * + k2p2zq_73023) + + i_119667 * + num_threads_125879)] = + defunc_2_reduce_res_91768; + } + } + + int64_t min_res_91775 = smin64(m_73095, min_res_91701); + + for (int64_t i_126660 = 0; i_126660 < k2p2zq_73023; i_126660++) { + ((__global double *) double_buffer_mem_125548)[phys_tid_90949 + + i_126660 * + num_threads_125879] = + ((__global double *) mem_120257)[gtid_90948 + i_126660 * + m_73008]; + } + for (int64_t j_91777 = 0; j_91777 < min_res_91775; j_91777++) { + bool y_91779 = slt64(j_91777, k2p2zq_73023); + bool index_certs_91780; + + if (!y_91779) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 475) == -1) { + global_failure_args[0] = j_91777; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_91781 = ((__global + double *) double_buffer_mem_125536)[phys_tid_90949 + + j_91777 * + num_threads_125879]; + 
bool zeze_res_91782 = zeze_arg_91781 == 0.0; + + if (zeze_res_91782) { + for (int64_t i_126662 = 0; i_126662 < k2p2zq_73023; + i_126662++) { + ((__global double *) mem_125160)[phys_tid_90949 + + i_126662 * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_126662 * + num_threads_125879]; + } + } else { + double y_91784 = ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + j_91777 * + num_threads_125879]; + double negate_arg_91785 = zeze_arg_91781 * y_91784; + double t_91786 = 0.0 - negate_arg_91785; + int64_t x_91787 = sub64(k2p2zq_73023, j_91777); + int64_t upper_bound_91788 = sub64(x_91787, (int64_t) 1); + double t_91789; + double t_91791 = t_91786; + + for (int64_t i0_91790 = 0; i0_91790 < upper_bound_91788; + i0_91790++) { + int64_t x_91792 = add64(j_91777, i0_91790); + int64_t i_91793 = add64((int64_t) 1, x_91792); + bool x_91794 = sle64((int64_t) 0, i_91793); + bool y_91795 = slt64(i_91793, k2p2zq_73023); + bool bounds_check_91796 = x_91794 && y_91795; + bool index_ok_91797 = y_91779 && bounds_check_91796; + bool index_certs_91798; + + if (!index_ok_91797) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 476) == -1) { + global_failure_args[0] = j_91777; + global_failure_args[1] = i_91793; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_91799 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (j_91777 * + (num_threads_125879 * + k2p2zq_73023) + + i_91793 * + num_threads_125879)]; + bool index_certs_91800; + + if (!bounds_check_91796) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 477) == -1) { + global_failure_args[0] = i_91793; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_91801 = ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_91793 * + num_threads_125879]; + double 
y_91802 = x_91799 * y_91801; + double loopres_91803 = t_91791 - y_91802; + double t_tmp_126663 = loopres_91803; + + t_91791 = t_tmp_126663; + } + t_91789 = t_91791; + + double t_91804 = t_91789 / zeze_arg_91781; + double y_91805 = zeze_arg_91781 * t_91804; + double lw_val_91806 = y_91784 + y_91805; + + ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + j_91777 * + num_threads_125879] = + lw_val_91806; + for (int64_t i0_91809 = 0; i0_91809 < upper_bound_91788; + i0_91809++) { + int64_t x_91811 = add64(j_91777, i0_91809); + int64_t i_91812 = add64((int64_t) 1, x_91811); + bool x_91813 = sle64((int64_t) 0, i_91812); + bool y_91814 = slt64(i_91812, k2p2zq_73023); + bool bounds_check_91815 = x_91813 && y_91814; + bool index_certs_91816; + + if (!bounds_check_91815) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 478) == -1) { + global_failure_args[0] = i_91812; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_91817 = ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_91812 * + num_threads_125879]; + bool index_ok_91818 = y_91779 && bounds_check_91815; + bool index_certs_91819; + + if (!index_ok_91818) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 479) == -1) { + global_failure_args[0] = j_91777; + global_failure_args[1] = i_91812; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_91820 = ((__global + double *) double_buffer_mem_125535)[phys_tid_90949 + + (j_91777 * + (num_threads_125879 * + k2p2zq_73023) + + i_91812 * + num_threads_125879)]; + double y_91821 = t_91804 * y_91820; + double lw_val_91822 = x_91817 + y_91821; + + ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_91812 * + num_threads_125879] = + lw_val_91822; + } + for (int64_t i_126665 = 0; i_126665 < k2p2zq_73023; + i_126665++) { + ((__global double *) 
mem_125160)[phys_tid_90949 + + i_126665 * + num_threads_125879] = + ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_126665 * + num_threads_125879]; + } + } + for (int64_t i_126666 = 0; i_126666 < k2p2zq_73023; + i_126666++) { + ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_126666 * + num_threads_125879] = + ((__global double *) mem_125160)[phys_tid_90949 + + i_126666 * + num_threads_125879]; + } + } + for (int64_t i_126667 = 0; i_126667 < k2p2zq_73023; i_126667++) { + ((__global double *) mem_120763)[phys_tid_90949 + i_126667 * + num_threads_125879] = 0.0; + } + for (int64_t write_iter_119671 = 0; write_iter_119671 < + k2p2zq_73023; write_iter_119671++) { + int64_t write_iv_119674 = ((__global + int64_t *) mem_120273)[phys_tid_90949 + + write_iter_119671 * + num_threads_125879]; + double defunc_2_reduce_res_91828; + double redout_119681 = 0.0; + + for (int64_t i_119682 = 0; i_119682 < k2p2zq_73023; + i_119682++) { + double x_91832 = ((__global + double *) double_buffer_mem_125548)[phys_tid_90949 + + i_119682 * + num_threads_125879]; + double x_91833 = ((__global + double *) mem_120649)[phys_tid_90949 + + (i_119682 * + (num_threads_125879 * + k2p2zq_73023) + + write_iter_119671 * + num_threads_125879)]; + double defunc_1_f_res_91834 = x_91832 * x_91833; + double defunc_1_op_res_91831 = defunc_1_f_res_91834 + + redout_119681; + double redout_tmp_126669 = defunc_1_op_res_91831; + + redout_119681 = redout_tmp_126669; + } + defunc_2_reduce_res_91828 = redout_119681; + + bool less_than_zzero_119675 = slt64(write_iv_119674, + (int64_t) 0); + bool greater_than_sizze_119676 = sle64(k2p2zq_73023, + write_iv_119674); + bool outside_bounds_dim_119677 = less_than_zzero_119675 || + greater_than_sizze_119676; + + if (!outside_bounds_dim_119677) { + ((__global double *) mem_120763)[phys_tid_90949 + + write_iv_119674 * + num_threads_125879] = + defunc_2_reduce_res_91828; + } + } + for (int64_t i_119685 = 0; i_119685 < k2p2zq_73023; 
i_119685++) { + int64_t x_91837 = ((__global + int64_t *) mem_120273)[phys_tid_90949 + + i_119685 * + num_threads_125879]; + + for (int64_t i_126671 = 0; i_126671 < k2p2zq_73023; + i_126671++) { + ((__global int64_t *) mem_120790)[phys_tid_90949 + + i_126671 * + num_threads_125879] = + x_91837; + } + for (int64_t i_126672 = 0; i_126672 < k2p2zq_73023; + i_126672++) { + ((__global int64_t *) mem_120778)[phys_tid_90949 + + (i_119685 * + (num_threads_125879 * + k2p2zq_73023) + + i_126672 * + num_threads_125879)] = + ((__global int64_t *) mem_120790)[phys_tid_90949 + + i_126672 * + num_threads_125879]; + } + } + for (int64_t i_126673 = 0; i_126673 < k2p2zq_73023; i_126673++) { + for (int64_t i_126674 = 0; i_126674 < k2p2zq_73023; + i_126674++) { + ((__global double *) mem_120801)[phys_tid_90949 + + (i_126673 * + (num_threads_125879 * + k2p2zq_73023) + + i_126674 * + num_threads_125879)] = + 0.0; + } + } + for (int64_t write_iter_119687 = 0; write_iter_119687 < + binop_x_120251; write_iter_119687++) { + int64_t new_index_119993 = squot64(write_iter_119687, + k2p2zq_73023); + int64_t binop_y_119995 = k2p2zq_73023 * new_index_119993; + int64_t new_index_119996 = write_iter_119687 - binop_y_119995; + int64_t write_iv_119689 = ((__global + int64_t *) mem_120778)[phys_tid_90949 + + (new_index_119993 * + (num_threads_125879 * + k2p2zq_73023) + + new_index_119996 * + num_threads_125879)]; + int64_t write_iv_119690 = ((__global + int64_t *) mem_120273)[phys_tid_90949 + + new_index_119996 * + num_threads_125879]; + bool less_than_zzero_119692 = slt64(write_iv_119689, + (int64_t) 0); + bool greater_than_sizze_119693 = sle64(k2p2zq_73023, + write_iv_119689); + bool outside_bounds_dim_119694 = less_than_zzero_119692 || + greater_than_sizze_119693; + bool less_than_zzero_119695 = slt64(write_iv_119690, + (int64_t) 0); + bool greater_than_sizze_119696 = sle64(k2p2zq_73023, + write_iv_119690); + bool outside_bounds_dim_119697 = less_than_zzero_119695 || + greater_than_sizze_119696; + 
bool outside_bounds_119699 = outside_bounds_dim_119694 || + outside_bounds_dim_119697; + + if (!outside_bounds_119699) { + for (int64_t i_126676 = 0; i_126676 < (int64_t) 1; + i_126676++) { + ((__global double *) mem_120801)[phys_tid_90949 + + (write_iv_119689 * + (num_threads_125879 * + k2p2zq_73023) + + (write_iv_119690 + + i_126676) * + num_threads_125879)] = + ((__global double *) mem_120690)[phys_tid_90949 + + new_index_119993 * + (num_threads_125879 * + k2p2zq_73023) + + num_threads_125879 * + new_index_119996 + + i_126676 * + num_threads_125879]; + } + } + } + for (int64_t i_119706 = 0; i_119706 < k2p2zq_73023; i_119706++) { + double x_91850 = ((__global + double *) mem_120763)[phys_tid_90949 + + i_119706 * + num_threads_125879]; + + for (int64_t i_119711 = 0; i_119711 < k2p2zq_73023; + i_119711++) { + double x_91852 = ((__global + double *) mem_120801)[phys_tid_90949 + + (i_119706 * + (num_threads_125879 * + k2p2zq_73023) + + i_119711 * + num_threads_125879)]; + bool isnan_res_91853; + + isnan_res_91853 = futrts_isnan64(x_91852); + + double defunc_0_f_res_91854; + + if (isnan_res_91853) { + defunc_0_f_res_91854 = 0.0; + } else { + defunc_0_f_res_91854 = x_91852; + } + ((__global double *) mem_120824)[phys_tid_90949 + + (i_119706 * + (num_threads_125879 * + k2p2zq_73023) + + i_119711 * + num_threads_125879)] = + defunc_0_f_res_91854; + } + + bool isnan_res_91855; + + isnan_res_91855 = futrts_isnan64(x_91850); + + double defunc_0_f_res_91856; + + if (isnan_res_91855) { + defunc_0_f_res_91856 = 0.0; + } else { + defunc_0_f_res_91856 = x_91850; + } + ((__global double *) mem_120821)[phys_tid_90949 + i_119706 * + num_threads_125879] = + defunc_0_f_res_91856; + } + for (int64_t i_126680 = 0; i_126680 < k2p2zq_73023; i_126680++) { + for (int64_t i_126681 = 0; i_126681 < k2p2zq_73023; + i_126681++) { + ((__global double *) mem_120878)[i_126680 * (m_73008 * + k2p2zq_73023) + + i_126681 * m_73008 + + gtid_90948] = ((__global + double *) mem_120824)[phys_tid_90949 + + 
(i_126680 * + (num_threads_125879 * + k2p2zq_73023) + + i_126681 * + num_threads_125879)]; + } + } + for (int64_t i_126682 = 0; i_126682 < k2p2zq_73023; i_126682++) { + ((__global double *) mem_120881)[i_126682 * m_73008 + + gtid_90948] = ((__global + double *) mem_120821)[phys_tid_90949 + + i_126682 * + num_threads_125879]; + } + ((__global int64_t *) mem_120883)[gtid_90948] = min_res_91701; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_91404 +} +__kernel void mainMagnitudezisegmap_91871(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_121898, + __global unsigned char *mem_121919) +{ + #define segmap_group_sizze_94278 (mainMagnitudezisegmap_group_sizze_91874) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127177; + int32_t local_tid_127178; + int64_t group_sizze_127181; + int32_t wave_sizze_127180; + int32_t group_tid_127179; + + global_tid_127177 = get_global_id(0); + local_tid_127178 = get_local_id(0); + group_sizze_127181 = get_local_size(0); + wave_sizze_127180 = LOCKSTEP_WIDTH; + group_tid_127179 = get_group_id(0); + + int32_t phys_tid_91871; + + phys_tid_91871 = global_tid_127177; + + int64_t gtid_91869; + + gtid_91869 = squot64(sext_i32_i64(group_tid_127179) * + segmap_group_sizze_94278 + + sext_i32_i64(local_tid_127178), k2p2zq_73023); + + int64_t gtid_91870; + + gtid_91870 = sext_i32_i64(group_tid_127179) * segmap_group_sizze_94278 + + sext_i32_i64(local_tid_127178) - + squot64(sext_i32_i64(group_tid_127179) * segmap_group_sizze_94278 + + sext_i32_i64(local_tid_127178), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_91869, m_73008) && slt64(gtid_91870, k2p2zq_73023)) { + double x_94281 = ((__global double *) mem_121898)[gtid_91869 * + k2p2zq_73023 + + gtid_91870]; + bool isnan_res_94282; + + isnan_res_94282 = futrts_isnan64(x_94281); + + 
double defunc_0_f_res_94283; + + if (isnan_res_94282) { + defunc_0_f_res_94283 = 0.0; + } else { + defunc_0_f_res_94283 = x_94281; + } + ((__global double *) mem_121919)[gtid_91869 * k2p2zq_73023 + + gtid_91870] = defunc_0_f_res_94283; + } + + error_0: + return; + #undef segmap_group_sizze_94278 +} +__kernel void mainMagnitudezisegmap_91893(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_121906, + __global unsigned char *mem_121915) +{ + #define segmap_group_sizze_94269 (mainMagnitudezisegmap_group_sizze_91897) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127172; + int32_t local_tid_127173; + int64_t group_sizze_127176; + int32_t wave_sizze_127175; + int32_t group_tid_127174; + + global_tid_127172 = get_global_id(0); + local_tid_127173 = get_local_id(0); + group_sizze_127176 = get_local_size(0); + wave_sizze_127175 = LOCKSTEP_WIDTH; + group_tid_127174 = get_group_id(0); + + int32_t phys_tid_91893; + + phys_tid_91893 = global_tid_127172; + + int64_t gtid_91890; + + gtid_91890 = squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173), k2p2zq_73023 * + k2p2zq_73023); + + int64_t gtid_91891; + + gtid_91891 = squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173) - + squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + + int64_t gtid_91892; + + gtid_91892 = sext_i32_i64(group_tid_127174) * segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173) - + squot64(sext_i32_i64(group_tid_127174) * segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173), k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_94269 + + 
sext_i32_i64(local_tid_127173) - + squot64(sext_i32_i64(group_tid_127174) * + segmap_group_sizze_94269 + + sext_i32_i64(local_tid_127173), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + if ((slt64(gtid_91890, m_73008) && slt64(gtid_91891, k2p2zq_73023)) && + slt64(gtid_91892, k2p2zq_73023)) { + double x_94272 = ((__global double *) mem_121906)[gtid_91890 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_91891 * + k2p2zq_73023 + + gtid_91892]; + bool isnan_res_94273; + + isnan_res_94273 = futrts_isnan64(x_94272); + + double defunc_0_f_res_94274; + + if (isnan_res_94273) { + defunc_0_f_res_94274 = 0.0; + } else { + defunc_0_f_res_94274 = x_94272; + } + ((__global double *) mem_121915)[gtid_91890 * (k2p2zq_73023 * + k2p2zq_73023) + + gtid_91891 * k2p2zq_73023 + + gtid_91892] = defunc_0_f_res_94274; + } + + error_0: + return; + #undef segmap_group_sizze_94269 +} +__kernel void mainMagnitudezisegmap_91941(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t binop_x_120251, __global + unsigned char *defunc_3_map_res_r_mem_121847, + __global unsigned char *mem_121906, + __global unsigned char *mem_121909) +{ + #define segmap_group_sizze_94250 (mainMagnitudezisegmap_group_sizze_91944) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127167; + int32_t local_tid_127168; + int64_t group_sizze_127171; + int32_t wave_sizze_127170; + int32_t group_tid_127169; + + global_tid_127167 = get_global_id(0); + local_tid_127168 = get_local_id(0); + group_sizze_127171 = get_local_size(0); + wave_sizze_127170 = LOCKSTEP_WIDTH; + group_tid_127169 = get_group_id(0); + + int32_t phys_tid_91941; + + phys_tid_91941 = global_tid_127167; + + int64_t gtid_91939; + + gtid_91939 = squot64(sext_i32_i64(group_tid_127169) * + segmap_group_sizze_94250 + + sext_i32_i64(local_tid_127168), binop_x_120251); + + int64_t gtid_91940; + 
+ gtid_91940 = sext_i32_i64(group_tid_127169) * segmap_group_sizze_94250 + + sext_i32_i64(local_tid_127168) - + squot64(sext_i32_i64(group_tid_127169) * segmap_group_sizze_94250 + + sext_i32_i64(local_tid_127168), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_91939, m_73008) && slt64(gtid_91940, binop_x_120251)) { + int64_t binop_x_115049 = gtid_91939 * binop_x_120251; + int64_t binop_x_115050 = gtid_91940 + binop_x_115049; + int64_t new_index_115052 = squot64(binop_x_115050, binop_x_120251); + int64_t binop_y_115060 = new_index_115052 * binop_x_120251; + int64_t binop_x_115061 = binop_x_115050 - binop_y_115060; + int64_t new_index_115062 = squot64(binop_x_115061, k2p2zq_73023); + int64_t write_index_94253 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_73023 + + new_index_115062]; + int64_t binop_y_115117 = k2p2zq_73023 * new_index_115062; + int64_t new_index_115118 = binop_x_115061 - binop_y_115117; + int64_t write_index_94254 = ((__global + int64_t *) mem_121909)[new_index_115052 * + k2p2zq_73023 + + new_index_115118]; + double write_value_94255 = ((__global + double *) defunc_3_map_res_r_mem_121847)[new_index_115052 * + (k2p2zq_73023 * + k2p2zq_73023) + + new_index_115062 * + k2p2zq_73023 + + new_index_115118]; + + if (((sle64((int64_t) 0, gtid_91939) && slt64(gtid_91939, m_73008)) && + (sle64((int64_t) 0, write_index_94253) && slt64(write_index_94253, + k2p2zq_73023))) && + (sle64((int64_t) 0, write_index_94254) && slt64(write_index_94254, + k2p2zq_73023))) { + ((__global double *) mem_121906)[gtid_91939 * (k2p2zq_73023 * + k2p2zq_73023) + + write_index_94253 * k2p2zq_73023 + + write_index_94254] = + write_value_94255; + } + } + + error_0: + return; + #undef segmap_group_sizze_94250 +} +__kernel void mainMagnitudezisegmap_92098(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t binop_x_120251, __global + unsigned char *mem_121341, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned 
char *mem_121898, + __global unsigned char *mem_121901) +{ + #define segmap_group_sizze_94179 (mainMagnitudezisegmap_group_sizze_92101) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127161; + int32_t local_tid_127162; + int64_t group_sizze_127165; + int32_t wave_sizze_127164; + int32_t group_tid_127163; + + global_tid_127161 = get_global_id(0); + local_tid_127162 = get_local_id(0); + group_sizze_127165 = get_local_size(0); + wave_sizze_127164 = LOCKSTEP_WIDTH; + group_tid_127163 = get_group_id(0); + + int32_t phys_tid_92098; + + phys_tid_92098 = global_tid_127161; + + int64_t gtid_92096; + + gtid_92096 = squot64(sext_i32_i64(group_tid_127163) * + segmap_group_sizze_94179 + + sext_i32_i64(local_tid_127162), k2p2zq_73023); + + int64_t gtid_92097; + + gtid_92097 = sext_i32_i64(group_tid_127163) * segmap_group_sizze_94179 + + sext_i32_i64(local_tid_127162) - + squot64(sext_i32_i64(group_tid_127163) * segmap_group_sizze_94179 + + sext_i32_i64(local_tid_127162), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_92096, m_73008) && slt64(gtid_92097, k2p2zq_73023)) { + int64_t write_index_94184 = ((__global + int64_t *) mem_121341)[gtid_92097 * + m_73008 + + gtid_92096]; + double defunc_2_reduce_res_94185; + double redout_119740 = 0.0; + + for (int64_t i_119741 = 0; i_119741 < k2p2zq_73023; i_119741++) { + double x_94189 = ((__global double *) mem_121901)[gtid_92096 * + k2p2zq_73023 + + i_119741]; + double x_94190 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_92096 * + binop_x_120251 + + i_119741 * + k2p2zq_73023 + + gtid_92097]; + double defunc_1_f_res_94191 = x_94189 * x_94190; + double defunc_1_op_res_94188 = defunc_1_f_res_94191 + redout_119740; + double redout_tmp_127166 = defunc_1_op_res_94188; + + redout_119740 = redout_tmp_127166; + } + defunc_2_reduce_res_94185 = redout_119740; + if ((sle64((int64_t) 0, gtid_92096) && slt64(gtid_92096, m_73008)) && + 
(sle64((int64_t) 0, write_index_94184) && slt64(write_index_94184, + k2p2zq_73023))) { + ((__global double *) mem_121898)[gtid_92096 * k2p2zq_73023 + + write_index_94184] = + defunc_2_reduce_res_94185; + } + } + + error_0: + return; + #undef segmap_group_sizze_94179 +} +__kernel void mainMagnitudezisegmap_92141(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t m_73095, + int64_t num_groups_94110, + int64_t num_threads_125936, __global + unsigned char *mem_121338, __global + unsigned char *mem_121343, __global + unsigned char *mem_121850, __global + unsigned char *mem_121858, __global + unsigned char *mem_121895, __global + unsigned char *mem_125243, __global + unsigned char *double_buffer_mem_125565) +{ + #define segmap_group_sizze_94109 (mainMagnitudezisegmap_group_sizze_92143) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127145; + int32_t local_tid_127146; + int64_t group_sizze_127149; + int32_t wave_sizze_127148; + int32_t group_tid_127147; + + global_tid_127145 = get_global_id(0); + local_tid_127146 = get_local_id(0); + group_sizze_127149 = get_local_size(0); + wave_sizze_127148 = LOCKSTEP_WIDTH; + group_tid_127147 = get_group_id(0); + + int32_t phys_tid_92141; + + phys_tid_92141 = global_tid_127145; + + int32_t phys_group_id_127150; + + phys_group_id_127150 = get_group_id(0); + for (int32_t i_127151 = 0; i_127151 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_94109)) - + phys_group_id_127150, sext_i64_i32(num_groups_94110)); + i_127151++) { + int32_t virt_group_id_127152 = phys_group_id_127150 + i_127151 * + sext_i64_i32(num_groups_94110); + int64_t gtid_92140 = 
sext_i32_i64(virt_group_id_127152) * + segmap_group_sizze_94109 + sext_i32_i64(local_tid_127146); + + if (slt64(gtid_92140, m_73008)) { + int64_t min_res_94116 = ((__global + int64_t *) mem_121343)[gtid_92140]; + int64_t min_res_94117 = smin64(m_73095, min_res_94116); + + for (int64_t i_127153 = 0; i_127153 < k2p2zq_73023; i_127153++) { + ((__global double *) double_buffer_mem_125565)[phys_tid_92141 + + i_127153 * + num_threads_125936] = + ((__global double *) mem_121850)[gtid_92140 + i_127153 * + m_73008]; + } + for (int64_t j_94119 = 0; j_94119 < min_res_94117; j_94119++) { + bool y_94121 = slt64(j_94119, k2p2zq_73023); + bool index_certs_94122; + + if (!y_94121) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 518) == -1) { + global_failure_args[0] = j_94119; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_94123 = ((__global + double *) mem_121338)[j_94119 * + m_73008 + + gtid_92140]; + bool zeze_res_94124 = zeze_arg_94123 == 0.0; + + if (zeze_res_94124) { + for (int64_t i_127155 = 0; i_127155 < k2p2zq_73023; + i_127155++) { + ((__global double *) mem_125243)[phys_tid_92141 + + i_127155 * + num_threads_125936] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_127155 * + num_threads_125936]; + } + } else { + double y_94126 = ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + j_94119 * + num_threads_125936]; + double negate_arg_94127 = zeze_arg_94123 * y_94126; + double t_94128 = 0.0 - negate_arg_94127; + int64_t x_94129 = sub64(k2p2zq_73023, j_94119); + int64_t upper_bound_94130 = sub64(x_94129, (int64_t) 1); + double t_94131; + double t_94133 = t_94128; + + for (int64_t i0_94132 = 0; i0_94132 < upper_bound_94130; + i0_94132++) { + int64_t x_94134 = add64(j_94119, i0_94132); + int64_t i_94135 = add64((int64_t) 1, x_94134); + bool x_94136 = sle64((int64_t) 0, i_94135); + bool y_94137 = slt64(i_94135, k2p2zq_73023); + bool bounds_check_94138 = 
x_94136 && y_94137; + bool index_ok_94139 = y_94121 && bounds_check_94138; + bool index_certs_94140; + + if (!index_ok_94139) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 519) == -1) { + global_failure_args[0] = j_94119; + global_failure_args[1] = i_94135; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_94141 = ((__global + double *) mem_121858)[i_94135 * + (m_73008 * + k2p2zq_73023) + + j_94119 * + m_73008 + + gtid_92140]; + bool index_certs_94142; + + if (!bounds_check_94138) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 520) == -1) { + global_failure_args[0] = i_94135; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_94143 = ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_94135 * + num_threads_125936]; + double y_94144 = x_94141 * y_94143; + double loopres_94145 = t_94133 - y_94144; + double t_tmp_127156 = loopres_94145; + + t_94133 = t_tmp_127156; + } + t_94131 = t_94133; + + double t_94146 = t_94131 / zeze_arg_94123; + double y_94147 = zeze_arg_94123 * t_94146; + double lw_val_94148 = y_94126 + y_94147; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + j_94119 * + num_threads_125936] = + lw_val_94148; + for (int64_t i0_94151 = 0; i0_94151 < upper_bound_94130; + i0_94151++) { + int64_t x_94153 = add64(j_94119, i0_94151); + int64_t i_94154 = add64((int64_t) 1, x_94153); + bool x_94155 = sle64((int64_t) 0, i_94154); + bool y_94156 = slt64(i_94154, k2p2zq_73023); + bool bounds_check_94157 = x_94155 && y_94156; + bool index_certs_94158; + + if (!bounds_check_94157) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 521) == -1) { + global_failure_args[0] = i_94154; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_94159 = ((__global + double *) 
double_buffer_mem_125565)[phys_tid_92141 + + i_94154 * + num_threads_125936]; + bool index_ok_94160 = y_94121 && bounds_check_94157; + bool index_certs_94161; + + if (!index_ok_94160) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 522) == -1) { + global_failure_args[0] = j_94119; + global_failure_args[1] = i_94154; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_94162 = ((__global + double *) mem_121858)[i_94154 * + (m_73008 * + k2p2zq_73023) + + j_94119 * + m_73008 + + gtid_92140]; + double y_94163 = t_94146 * y_94162; + double lw_val_94164 = x_94159 + y_94163; + + ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_94154 * + num_threads_125936] = + lw_val_94164; + } + for (int64_t i_127158 = 0; i_127158 < k2p2zq_73023; + i_127158++) { + ((__global double *) mem_125243)[phys_tid_92141 + + i_127158 * + num_threads_125936] = + ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_127158 * + num_threads_125936]; + } + } + for (int64_t i_127159 = 0; i_127159 < k2p2zq_73023; + i_127159++) { + ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_127159 * + num_threads_125936] = + ((__global double *) mem_125243)[phys_tid_92141 + + i_127159 * + num_threads_125936]; + } + } + for (int64_t i_127160 = 0; i_127160 < k2p2zq_73023; i_127160++) { + ((__global double *) mem_121895)[i_127160 * m_73008 + + gtid_92140] = ((__global + double *) double_buffer_mem_125565)[phys_tid_92141 + + i_127160 * + num_threads_125936]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_94109 +} +__kernel void mainMagnitudezisegmap_92203(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t num_groups_94051, + int64_t binop_x_120251, + int64_t num_threads_125932, __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global unsigned 
char *mem_121613, + __global unsigned char *mem_121616, + __global unsigned char *mem_121632) +{ + #define segmap_group_sizze_94050 (mainMagnitudezisegmap_group_sizze_92206) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127002; + int32_t local_tid_127003; + int64_t group_sizze_127006; + int32_t wave_sizze_127005; + int32_t group_tid_127004; + + global_tid_127002 = get_global_id(0); + local_tid_127003 = get_local_id(0); + group_sizze_127006 = get_local_size(0); + wave_sizze_127005 = LOCKSTEP_WIDTH; + group_tid_127004 = get_group_id(0); + + int32_t phys_tid_92203; + + phys_tid_92203 = global_tid_127002; + + int32_t phys_group_id_127007; + + phys_group_id_127007 = get_group_id(0); + for (int32_t i_127008 = 0; i_127008 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + segmap_group_sizze_94050)) - + phys_group_id_127007, sext_i64_i32(num_groups_94051)); + i_127008++) { + int32_t virt_group_id_127009 = phys_group_id_127007 + i_127008 * + sext_i64_i32(num_groups_94051); + int64_t gtid_92201 = squot64(sext_i32_i64(virt_group_id_127009) * + segmap_group_sizze_94050 + + sext_i32_i64(local_tid_127003), + k2p2zq_73023); + int64_t gtid_92202 = sext_i32_i64(virt_group_id_127009) * + segmap_group_sizze_94050 + sext_i32_i64(local_tid_127003) - + squot64(sext_i32_i64(virt_group_id_127009) * + segmap_group_sizze_94050 + + sext_i32_i64(local_tid_127003), k2p2zq_73023) * + k2p2zq_73023; + + if (slt64(gtid_92201, m_73008) && slt64(gtid_92202, k2p2zq_73023)) { + for (int64_t i_119736 = 0; i_119736 < k2p2zq_73023; i_119736++) { + double defunc_2_reduce_res_94062; + double redout_119738 = 0.0; + + for (int64_t i_119739 = 0; i_119739 < k2p2zq_73023; + i_119739++) { + double x_94066 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_92201 * + binop_x_120251 + + i_119739 * + k2p2zq_73023 + + gtid_92202]; + double x_94067 = ((__global + double *) mem_121613)[gtid_92201 
* + (k2p2zq_73023 * + k2p2zq_73023) + + i_119736 * + k2p2zq_73023 + + i_119739]; + double defunc_1_f_res_94068 = x_94066 * x_94067; + double defunc_1_op_res_94065 = defunc_1_f_res_94068 + + redout_119738; + double redout_tmp_127011 = defunc_1_op_res_94065; + + redout_119738 = redout_tmp_127011; + } + defunc_2_reduce_res_94062 = redout_119738; + ((__global double *) mem_121616)[phys_tid_92203 + i_119736 * + num_threads_125932] = + defunc_2_reduce_res_94062; + } + for (int64_t i_127012 = 0; i_127012 < k2p2zq_73023; i_127012++) { + ((__global double *) mem_121632)[i_127012 * (k2p2zq_73023 * + m_73008) + + gtid_92201 * k2p2zq_73023 + + gtid_92202] = ((__global + double *) mem_121616)[phys_tid_92203 + + i_127012 * + num_threads_125932]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_94050 +} +__kernel void mainMagnitudezisegmap_92419(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t x_93925, int64_t i_93926, + int64_t j_m_i_93930, + int64_t num_groups_93958, + int64_t num_threads_125924, __global + unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121458, __global + unsigned char *mem_121476, __global + unsigned char *mem_121480, __global + unsigned char *mem_121492, __global + unsigned char *mem_121504) +{ + #define segmap_group_sizze_93957 (mainMagnitudezisegmap_group_sizze_92421) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126899; + int32_t local_tid_126900; + int64_t group_sizze_126903; + int32_t wave_sizze_126902; + int32_t group_tid_126901; + + global_tid_126899 = get_global_id(0); + local_tid_126900 = get_local_id(0); + group_sizze_126903 = get_local_size(0); + wave_sizze_126902 = LOCKSTEP_WIDTH; + group_tid_126901 = get_group_id(0); + + int32_t phys_tid_92419; + + phys_tid_92419 = global_tid_126899; + + int32_t 
phys_group_id_126904; + + phys_group_id_126904 = get_group_id(0); + for (int32_t i_126905 = 0; i_126905 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_93957)) - + phys_group_id_126904, sext_i64_i32(num_groups_93958)); + i_126905++) { + int32_t virt_group_id_126906 = phys_group_id_126904 + i_126905 * + sext_i64_i32(num_groups_93958); + int64_t gtid_92418 = sext_i32_i64(virt_group_id_126906) * + segmap_group_sizze_93957 + sext_i32_i64(local_tid_126900); + + if (slt64(gtid_92418, m_73008)) { + double defunc_3_map_res_r_transformed_row_93963 = ((__global + double *) mem_121351)[gtid_92418 * + (k2p2zq_73023 * + k2p2zq_73023) + + i_93926 * + k2p2zq_73023 + + i_93926]; + + for (int64_t i_119725 = 0; i_119725 < k2p2zq_73023; i_119725++) { + for (int64_t i_126908 = 0; i_126908 < k2p2zq_73023; + i_126908++) { + ((__global double *) mem_121492)[phys_tid_92419 + i_126908 * + num_threads_125924] = + ((__global double *) mem_121476)[i_119725 * (m_73008 * + k2p2zq_73023) + + gtid_92418 + i_126908 * + m_73008]; + } + + double defunc_2_map_res_transformed_row_93968 = ((__global + double *) mem_120252)[i_119725 * + k2p2zq_73023 + + i_93926]; + double defunc_2_reduce_res_93969; + double redout_119728 = 0.0; + + for (int64_t i_119729 = 0; i_119729 < j_m_i_93930; i_119729++) { + int64_t slice_120011 = x_93925 + i_119729; + double x_93974 = ((__global + double *) mem_121458)[slice_120011 * + (k2p2zq_73023 * + m_73008) + + gtid_92418 * + k2p2zq_73023 + + i_93926]; + bool isnan_res_93975; + + isnan_res_93975 = futrts_isnan64(x_93974); + + double defunc_1_f_res_93976; + + if (isnan_res_93975) { + defunc_1_f_res_93976 = 0.0; + } else { + double x_93973 = ((__global + double *) mem_121476)[i_119725 * + (m_73008 * + k2p2zq_73023) + + slice_120011 * + m_73008 + + gtid_92418]; + double defunc_1_f_res_f_res_93977 = x_93973 * x_93974; + + defunc_1_f_res_93976 = defunc_1_f_res_f_res_93977; + } + + double defunc_1_op_res_93972 = defunc_1_f_res_93976 + + redout_119728; + double 
redout_tmp_126909 = defunc_1_op_res_93972; + + redout_119728 = redout_tmp_126909; + } + defunc_2_reduce_res_93969 = redout_119728; + + double zm_res_93978 = defunc_2_map_res_transformed_row_93968 - + defunc_2_reduce_res_93969; + double zs_res_93979 = zm_res_93978 / + defunc_3_map_res_r_transformed_row_93963; + + ((__global double *) mem_121492)[phys_tid_92419 + i_93926 * + num_threads_125924] = + zs_res_93979; + for (int64_t i_126910 = 0; i_126910 < k2p2zq_73023; + i_126910++) { + ((__global double *) mem_121480)[phys_tid_92419 + + (i_119725 * + (num_threads_125924 * + k2p2zq_73023) + + i_126910 * + num_threads_125924)] = + ((__global double *) mem_121492)[phys_tid_92419 + + i_126910 * + num_threads_125924]; + } + } + for (int64_t i_126911 = 0; i_126911 < k2p2zq_73023; i_126911++) { + for (int64_t i_126912 = 0; i_126912 < k2p2zq_73023; + i_126912++) { + ((__global double *) mem_121504)[i_126911 * (m_73008 * + k2p2zq_73023) + + i_126912 * m_73008 + + gtid_92418] = ((__global + double *) mem_121480)[phys_tid_92419 + + (i_126911 * + (num_threads_125924 * + k2p2zq_73023) + + i_126912 * + num_threads_125924)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_93957 +} +__kernel void mainMagnitudezisegmap_92507(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t i_93926, + int64_t binop_x_120251, __global + unsigned char *mem_param_121469, + __global unsigned char *mem_121559) +{ + #define segmap_group_sizze_94039 (mainMagnitudezisegmap_group_sizze_92511) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126994; + int32_t local_tid_126995; + int64_t group_sizze_126998; + int32_t wave_sizze_126997; + int32_t group_tid_126996; + + global_tid_126994 = get_global_id(0); + local_tid_126995 = get_local_id(0); + group_sizze_126998 = get_local_size(0); + wave_sizze_126997 = 
LOCKSTEP_WIDTH; + group_tid_126996 = get_group_id(0); + + int32_t phys_tid_92507; + + phys_tid_92507 = global_tid_126994; + + int64_t gtid_92504; + + gtid_92504 = squot64(sext_i32_i64(group_tid_126996) * + segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995), k2p2zq_73023); + + int64_t gtid_92505; + + gtid_92505 = sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995) - + squot64(sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995), k2p2zq_73023) * k2p2zq_73023; + + int64_t gtid_92506; + + gtid_92506 = sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995) - + squot64(sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995), k2p2zq_73023) * k2p2zq_73023 - + (sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995) - + squot64(sext_i32_i64(group_tid_126996) * segmap_group_sizze_94039 + + sext_i32_i64(local_tid_126995), k2p2zq_73023) * k2p2zq_73023); + if ((slt64(gtid_92504, m_73008) && slt64(gtid_92505, k2p2zq_73023)) && + slt64(gtid_92506, (int64_t) 1)) { + double zs_res_94042 = ((__global double *) mem_121559)[gtid_92504 * + k2p2zq_73023 + + gtid_92505]; + + if (((sle64((int64_t) 0, gtid_92504) && slt64(gtid_92504, m_73008)) && + (sle64((int64_t) 0, gtid_92505) && slt64(gtid_92505, + k2p2zq_73023))) && + (sle64((int64_t) 0, i_93926) && slt64(i_93926, k2p2zq_73023))) { + ((__global double *) mem_param_121469)[gtid_92504 * binop_x_120251 + + gtid_92505 * k2p2zq_73023 + + i_93926] = zs_res_94042; + } + } + + error_0: + return; + #undef segmap_group_sizze_94039 +} +__kernel void mainMagnitudezisegmap_92519(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t i_93926, __global + unsigned char *mem_120252, __global + unsigned char *mem_121351, __global + unsigned char *mem_121555, __global + unsigned char *mem_121559) +{ + #define 
segmap_group_sizze_94028 (mainMagnitudezisegmap_group_sizze_92522) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126989; + int32_t local_tid_126990; + int64_t group_sizze_126993; + int32_t wave_sizze_126992; + int32_t group_tid_126991; + + global_tid_126989 = get_global_id(0); + local_tid_126990 = get_local_id(0); + group_sizze_126993 = get_local_size(0); + wave_sizze_126992 = LOCKSTEP_WIDTH; + group_tid_126991 = get_group_id(0); + + int32_t phys_tid_92519; + + phys_tid_92519 = global_tid_126989; + + int64_t gtid_92517; + + gtid_92517 = squot64(sext_i32_i64(group_tid_126991) * + segmap_group_sizze_94028 + + sext_i32_i64(local_tid_126990), k2p2zq_73023); + + int64_t gtid_92518; + + gtid_92518 = sext_i32_i64(group_tid_126991) * segmap_group_sizze_94028 + + sext_i32_i64(local_tid_126990) - + squot64(sext_i32_i64(group_tid_126991) * segmap_group_sizze_94028 + + sext_i32_i64(local_tid_126990), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_92517, m_73008) && slt64(gtid_92518, k2p2zq_73023)) { + double defunc_3_map_res_r_transformed_row_94031 = ((__global + double *) mem_121351)[gtid_92517 * + (k2p2zq_73023 * + k2p2zq_73023) + + i_93926 * + k2p2zq_73023 + + i_93926]; + double defunc_2_map_res_transformed_row_94032 = ((__global + double *) mem_120252)[gtid_92518 * + k2p2zq_73023 + + i_93926]; + double defunc_2_reduce_res_94033 = ((__global + double *) mem_121555)[gtid_92517 * + k2p2zq_73023 + + gtid_92518]; + double zm_res_94034 = defunc_2_map_res_transformed_row_94032 - + defunc_2_reduce_res_94033; + double zs_res_94035 = zm_res_94034 / + defunc_3_map_res_r_transformed_row_94031; + + ((__global double *) mem_121559)[gtid_92517 * k2p2zq_73023 + + gtid_92518] = zs_res_94035; + } + + error_0: + return; + #undef segmap_group_sizze_94028 +} +__kernel void mainMagnitudezisegmap_92845(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned 
char *mem_121335, + __global unsigned char *mem_121343, + __global unsigned char *mem_121346, + __global unsigned char *mem_121351) +{ + #define segmap_group_sizze_93835 (mainMagnitudezisegmap_group_sizze_92849) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126866; + int32_t local_tid_126867; + int64_t group_sizze_126870; + int32_t wave_sizze_126869; + int32_t group_tid_126868; + + global_tid_126866 = get_global_id(0); + local_tid_126867 = get_local_id(0); + group_sizze_126870 = get_local_size(0); + wave_sizze_126869 = LOCKSTEP_WIDTH; + group_tid_126868 = get_group_id(0); + + int32_t phys_tid_92845; + + phys_tid_92845 = global_tid_126866; + + int64_t gtid_92842; + + gtid_92842 = squot64(sext_i32_i64(group_tid_126868) * + segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867), k2p2zq_73023 * + k2p2zq_73023); + + int64_t gtid_92843; + + gtid_92843 = squot64(sext_i32_i64(group_tid_126868) * + segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867) - + squot64(sext_i32_i64(group_tid_126868) * + segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + + int64_t gtid_92844; + + gtid_92844 = sext_i32_i64(group_tid_126868) * segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867) - + squot64(sext_i32_i64(group_tid_126868) * segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867), k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - squot64(sext_i32_i64(group_tid_126868) * + segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867) - + squot64(sext_i32_i64(group_tid_126868) * + segmap_group_sizze_93835 + + sext_i32_i64(local_tid_126867), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + if ((slt64(gtid_92842, m_73008) && slt64(gtid_92843, k2p2zq_73023)) && + slt64(gtid_92844, k2p2zq_73023)) { + int64_t 
min_res_93838 = ((__global int64_t *) mem_121343)[gtid_92842]; + bool cond_f_res_93839 = ((__global bool *) mem_121346)[gtid_92842 * + k2p2zq_73023 + + gtid_92843]; + int64_t x_93842 = add64((int64_t) 1, gtid_92844); + bool cond_93843 = slt64(min_res_93838, x_93842); + bool x_93844 = !cond_93843; + bool y_93845 = cond_f_res_93839 && x_93844; + bool cond_93846 = cond_93843 || y_93845; + double defunc_1_f_res_93847; + + if (cond_93846) { + defunc_1_f_res_93847 = NAN; + } else { + double x_93841 = ((__global double *) mem_121335)[gtid_92843 * + (m_73008 * + k2p2zq_73023) + + gtid_92844 * + m_73008 + + gtid_92842]; + + defunc_1_f_res_93847 = x_93841; + } + ((__global double *) mem_121351)[gtid_92842 * (k2p2zq_73023 * + k2p2zq_73023) + + gtid_92843 * k2p2zq_73023 + + gtid_92844] = defunc_1_f_res_93847; + } + + error_0: + return; + #undef segmap_group_sizze_93835 +} +__kernel void mainMagnitudezisegmap_92880(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_121343, + __global unsigned char *mem_121346) +{ + #define segmap_group_sizze_93820 (mainMagnitudezisegmap_group_sizze_92883) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126861; + int32_t local_tid_126862; + int64_t group_sizze_126865; + int32_t wave_sizze_126864; + int32_t group_tid_126863; + + global_tid_126861 = get_global_id(0); + local_tid_126862 = get_local_id(0); + group_sizze_126865 = get_local_size(0); + wave_sizze_126864 = LOCKSTEP_WIDTH; + group_tid_126863 = get_group_id(0); + + int32_t phys_tid_92880; + + phys_tid_92880 = global_tid_126861; + + int64_t gtid_92878; + + gtid_92878 = squot64(sext_i32_i64(group_tid_126863) * + segmap_group_sizze_93820 + + sext_i32_i64(local_tid_126862), k2p2zq_73023); + + int64_t gtid_92879; + + gtid_92879 = sext_i32_i64(group_tid_126863) * segmap_group_sizze_93820 + + sext_i32_i64(local_tid_126862) - + 
squot64(sext_i32_i64(group_tid_126863) * segmap_group_sizze_93820 + + sext_i32_i64(local_tid_126862), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_92878, m_73008) && slt64(gtid_92879, k2p2zq_73023)) { + int64_t min_res_93823 = ((__global int64_t *) mem_121343)[gtid_92878]; + int64_t x_93825 = add64((int64_t) 1, gtid_92879); + bool cond_f_res_93826 = slt64(min_res_93823, x_93825); + + ((__global bool *) mem_121346)[gtid_92878 * k2p2zq_73023 + gtid_92879] = + cond_f_res_93826; + } + + error_0: + return; + #undef segmap_group_sizze_93820 +} +__kernel void mainMagnitudezisegmap_92925(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t m_73095, + unsigned char y_73099, + int64_t min_res_73213, + int64_t k_73214, + int64_t num_groups_93535, + int64_t num_threads_125909, __global + unsigned char *mem_120248, __global + unsigned char *mem_121001, __global + unsigned char *mem_121004, __global + unsigned char *mem_121008, __global + unsigned char *mem_121011, __global + unsigned char *mem_121335, __global + unsigned char *mem_121338, __global + unsigned char *mem_121341, __global + unsigned char *mem_121343, __global + unsigned char *mem_125167, __global + unsigned char *mem_125169, __global + unsigned char *mem_125438, __global + unsigned char *mem_125446, __global + unsigned char *mem_125448, __global + unsigned char *mem_125498, __global + unsigned char *double_buffer_mem_125552, + __global + unsigned char *double_buffer_mem_125553, + __global + unsigned char *double_buffer_mem_125554) +{ + #define segmap_group_sizze_93534 (mainMagnitudezisegmap_group_sizze_92927) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126792; + 
int32_t local_tid_126793; + int64_t group_sizze_126796; + int32_t wave_sizze_126795; + int32_t group_tid_126794; + + global_tid_126792 = get_global_id(0); + local_tid_126793 = get_local_id(0); + group_sizze_126796 = get_local_size(0); + wave_sizze_126795 = LOCKSTEP_WIDTH; + group_tid_126794 = get_group_id(0); + + int32_t phys_tid_92925; + + phys_tid_92925 = global_tid_126792; + + int32_t phys_group_id_126797; + + phys_group_id_126797 = get_group_id(0); + for (int32_t i_126798 = 0; i_126798 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_93534)) - + phys_group_id_126797, sext_i64_i32(num_groups_93535)); + i_126798++) { + int32_t virt_group_id_126799 = phys_group_id_126797 + i_126798 * + sext_i64_i32(num_groups_93535); + int64_t gtid_92924 = sext_i32_i64(virt_group_id_126799) * + segmap_group_sizze_93534 + sext_i32_i64(local_tid_126793); + + if (slt64(gtid_92924, m_73008)) { + for (int64_t i_126800 = 0; i_126800 < k2p2zq_73023; i_126800++) { + ((__global int64_t *) mem_121011)[phys_tid_92925 + i_126800 * + num_threads_125909] = + ((__global int64_t *) mem_120248)[i_126800]; + } + for (int64_t i_126801 = 0; i_126801 < k2p2zq_73023; i_126801++) { + for (int64_t i_126802 = 0; i_126802 < k2p2zq_73023; + i_126802++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126801 * + (num_threads_125909 * + k2p2zq_73023) + + i_126802 * + num_threads_125909)] = + ((__global double *) mem_121001)[gtid_92924 + + (i_126801 * (m_73008 * + k2p2zq_73023) + + i_126802 * m_73008)]; + } + } + for (int64_t i_126803 = 0; i_126803 < k2p2zq_73023; i_126803++) { + ((__global double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126803 * + num_threads_125909] = + ((__global double *) mem_121004)[gtid_92924 + i_126803 * + m_73008]; + } + for (int64_t i_126804 = 0; i_126804 < (int64_t) 2; i_126804++) { + for (int64_t i_126805 = 0; i_126805 < k2p2zq_73023; + i_126805++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (i_126804 * + 
(num_threads_125909 * + k2p2zq_73023) + + i_126805 * + num_threads_125909)] = + ((__global double *) mem_121008)[gtid_92924 + + (i_126804 * (m_73008 * + k2p2zq_73023) + + i_126805 * m_73008)]; + } + } + + int64_t dqrdc2_res_93549; + int64_t k_93555 = k_73214; + + for (int64_t l_93550 = 0; l_93550 < min_res_73213; l_93550++) { + int64_t x_93556 = add64((int64_t) 1, l_93550); + bool cond_93557 = slt64(x_93556, k_93555); + bool loop_cond_93558; + + if (cond_93557) { + bool y_93559 = slt64(l_93550, k2p2zq_73023); + bool index_certs_93560; + + if (!y_93559) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 480) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_93550; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_93561 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + l_93550 * + num_threads_125909)]; + double zt_res_93562 = 1.0e-7 * zt_arg_93561; + bool index_certs_93563; + + if (!y_93559) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 481) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_93564 = ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + l_93550 * + num_threads_125909]; + bool zl_res_93565 = zl_arg_93564 < zt_res_93562; + + loop_cond_93558 = zl_res_93565; + } else { + loop_cond_93558 = 0; + } + + bool y_93566 = slt64(l_93550, k2p2zq_73023); + int64_t upper_bound_93567 = sub64(k2p2zq_73023, x_93556); + bool loop_not_taken_93568 = !loop_cond_93558; + bool protect_assert_disj_93569 = y_93566 || + loop_not_taken_93568; + bool index_certs_93570; + + if (!protect_assert_disj_93569) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 482) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = 
k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_93571; + + if (!protect_assert_disj_93569) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 483) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_93550; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_93572; + + if (!protect_assert_disj_93569) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 484) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_93550; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_93573 = y_73099 || + loop_not_taken_93568; + bool index_certs_93574; + + if (!protect_assert_disj_93573) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 485) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_93575; + + if (!protect_assert_disj_93573) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 486) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_93576; + + if (!protect_assert_disj_93573) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 487) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_93577; + int64_t loopres_93582; + bool loop_while_93583; + int64_t k_93588; + + loop_while_93583 = loop_cond_93558; + k_93588 = k_93555; + while (loop_while_93583) { + 
for (int64_t i_93590 = 0; i_93590 < k2p2zq_73023; + i_93590++) { + bool index_certs_93592; + + if (!y_93566) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 488) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = i_93590; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_93593 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + i_93590 * + num_threads_125909)]; + + for (int64_t j0_93595 = 0; j0_93595 < upper_bound_93567; + j0_93595++) { + int64_t j_93597 = add64(x_93556, j0_93595); + bool x_93598 = sle64((int64_t) 0, j_93597); + bool y_93599 = slt64(j_93597, k2p2zq_73023); + bool bounds_check_93600 = x_93598 && y_93599; + bool index_certs_93601; + + if (!bounds_check_93600) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 489) == + -1) { + global_failure_args[0] = j_93597; + global_failure_args[1] = i_93590; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_93602 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (j_93597 * + (num_threads_125909 * + k2p2zq_73023) + + i_93590 * + num_threads_125909)]; + int64_t i_93603 = sub64(j_93597, (int64_t) 1); + bool x_93604 = sle64((int64_t) 0, i_93603); + bool y_93605 = slt64(i_93603, k2p2zq_73023); + bool bounds_check_93606 = x_93604 && y_93605; + bool index_certs_93607; + + if (!bounds_check_93606) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 490) == + -1) { + global_failure_args[0] = i_93603; + global_failure_args[1] = i_93590; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_93603 * + (num_threads_125909 * + 
k2p2zq_73023) + + i_93590 * + num_threads_125909)] = + lw_val_93602; + } + + bool index_certs_93609; + + if (!y_73099) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 491) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = i_93590; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (m_73095 * + (num_threads_125909 * + k2p2zq_73023) + + i_93590 * + num_threads_125909)] = + t_93593; + } + + int64_t i_93611 = ((__global + int64_t *) mem_121011)[phys_tid_92925 + + l_93550 * + num_threads_125909]; + double t_93612 = ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + l_93550 * + num_threads_125909]; + double tt_93613 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + l_93550 * + num_threads_125909]; + double ttt_93614 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + l_93550 * + num_threads_125909)]; + + for (int64_t j0_93618 = 0; j0_93618 < upper_bound_93567; + j0_93618++) { + int64_t j_93622 = add64(x_93556, j0_93618); + bool x_93623 = sle64((int64_t) 0, j_93622); + bool y_93624 = slt64(j_93622, k2p2zq_73023); + bool bounds_check_93625 = x_93623 && y_93624; + bool index_certs_93626; + + if (!bounds_check_93625) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 492) == -1) { + global_failure_args[0] = j_93622; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_93627 = ((__global + int64_t *) mem_121011)[phys_tid_92925 + + j_93622 * + num_threads_125909]; + int64_t i_93628 = sub64(j_93622, (int64_t) 1); + bool x_93629 = sle64((int64_t) 0, i_93628); + bool y_93630 = slt64(i_93628, k2p2zq_73023); + bool bounds_check_93631 = x_93629 && y_93630; + bool index_certs_93632; + + if (!bounds_check_93631) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 493) == -1) { + global_failure_args[0] = i_93628; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_121011)[phys_tid_92925 + + i_93628 * + num_threads_125909] = + lw_val_93627; + + double lw_val_93634 = ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + j_93622 * + num_threads_125909]; + + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_93628 * + num_threads_125909] = + lw_val_93634; + + bool index_certs_93636; + + if (!bounds_check_93625) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 494) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_93622; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_93637 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + j_93622 * + num_threads_125909]; + bool index_certs_93638; + + if (!bounds_check_93631) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 495) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_93628; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + i_93628 * + num_threads_125909] = + lw_val_93637; + + bool index_certs_93640; + + if (!bounds_check_93625) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 496) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_93622; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_93641 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + j_93622 * + num_threads_125909)]; + 
bool index_certs_93642; + + if (!bounds_check_93631) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 497) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_93628; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + i_93628 * + num_threads_125909)] = + lw_val_93641; + } + ((__global int64_t *) mem_121011)[phys_tid_92925 + m_73095 * + num_threads_125909] = + i_93611; + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + m_73095 * + num_threads_125909] = + t_93612; + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + m_73095 * + num_threads_125909] = + tt_93613; + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + m_73095 * + num_threads_125909)] = + ttt_93614; + + int64_t k_93648 = sub64(k_93588, (int64_t) 1); + bool cond_93649 = slt64(x_93556, k_93648); + bool loop_cond_93650; + + if (cond_93649) { + bool index_certs_93651; + + if (!y_93566) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 498) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_93550; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_93652 = ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (num_threads_125909 * + k2p2zq_73023 + + l_93550 * + num_threads_125909)]; + double zt_res_93653 = 1.0e-7 * zt_arg_93652; + bool index_certs_93654; + + if (!y_93566) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 499) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_93655 = ((__global + double *) 
double_buffer_mem_125553)[phys_tid_92925 + + l_93550 * + num_threads_125909]; + bool zl_res_93656 = zl_arg_93655 < zt_res_93653; + + loop_cond_93650 = zl_res_93656; + } else { + loop_cond_93650 = 0; + } + + bool loop_while_tmp_126811 = loop_cond_93650; + int64_t k_tmp_126816 = k_93648; + + loop_while_93583 = loop_while_tmp_126811; + k_93588 = k_tmp_126816; + } + loopres_93577 = loop_while_93583; + loopres_93582 = k_93588; + + bool cond_93657 = x_93556 == k2p2zq_73023; + int64_t j_m_i_93658 = sub64(k2p2zq_73023, l_93550); + bool empty_slice_93662 = j_m_i_93658 == (int64_t) 0; + int64_t m_93663 = sub64(j_m_i_93658, (int64_t) 1); + int64_t i_p_m_t_s_93664 = add64(l_93550, m_93663); + bool zzero_leq_i_p_m_t_s_93665 = sle64((int64_t) 0, + i_p_m_t_s_93664); + bool i_p_m_t_s_leq_w_93666 = slt64(i_p_m_t_s_93664, + k2p2zq_73023); + bool i_lte_j_93667 = sle64(l_93550, k2p2zq_73023); + bool y_93668 = zzero_leq_i_p_m_t_s_93665 && + i_p_m_t_s_leq_w_93666; + bool y_93669 = i_lte_j_93667 && y_93668; + bool ok_or_empty_93670 = empty_slice_93662 || y_93669; + bool index_ok_93671 = y_93566 && ok_or_empty_93670; + + if (cond_93657) { + for (int64_t i_126822 = 0; i_126822 < k2p2zq_73023; + i_126822++) { + ((__global double *) mem_125448)[phys_tid_92925 + + i_126822 * + num_threads_125909] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126822 * + num_threads_125909]; + } + for (int64_t i_126823 = 0; i_126823 < (int64_t) 2; + i_126823++) { + for (int64_t i_126824 = 0; i_126824 < k2p2zq_73023; + i_126824++) { + ((__global double *) mem_125446)[phys_tid_92925 + + (i_126823 * + (num_threads_125909 * + k2p2zq_73023) + + i_126824 * + num_threads_125909)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (i_126823 * + (num_threads_125909 * + k2p2zq_73023) + + i_126824 * + num_threads_125909)]; + } + } + for (int64_t i_126825 = 0; i_126825 < k2p2zq_73023; + i_126825++) { + for (int64_t i_126826 = 0; i_126826 < k2p2zq_73023; + i_126826++) { + 
((__global double *) mem_125498)[phys_tid_92925 + + (i_126825 * + (num_threads_125909 * + k2p2zq_73023) + + i_126826 * + num_threads_125909)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126825 * + (num_threads_125909 * + k2p2zq_73023) + + i_126826 * + num_threads_125909)]; + } + } + } else { + bool index_certs_93672; + + if (!index_ok_93671) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 500) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = l_93550; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_93674; + double redout_119715 = 0.0; + + for (int64_t i_119716 = 0; i_119716 < j_m_i_93658; + i_119716++) { + int64_t slice_120006 = l_93550 + i_119716; + double x_93678 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + slice_120006 * + num_threads_125909)]; + double defunc_1_f_res_93679 = x_93678 * x_93678; + double defunc_1_op_res_93677 = defunc_1_f_res_93679 + + redout_119715; + double redout_tmp_126827 = defunc_1_op_res_93677; + + redout_119715 = redout_tmp_126827; + } + defunc_2_reduce_res_93674 = redout_119715; + + double sqrt_res_93680; + + sqrt_res_93680 = futrts_sqrt64(defunc_2_reduce_res_93674); + + bool zeze_res_93681 = sqrt_res_93680 == 0.0; + + if (zeze_res_93681) { + for (int64_t i_126828 = 0; i_126828 < k2p2zq_73023; + i_126828++) { + ((__global double *) mem_125169)[phys_tid_92925 + + i_126828 * + num_threads_125909] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126828 * + num_threads_125909]; + } + for (int64_t i_126829 = 0; i_126829 < (int64_t) 2; + i_126829++) { + for (int64_t i_126830 = 0; i_126830 < k2p2zq_73023; + i_126830++) { + ((__global + double *) mem_125167)[phys_tid_92925 + + (i_126829 * + (num_threads_125909 * + k2p2zq_73023) + + i_126830 * + 
num_threads_125909)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (i_126829 * + (num_threads_125909 * + k2p2zq_73023) + + i_126830 * + num_threads_125909)]; + } + } + for (int64_t i_126831 = 0; i_126831 < k2p2zq_73023; + i_126831++) { + for (int64_t i_126832 = 0; i_126832 < k2p2zq_73023; + i_126832++) { + ((__global + double *) mem_125438)[phys_tid_92925 + + (i_126831 * + (num_threads_125909 * + k2p2zq_73023) + + i_126832 * + num_threads_125909)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126831 * + (num_threads_125909 * + k2p2zq_73023) + + i_126832 * + num_threads_125909)]; + } + } + } else { + bool index_ok_93685 = y_93566 && y_93566; + bool index_certs_93686; + + if (!index_ok_93685) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 501) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = l_93550; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_93687 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)]; + bool zeze_res_93688 = znze_arg_93687 == 0.0; + bool znze_res_93689 = !zeze_res_93688; + double nrmxl_93690; + + if (znze_res_93689) { + double abs_res_93691 = fabs(sqrt_res_93680); + double sgn_res_93692 = fsignum32(znze_arg_93687); + double zt_res_93693 = abs_res_93691 * sgn_res_93692; + + nrmxl_93690 = zt_res_93693; + } else { + nrmxl_93690 = sqrt_res_93680; + } + for (int64_t i0_93695 = 0; i0_93695 < j_m_i_93658; + i0_93695++) { + int64_t i_93697 = add64(l_93550, i0_93695); + bool x_93698 = sle64((int64_t) 0, i_93697); + bool y_93699 = slt64(i_93697, k2p2zq_73023); + bool bounds_check_93700 = x_93698 && y_93699; + bool index_ok_93701 = y_93566 && bounds_check_93700; + bool index_certs_93702; + + if (!index_ok_93701) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 502) == + -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = i_93697; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_93703 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + i_93697 * + num_threads_125909)]; + double lw_val_93704 = x_93703 / nrmxl_93690; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + i_93697 * + num_threads_125909)] = + lw_val_93704; + } + + double zp_arg_93706 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)]; + double zp_res_93707 = 1.0 + zp_arg_93706; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)] = + zp_res_93707; + + bool bounds_invalid_upwards_93709 = slt64(k2p2zq_73023, + x_93556); + bool valid_93710 = !bounds_invalid_upwards_93709; + bool range_valid_c_93711; + + if (!valid_93710) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 503) == -1) { + global_failure_args[0] = x_93556; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_93712 = slt64((int64_t) 0, + upper_bound_93567); + bool loop_not_taken_93713 = !loop_nonempty_93712; + bool protect_assert_disj_93714 = index_ok_93685 || + loop_not_taken_93713; + bool index_certs_93715; + + if (!protect_assert_disj_93714) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 504) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = l_93550; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto 
error_0; + } + } + for (int64_t i_93719 = 0; i_93719 < upper_bound_93567; + i_93719++) { + int64_t index_primexp_93723 = add64(x_93556, + i_93719); + bool x_93724 = sle64((int64_t) 0, + index_primexp_93723); + bool y_93725 = slt64(index_primexp_93723, + k2p2zq_73023); + bool bounds_check_93726 = x_93724 && y_93725; + double t_93727; + double t_93729 = 0.0; + + for (int64_t i0_93728 = 0; i0_93728 < j_m_i_93658; + i0_93728++) { + int64_t i_93730 = add64(l_93550, i0_93728); + bool x_93731 = sle64((int64_t) 0, i_93730); + bool y_93732 = slt64(i_93730, k2p2zq_73023); + bool bounds_check_93733 = x_93731 && y_93732; + bool index_ok_93734 = y_93566 && + bounds_check_93733; + bool index_certs_93735; + + if (!index_ok_93734) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 505) == + -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = i_93730; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_93736 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + i_93730 * + num_threads_125909)]; + bool index_ok_93737 = bounds_check_93726 && + bounds_check_93733; + bool index_certs_93738; + + if (!index_ok_93737) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 506) == + -1) { + global_failure_args[0] = + index_primexp_93723; + global_failure_args[1] = i_93730; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_93739 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (index_primexp_93723 * + (num_threads_125909 * + k2p2zq_73023) + + i_93730 * + num_threads_125909)]; + double y_93740 = x_93736 * y_93739; + double loopres_93741 = t_93729 - y_93740; + double t_tmp_126837 = loopres_93741; + + t_93729 = t_tmp_126837; + } + t_93727 = t_93729; + + double 
y_93742 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)]; + double t_93743 = t_93727 / y_93742; + + for (int64_t i0_93745 = 0; i0_93745 < j_m_i_93658; + i0_93745++) { + int64_t i_93747 = add64(l_93550, i0_93745); + bool x_93748 = sle64((int64_t) 0, i_93747); + bool y_93749 = slt64(i_93747, k2p2zq_73023); + bool bounds_check_93750 = x_93748 && y_93749; + bool index_ok_93751 = bounds_check_93726 && + bounds_check_93750; + bool index_certs_93752; + + if (!index_ok_93751) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 507) == + -1) { + global_failure_args[0] = + index_primexp_93723; + global_failure_args[1] = i_93747; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_93753 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (index_primexp_93723 * + (num_threads_125909 * + k2p2zq_73023) + + i_93747 * + num_threads_125909)]; + bool index_ok_93754 = y_93566 && + bounds_check_93750; + bool index_certs_93755; + + if (!index_ok_93754) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 508) == + -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = i_93747; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_93756 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + i_93747 * + num_threads_125909)]; + double y_93757 = t_93743 * y_93756; + double lw_val_93758 = x_93753 + y_93757; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (index_primexp_93723 * + (num_threads_125909 * + k2p2zq_73023) + + i_93747 * + num_threads_125909)] = + lw_val_93758; + } + + bool index_certs_93760; + + if (!bounds_check_93726) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 509) == + -1) { + global_failure_args[0] = + index_primexp_93723; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_93761 = ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + index_primexp_93723 * + num_threads_125909]; + bool zeze_res_93762 = zeze_arg_93761 == 0.0; + + if (!zeze_res_93762) { + bool index_ok_93765 = y_93566 && + bounds_check_93726; + bool index_certs_93766; + + if (!index_ok_93765) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 510) == + -1) { + global_failure_args[0] = + index_primexp_93723; + global_failure_args[1] = l_93550; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_93767 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (index_primexp_93723 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)]; + double abs_res_93768 = fabs(abs_arg_93767); + double zs_res_93769 = abs_res_93768 / + zeze_arg_93761; + double ztzt_res_93770 = fpow64(zs_res_93769, + 2.0); + double zm_res_93771 = 1.0 - ztzt_res_93770; + double max_res_93772 = fmax64(0.0, + zm_res_93771); + double abs_res_93773 = fabs(max_res_93772); + bool zgze_res_93774 = 1.0e-6 <= abs_res_93773; + + if (zgze_res_93774) { + double sqrt_res_93777; + + sqrt_res_93777 = + futrts_sqrt64(max_res_93772); + + double zt_res_93778 = zeze_arg_93761 * + sqrt_res_93777; + + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + index_primexp_93723 * + num_threads_125909] = + zt_res_93778; + } else { + bool empty_slice_93780 = + upper_bound_93567 == (int64_t) 0; + int64_t m_93781 = sub64(upper_bound_93567, + (int64_t) 1); + int64_t i_p_m_t_s_93782 = add64(x_93556, + m_93781); + bool zzero_leq_i_p_m_t_s_93783 = + sle64((int64_t) 0, i_p_m_t_s_93782); + bool i_p_m_t_s_leq_w_93784 = + 
slt64(i_p_m_t_s_93782, k2p2zq_73023); + bool zzero_lte_i_93785 = sle64((int64_t) 0, + x_93556); + bool i_lte_j_93786 = sle64(x_93556, + k2p2zq_73023); + bool y_93787 = i_p_m_t_s_leq_w_93784 && + zzero_lte_i_93785; + bool y_93788 = zzero_leq_i_p_m_t_s_93783 && + y_93787; + bool y_93789 = i_lte_j_93786 && y_93788; + bool forwards_ok_93790 = + zzero_lte_i_93785 && y_93789; + bool ok_or_empty_93791 = + empty_slice_93780 || forwards_ok_93790; + bool index_ok_93792 = bounds_check_93726 && + ok_or_empty_93791; + bool index_certs_93793; + + if (!index_ok_93792) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 511) == + -1) { + global_failure_args[0] = + index_primexp_93723; + global_failure_args[1] = + x_93556; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_93795; + double redout_119717 = 0.0; + + for (int64_t i_119718 = 0; i_119718 < + upper_bound_93567; i_119718++) { + int64_t slice_120007 = x_93556 + + i_119718; + double x_93799 = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (index_primexp_93723 * + (num_threads_125909 * + k2p2zq_73023) + + slice_120007 * + num_threads_125909)]; + double defunc_1_f_res_93800 = x_93799 * + x_93799; + double defunc_1_op_res_93798 = + defunc_1_f_res_93800 + + redout_119717; + double redout_tmp_126839 = + defunc_1_op_res_93798; + + redout_119717 = redout_tmp_126839; + } + defunc_2_reduce_res_93795 = redout_119717; + + double sqrt_res_93801; + + sqrt_res_93801 = + futrts_sqrt64(defunc_2_reduce_res_93795); + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + index_primexp_93723 * + num_threads_125909] = + sqrt_res_93801; + + bool index_certs_93803; + + if (!bounds_check_93726) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 512) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_93723; + 
global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126840 = 0; i_126840 < + (int64_t) 1; i_126840++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (index_primexp_93723 + + i_126840) * + num_threads_125909] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + num_threads_125909 * + index_primexp_93723 + + i_126840 * + num_threads_125909]; + } + } + } + } + + bool index_certs_93806; + + if (!y_93566) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 513) == -1) { + global_failure_args[0] = l_93550; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_126841 = 0; i_126841 < (int64_t) 1; + i_126841++) { + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + (l_93550 + + i_126841) * + num_threads_125909] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + num_threads_125909 * + l_93550 + + i_126841 * + num_threads_125909]; + } + + double zt_res_93809 = -1.0 * nrmxl_93690; + + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (l_93550 * + (num_threads_125909 * + k2p2zq_73023) + + l_93550 * + num_threads_125909)] = + zt_res_93809; + for (int64_t i_126842 = 0; i_126842 < k2p2zq_73023; + i_126842++) { + ((__global double *) mem_125169)[phys_tid_92925 + + i_126842 * + num_threads_125909] = + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126842 * + num_threads_125909]; + } + for (int64_t i_126843 = 0; i_126843 < (int64_t) 2; + i_126843++) { + for (int64_t i_126844 = 0; i_126844 < k2p2zq_73023; + i_126844++) { + ((__global + double *) mem_125167)[phys_tid_92925 + + (i_126843 * + (num_threads_125909 * + k2p2zq_73023) + + i_126844 * + num_threads_125909)] = + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + 
(i_126843 * + (num_threads_125909 * + k2p2zq_73023) + + i_126844 * + num_threads_125909)]; + } + } + for (int64_t i_126845 = 0; i_126845 < k2p2zq_73023; + i_126845++) { + for (int64_t i_126846 = 0; i_126846 < k2p2zq_73023; + i_126846++) { + ((__global + double *) mem_125438)[phys_tid_92925 + + (i_126845 * + (num_threads_125909 * + k2p2zq_73023) + + i_126846 * + num_threads_125909)] = + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126845 * + (num_threads_125909 * + k2p2zq_73023) + + i_126846 * + num_threads_125909)]; + } + } + } + for (int64_t i_126847 = 0; i_126847 < k2p2zq_73023; + i_126847++) { + ((__global double *) mem_125448)[phys_tid_92925 + + i_126847 * + num_threads_125909] = + ((__global double *) mem_125169)[phys_tid_92925 + + i_126847 * + num_threads_125909]; + } + for (int64_t i_126848 = 0; i_126848 < (int64_t) 2; + i_126848++) { + for (int64_t i_126849 = 0; i_126849 < k2p2zq_73023; + i_126849++) { + ((__global double *) mem_125446)[phys_tid_92925 + + (i_126848 * + (num_threads_125909 * + k2p2zq_73023) + + i_126849 * + num_threads_125909)] = + ((__global + double *) mem_125167)[phys_tid_92925 + + (i_126848 * + (num_threads_125909 * + k2p2zq_73023) + + i_126849 * + num_threads_125909)]; + } + } + for (int64_t i_126850 = 0; i_126850 < k2p2zq_73023; + i_126850++) { + for (int64_t i_126851 = 0; i_126851 < k2p2zq_73023; + i_126851++) { + ((__global double *) mem_125498)[phys_tid_92925 + + (i_126850 * + (num_threads_125909 * + k2p2zq_73023) + + i_126851 * + num_threads_125909)] = + ((__global + double *) mem_125438)[phys_tid_92925 + + (i_126850 * + (num_threads_125909 * + k2p2zq_73023) + + i_126851 * + num_threads_125909)]; + } + } + } + for (int64_t i_126852 = 0; i_126852 < k2p2zq_73023; + i_126852++) { + for (int64_t i_126853 = 0; i_126853 < k2p2zq_73023; + i_126853++) { + ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126852 * + (num_threads_125909 * + k2p2zq_73023) + + i_126853 * + num_threads_125909)] = + 
((__global double *) mem_125498)[phys_tid_92925 + + (i_126852 * + (num_threads_125909 * + k2p2zq_73023) + + i_126853 * + num_threads_125909)]; + } + } + for (int64_t i_126854 = 0; i_126854 < k2p2zq_73023; + i_126854++) { + ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126854 * + num_threads_125909] = + ((__global double *) mem_125448)[phys_tid_92925 + + i_126854 * + num_threads_125909]; + } + for (int64_t i_126855 = 0; i_126855 < (int64_t) 2; i_126855++) { + for (int64_t i_126856 = 0; i_126856 < k2p2zq_73023; + i_126856++) { + ((__global + double *) double_buffer_mem_125554)[phys_tid_92925 + + (i_126855 * + (num_threads_125909 * + k2p2zq_73023) + + i_126856 * + num_threads_125909)] = + ((__global double *) mem_125446)[phys_tid_92925 + + (i_126855 * + (num_threads_125909 * + k2p2zq_73023) + + i_126856 * + num_threads_125909)]; + } + } + + int64_t k_tmp_126810 = loopres_93582; + + k_93555 = k_tmp_126810; + } + dqrdc2_res_93549 = k_93555; + + int64_t min_arg_93811 = sub64(dqrdc2_res_93549, (int64_t) 1); + int64_t min_res_93812 = smin64(k2p2zq_73023, min_arg_93811); + + for (int64_t i_126857 = 0; i_126857 < k2p2zq_73023; i_126857++) { + for (int64_t i_126858 = 0; i_126858 < k2p2zq_73023; + i_126858++) { + ((__global double *) mem_121335)[i_126857 * (m_73008 * + k2p2zq_73023) + + i_126858 * m_73008 + + gtid_92924] = ((__global + double *) double_buffer_mem_125552)[phys_tid_92925 + + (i_126857 * + (num_threads_125909 * + k2p2zq_73023) + + i_126858 * + num_threads_125909)]; + } + } + for (int64_t i_126859 = 0; i_126859 < k2p2zq_73023; i_126859++) { + ((__global double *) mem_121338)[i_126859 * m_73008 + + gtid_92924] = ((__global + double *) double_buffer_mem_125553)[phys_tid_92925 + + i_126859 * + num_threads_125909]; + } + for (int64_t i_126860 = 0; i_126860 < k2p2zq_73023; i_126860++) { + ((__global int64_t *) mem_121341)[i_126860 * m_73008 + + gtid_92924] = ((__global + int64_t *) mem_121011)[phys_tid_92925 + + i_126860 * + num_threads_125909]; 
+ } + ((__global int64_t *) mem_121343)[gtid_92924] = min_res_93812; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_93534 +} +__kernel void mainMagnitudezisegmap_93213(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t j_93466, + int64_t num_groups_93477, __global + unsigned char *mem_120894, __global + unsigned char *mem_120923, __global + unsigned char *mem_120927, __global + unsigned char *mem_120931, __global + unsigned char *mem_120935) +{ + #define segmap_group_sizze_93476 (mainMagnitudezisegmap_group_sizze_93215) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126692; + int32_t local_tid_126693; + int64_t group_sizze_126696; + int32_t wave_sizze_126695; + int32_t group_tid_126694; + + global_tid_126692 = get_global_id(0); + local_tid_126693 = get_local_id(0); + group_sizze_126696 = get_local_size(0); + wave_sizze_126695 = LOCKSTEP_WIDTH; + group_tid_126694 = get_group_id(0); + + int32_t phys_tid_93213; + + phys_tid_93213 = global_tid_126692; + + int32_t phys_group_id_126697; + + phys_group_id_126697 = get_group_id(0); + for (int32_t i_126698 = 0; i_126698 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_93476)) - + phys_group_id_126697, sext_i64_i32(num_groups_93477)); + i_126698++) { + int32_t virt_group_id_126699 = phys_group_id_126697 + i_126698 * + sext_i64_i32(num_groups_93477); + int64_t gtid_93212 = sext_i32_i64(virt_group_id_126699) * + segmap_group_sizze_93476 + sext_i32_i64(local_tid_126693); + + if (slt64(gtid_93212, m_73008)) { + double defunc_2_reduce_res_93484; + double redout_119713 = 0.0; + + for (int64_t i_119714 = 0; i_119714 < k2p2zq_73023; i_119714++) { + double x_93488 = ((__global double *) mem_120894)[i_119714 * + (k2p2zq_73023 * + m_73008) + + gtid_93212 * + k2p2zq_73023 + + j_93466]; + double defunc_1_f_res_93489 = x_93488 
* x_93488; + double defunc_1_op_res_93487 = defunc_1_f_res_93489 + + redout_119713; + double redout_tmp_126700 = defunc_1_op_res_93487; + + redout_119713 = redout_tmp_126700; + } + defunc_2_reduce_res_93484 = redout_119713; + + double sqrt_res_93490; + + sqrt_res_93490 = futrts_sqrt64(defunc_2_reduce_res_93484); + ((__global double *) mem_120923)[gtid_93212 + j_93466 * m_73008] = + sqrt_res_93490; + ((__global double *) mem_120927)[gtid_93212 + j_93466 * m_73008] = + sqrt_res_93490; + + bool zeze_res_93493 = sqrt_res_93490 == 0.0; + double lw_val_93494; + + if (zeze_res_93493) { + lw_val_93494 = 1.0; + } else { + lw_val_93494 = sqrt_res_93490; + } + ((__global double *) mem_120927)[gtid_93212 + (m_73008 * + k2p2zq_73023 + + j_93466 * m_73008)] = + lw_val_93494; + for (int64_t i_126701 = 0; i_126701 < k2p2zq_73023; i_126701++) { + ((__global double *) mem_120931)[i_126701 * m_73008 + + gtid_93212] = ((__global + double *) mem_120923)[gtid_93212 + + i_126701 * + m_73008]; + } + for (int64_t i_126702 = 0; i_126702 < (int64_t) 2; i_126702++) { + for (int64_t i_126703 = 0; i_126703 < k2p2zq_73023; + i_126703++) { + ((__global double *) mem_120935)[i_126702 * (m_73008 * + k2p2zq_73023) + + i_126703 * m_73008 + + gtid_93212] = ((__global + double *) mem_120927)[gtid_93212 + + (i_126702 * + (m_73008 * + k2p2zq_73023) + + i_126703 * + m_73008)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_93476 +} +/* mainMagnitudezisegmap_93258: each physical work-group visits the virtual group ids phys_group_id + i * num_groups_93522. For every gtid < m_73008 it (1) copies one double from mem_120951 into mem_120946[gtid + j_93466 * m_73008], (2) stores mem_120941[gtid], or 1.0 when that value equals 0.0, at mem_120946[gtid + m_73008 * k2p2zq_73023 + j_93466 * m_73008], and (3) copies this gtid's 2 x k2p2zq_73023 doubles from mem_120946 to the same offsets of mem_120956. Returns immediately when *global_failure >= 0. */__kernel void mainMagnitudezisegmap_93258(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t j_93466, + int64_t num_groups_93522, + int64_t num_threads_115425, + int64_t per_chunk_115432, __global + unsigned char *mem_120941, __global + unsigned char *mem_120946, __global + unsigned char *mem_120951, __global + unsigned char *mem_120956) +{ + #define segmap_group_sizze_93521 (mainMagnitudezisegmap_group_sizze_93260) + + const int block_dim0 = 0; + const int block_dim1 = 1; 
+ const int block_dim2 = 2; + + if (*global_failure >= 0) + return;/* a non-negative value is presumably a failure code already recorded by some kernel (other kernels in this file set it via atomic_cmpxchg_i32_global(global_failure, -1, code)); skip all work in that case */ + + int32_t global_tid_126775; + int32_t local_tid_126776; + int64_t group_sizze_126779; + int32_t wave_sizze_126778; + int32_t group_tid_126777; + + global_tid_126775 = get_global_id(0); + local_tid_126776 = get_local_id(0); + group_sizze_126779 = get_local_size(0); + wave_sizze_126778 = LOCKSTEP_WIDTH; + group_tid_126777 = get_group_id(0); + + int32_t phys_tid_93258; + + phys_tid_93258 = global_tid_126775; + + int32_t phys_group_id_126780; + + phys_group_id_126780 = get_group_id(0); + for (int32_t i_126781 = 0; i_126781 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_93521)) - + phys_group_id_126780, sext_i64_i32(num_groups_93522)); + i_126781++) {/* one iteration per virtual group assigned to this physical group */ + int32_t virt_group_id_126782 = phys_group_id_126780 + i_126781 * + sext_i64_i32(num_groups_93522); + int64_t gtid_93257 = sext_i32_i64(virt_group_id_126782) * + segmap_group_sizze_93521 + sext_i32_i64(local_tid_126776); + + if (slt64(gtid_93257, m_73008)) { + double sqrt_res_93526 = ((__global + double *) mem_120941)[gtid_93257]; + + for (int64_t i_126783 = 0; i_126783 < (int64_t) 1; i_126783++) {/* loop bound is the constant 1, so the body runs once with i_126783 == 0; the source index is (x - squot64(x, per_chunk) * per_chunk) * num_threads + squot64(x, per_chunk) for x = gtid + i_126783 */ + ((__global double *) mem_120946)[gtid_93257 + (j_93466 + + i_126783) * + m_73008] = ((__global + double *) mem_120951)[(gtid_93257 + + i_126783 - + squot64(gtid_93257 + + i_126783, + per_chunk_115432) * + per_chunk_115432) * + num_threads_115425 + + squot64(gtid_93257 + + i_126783, + per_chunk_115432)]; + } + + bool zeze_res_93528 = sqrt_res_93526 == 0.0; + double lw_val_93529; + + if (zeze_res_93528) { + lw_val_93529 = 1.0; + } else { + lw_val_93529 = sqrt_res_93526; + } + ((__global double *) mem_120946)[gtid_93257 + (m_73008 * + k2p2zq_73023 + + j_93466 * m_73008)] = + lw_val_93529; + for (int64_t i_126784 = 0; i_126784 < (int64_t) 2; i_126784++) { + for (int64_t i_126785 = 0; i_126785 < k2p2zq_73023; + i_126785++) { + ((__global double *) mem_120956)[i_126784 * (m_73008 * + k2p2zq_73023) + + i_126785 * m_73008 + + gtid_93257] = 
((__global + double *) mem_120946)[gtid_93257 + + (i_126784 * + (m_73008 * + k2p2zq_73023) + + i_126785 * + m_73008)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);/* sits outside the gtid < m_73008 guard, so it is reached once per virtual-group iteration */ + } + + error_0: + return; + #undef segmap_group_sizze_93521 +} +/* mainMagnitudezisegmap_93273: one work-item per gtid_93271 = group id * group size + local id. When gtid_93271 < m_73008 and 0 <= j_93466 < k2p2zq_73023 it stores mem_120941[gtid_93271] into mem_param_120902[gtid_93271 * k2p2zq_73023 + j_93466], both buffers being read as arrays of double. Returns immediately when *global_failure >= 0. */__kernel void mainMagnitudezisegmap_93273(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t j_93466, __global + unsigned char *mem_param_120902, + __global unsigned char *mem_120941) +{ + #define segmap_group_sizze_93516 (mainMagnitudezisegmap_group_sizze_93276) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126769; + int32_t local_tid_126770; + int64_t group_sizze_126773; + int32_t wave_sizze_126772; + int32_t group_tid_126771; + + global_tid_126769 = get_global_id(0); + local_tid_126770 = get_local_id(0); + group_sizze_126773 = get_local_size(0); + wave_sizze_126772 = LOCKSTEP_WIDTH; + group_tid_126771 = get_group_id(0); + + int32_t phys_tid_93273; + + phys_tid_93273 = global_tid_126769; + + int64_t gtid_93271; + + gtid_93271 = sext_i32_i64(group_tid_126771) * segmap_group_sizze_93516 + + sext_i32_i64(local_tid_126770); + + int64_t gtid_93272;/* assigned below as the flat id minus the identical expression, i.e. always 0, so the "< 1" test on it always holds */ + + gtid_93272 = sext_i32_i64(group_tid_126771) * segmap_group_sizze_93516 + + sext_i32_i64(local_tid_126770) - (sext_i32_i64(group_tid_126771) * + segmap_group_sizze_93516 + + sext_i32_i64(local_tid_126770)); + if (slt64(gtid_93271, m_73008) && slt64(gtid_93272, (int64_t) 1)) { + double sqrt_res_93519 = ((__global double *) mem_120941)[gtid_93271]; + + if ((sle64((int64_t) 0, gtid_93271) && slt64(gtid_93271, m_73008)) && + (sle64((int64_t) 0, j_93466) && slt64(j_93466, k2p2zq_73023))) { + ((__global double *) mem_param_120902)[gtid_93271 * k2p2zq_73023 + + j_93466] = sqrt_res_93519; + } + } + + error_0: + return; + #undef segmap_group_sizze_93516 +} +/* mainMagnitudezisegmap_93282: one work-item per gtid = group id * group size + local id; for every gtid < m_73008 it writes futrts_sqrt64(mem_120938[gtid]) to mem_120941[gtid], both buffers being read as arrays of double. Returns immediately when *global_failure >= 0. */__kernel void mainMagnitudezisegmap_93282(__global int *global_failure, + int64_t m_73008, 
__global + unsigned char *mem_120938, __global + unsigned char *mem_120941) +{ + #define segmap_group_sizze_93509 (mainMagnitudezisegmap_group_sizze_93284) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126764; + int32_t local_tid_126765; + int64_t group_sizze_126768; + int32_t wave_sizze_126767; + int32_t group_tid_126766; + + global_tid_126764 = get_global_id(0); + local_tid_126765 = get_local_id(0); + group_sizze_126768 = get_local_size(0); + wave_sizze_126767 = LOCKSTEP_WIDTH; + group_tid_126766 = get_group_id(0); + + int32_t phys_tid_93282; + + phys_tid_93282 = global_tid_126764; + + int64_t gtid_93281; + + gtid_93281 = sext_i32_i64(group_tid_126766) * segmap_group_sizze_93509 + + sext_i32_i64(local_tid_126765); + if (slt64(gtid_93281, m_73008)) {/* work-items whose id is not below m_73008 write nothing */ + double defunc_2_reduce_res_93512 = ((__global + double *) mem_120938)[gtid_93281]; + double sqrt_res_93513; + + sqrt_res_93513 = futrts_sqrt64(defunc_2_reduce_res_93512); + ((__global double *) mem_120941)[gtid_93281] = sqrt_res_93513; + } + + error_0: + return; + #undef segmap_group_sizze_93509 +} +/* mainMagnitudezisegmap_94299: splits the flat work-item id (group id * group size + local id) into gtid_94297 = squot64(id, k2p2zq_73023) and gtid_94298 = id - gtid_94297 * k2p2zq_73023 (squot64 is presumably signed integer division; its definition is not in this part of the file). When both are < k2p2zq_73023 it writes 1.0 if they are equal and 0.0 otherwise to mem_121938[gtid_94297 * k2p2zq_73023 + gtid_94298], read as an array of double. Returns immediately when *global_failure >= 0. */__kernel void mainMagnitudezisegmap_94299(__global int *global_failure, + int64_t k2p2zq_73023, __global + unsigned char *mem_121938) +{ + #define segmap_group_sizze_94324 (mainMagnitudezisegmap_group_sizze_94302) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127183; + int32_t local_tid_127184; + int64_t group_sizze_127187; + int32_t wave_sizze_127186; + int32_t group_tid_127185; + + global_tid_127183 = get_global_id(0); + local_tid_127184 = get_local_id(0); + group_sizze_127187 = get_local_size(0); + wave_sizze_127186 = LOCKSTEP_WIDTH; + group_tid_127185 = get_group_id(0); + + int32_t phys_tid_94299; + + phys_tid_94299 = global_tid_127183; + + int64_t gtid_94297; + + gtid_94297 = squot64(sext_i32_i64(group_tid_127185) 
* + segmap_group_sizze_94324 + + sext_i32_i64(local_tid_127184), k2p2zq_73023); + + int64_t gtid_94298; + + gtid_94298 = sext_i32_i64(group_tid_127185) * segmap_group_sizze_94324 + + sext_i32_i64(local_tid_127184) - + squot64(sext_i32_i64(group_tid_127185) * segmap_group_sizze_94324 + + sext_i32_i64(local_tid_127184), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_94297, k2p2zq_73023) && slt64(gtid_94298, k2p2zq_73023)) { + bool cond_94329 = gtid_94298 == gtid_94297;/* true exactly when the two indices coincide, selecting 1.0 below instead of 0.0 */ + double defunc_0_f_res_94330; + + if (cond_94329) { + defunc_0_f_res_94330 = 1.0; + } else { + defunc_0_f_res_94330 = 0.0; + } + ((__global double *) mem_121938)[gtid_94297 * k2p2zq_73023 + + gtid_94298] = defunc_0_f_res_94330; + } + + error_0: + return; + #undef segmap_group_sizze_94324 +} +__kernel void mainMagnitudezisegmap_94438(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t n_73011, + int64_t k2p2zq_73023, int64_t m_73095, + unsigned char y_73099, + int64_t defunc_2_reduce_res_73132, + double tol_73201, int64_t k_73214, + int64_t r_73698, int64_t rp1_73709, + unsigned char ok_or_empty_73720, + int64_t min_res_73721, + int64_t num_groups_94974, + int64_t binop_x_120251, + int64_t num_threads_125944, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, + __global unsigned char *mem_121938, + __global unsigned char *mem_121941, + __global + unsigned char *mem_param_121972, + __global unsigned char *mem_122011, + __global unsigned char *mem_122014, + __global unsigned char *mem_122017, + __global unsigned char *mem_122021, + __global unsigned char *mem_122025, + __global unsigned char *mem_122028, + __global unsigned char *mem_122042, + __global unsigned char *mem_122045, + __global unsigned char *mem_122047, + __global unsigned char *mem_122382, + __global unsigned char *mem_122423, + __global unsigned char *mem_122435, + __global unsigned char *mem_122464, + __global unsigned char 
*mem_122537, + __global unsigned char *mem_122552, + __global unsigned char *mem_122564, + __global unsigned char *mem_122575, + __global unsigned char *mem_122595, + __global unsigned char *mem_122598, + __global unsigned char *mem_122650, + __global unsigned char *mem_122654, + __global unsigned char *mem_122657, + __global unsigned char *mem_122659, + __global unsigned char *mem_122661, + __global unsigned char *mem_125248, + __global unsigned char *mem_125250, + __global unsigned char *mem_125258, + __global unsigned char *mem_125455, + __global unsigned char *mem_125463, + __global unsigned char *mem_125465, + __global unsigned char *mem_125505, + __global + unsigned char *double_buffer_mem_125569, + __global + unsigned char *double_buffer_mem_125570, + __global + unsigned char *double_buffer_mem_125571, + __global + unsigned char *double_buffer_mem_125582) +{ + #define segmap_group_sizze_94973 (mainMagnitudezisegmap_group_sizze_94440) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127223; + int32_t local_tid_127224; + int64_t group_sizze_127227; + int32_t wave_sizze_127226; + int32_t group_tid_127225; + + global_tid_127223 = get_global_id(0); + local_tid_127224 = get_local_id(0); + group_sizze_127227 = get_local_size(0); + wave_sizze_127226 = LOCKSTEP_WIDTH; + group_tid_127225 = get_group_id(0); + + int32_t phys_tid_94438; + + phys_tid_94438 = global_tid_127223; + + int32_t phys_group_id_127228; + + phys_group_id_127228 = get_group_id(0); + for (int32_t i_127229 = 0; i_127229 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_94973)) - + phys_group_id_127228, sext_i64_i32(num_groups_94974)); + i_127229++) { + int32_t virt_group_id_127230 = phys_group_id_127228 + i_127229 * + 
sext_i64_i32(num_groups_94974); + int64_t gtid_94437 = sext_i32_i64(virt_group_id_127230) * + segmap_group_sizze_94973 + sext_i32_i64(local_tid_127224); + + if (slt64(gtid_94437, m_73008)) { + int64_t x_94984 = ((__global + int64_t *) mem_param_121972)[gtid_94437]; + double defunc_0_f_res_94988; + double redout_119743 = 0.0; + + for (int64_t i_119745 = 0; i_119745 < k2p2zq_73023; i_119745++) { + double x_94994 = ((__global double *) mem_120246)[i_119745 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_94437 * + defunc_2_reduce_res_73132 + + r_73698]; + double defunc_0_f_res_94995; + double redout_119747 = 0.0; + + for (int64_t i_119748 = 0; i_119748 < k2p2zq_73023; + i_119748++) { + double x_94999 = ((__global double *) mem_120246)[i_119748 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_94437 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_95000 = ((__global double *) mem_122011)[i_119745 * + (m_73008 * + k2p2zq_73023) + + i_119748 * + m_73008 + + gtid_94437]; + double defunc_1_f_res_95001 = x_94999 * x_95000; + double defunc_1_op_res_94998 = defunc_1_f_res_95001 + + redout_119747; + double redout_tmp_127233 = defunc_1_op_res_94998; + + redout_119747 = redout_tmp_127233; + } + defunc_0_f_res_94995 = redout_119747; + + double defunc_1_f_res_95002 = x_94994 * defunc_0_f_res_94995; + double defunc_1_op_res_94992 = defunc_1_f_res_95002 + + redout_119743; + + ((__global double *) mem_122028)[phys_tid_94438 + i_119745 * + num_threads_125944] = + defunc_0_f_res_94995; + + double redout_tmp_127231 = defunc_1_op_res_94992; + + redout_119743 = redout_tmp_127231; + } + defunc_0_f_res_94988 = redout_119743; + + double fr_95003 = 1.0 + defunc_0_f_res_94988; + double x_95004 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_94437 * + n_73011 + + r_73698]; + double defunc_0_f_res_95005; + double redout_119749 = 0.0; + + for (int64_t i_119750 = 0; i_119750 < k2p2zq_73023; i_119750++) { + double x_95009 = ((__global double *) mem_120246)[i_119750 * + 
(defunc_2_reduce_res_73132 * + m_73008) + + gtid_94437 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_95010 = ((__global double *) mem_122014)[i_119750 * + m_73008 + + gtid_94437]; + double defunc_1_f_res_95011 = x_95009 * x_95010; + double defunc_1_op_res_95008 = defunc_1_f_res_95011 + + redout_119749; + double redout_tmp_127234 = defunc_1_op_res_95008; + + redout_119749 = redout_tmp_127234; + } + defunc_0_f_res_95005 = redout_119749; + + double resid_95012 = x_95004 - defunc_0_f_res_95005; + double sqrt_res_95013; + + sqrt_res_95013 = futrts_sqrt64(fr_95003); + + double recresid_r_95014 = resid_95012 / sqrt_res_95013; + + for (int64_t i_127235 = 0; i_127235 < k2p2zq_73023; i_127235++) { + ((__global double *) mem_122042)[phys_tid_94438 + i_127235 * + num_threads_125944] = 0.0; + } + for (int64_t i_127236 = 0; i_127236 < (int64_t) 2; i_127236++) { + for (int64_t i_127237 = 0; i_127237 < k2p2zq_73023; + i_127237++) { + ((__global double *) mem_122045)[phys_tid_94438 + + (i_127236 * + (num_threads_125944 * + k2p2zq_73023) + + i_127237 * + num_threads_125944)] = + 0.0; + } + } + for (int64_t i_127238 = 0; i_127238 < k2p2zq_73023; i_127238++) { + int64_t x_127239 = (int64_t) 0 + i_127238 * (int64_t) 1; + + ((__global int64_t *) mem_122047)[phys_tid_94438 + i_127238 * + num_threads_125944] = + x_127239; + } + for (int64_t j_95020 = 0; j_95020 < k2p2zq_73023; j_95020++) { + bool index_certs_95023; + + if (!ok_or_empty_73720) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 523) == -1) { + global_failure_args[0] = j_95020; + global_failure_args[1] = (int64_t) 0; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_95025; + double redout_119751 = 0.0; + + for (int64_t i_119752 = 0; i_119752 < rp1_73709; i_119752++) { + double x_95029 = ((__global double *) mem_122025)[i_119752 * + (k2p2zq_73023 * + m_73008) + + gtid_94437 * + k2p2zq_73023 + + 
j_95020]; + double defunc_1_f_res_95030 = x_95029 * x_95029; + double defunc_1_op_res_95028 = defunc_1_f_res_95030 + + redout_119751; + double redout_tmp_127242 = defunc_1_op_res_95028; + + redout_119751 = redout_tmp_127242; + } + defunc_2_reduce_res_95025 = redout_119751; + + double sqrt_res_95031; + + sqrt_res_95031 = futrts_sqrt64(defunc_2_reduce_res_95025); + ((__global double *) mem_122042)[phys_tid_94438 + j_95020 * + num_threads_125944] = + sqrt_res_95031; + ((__global double *) mem_122045)[phys_tid_94438 + j_95020 * + num_threads_125944] = + sqrt_res_95031; + + bool zeze_res_95034 = sqrt_res_95031 == 0.0; + double lw_val_95035; + + if (zeze_res_95034) { + lw_val_95035 = 1.0; + } else { + lw_val_95035 = sqrt_res_95031; + } + ((__global double *) mem_122045)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + j_95020 * + num_threads_125944)] = + lw_val_95035; + } + for (int64_t i_127243 = 0; i_127243 < k2p2zq_73023; i_127243++) { + for (int64_t i_127244 = 0; i_127244 < rp1_73709; i_127244++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_127243 * + (num_threads_125944 * + rp1_73709) + + i_127244 * + num_threads_125944)] = + ((__global double *) mem_122021)[gtid_94437 + + (i_127243 * (m_73008 * + rp1_73709) + + i_127244 * m_73008)]; + } + } + for (int64_t i_127245 = 0; i_127245 < k2p2zq_73023; i_127245++) { + ((__global double *) double_buffer_mem_125570)[phys_tid_94438 + + i_127245 * + num_threads_125944] = + ((__global double *) mem_122042)[phys_tid_94438 + i_127245 * + num_threads_125944]; + } + for (int64_t i_127246 = 0; i_127246 < (int64_t) 2; i_127246++) { + for (int64_t i_127247 = 0; i_127247 < k2p2zq_73023; + i_127247++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (i_127246 * + (num_threads_125944 * + k2p2zq_73023) + + i_127247 * + num_threads_125944)] = + ((__global double *) mem_122045)[phys_tid_94438 + + (i_127246 * + (num_threads_125944 * + k2p2zq_73023) + + i_127247 * + 
num_threads_125944)]; + } + } + + int64_t dqrdc2_res_95041; + int64_t k_95047 = k_73214; + + for (int64_t l_95042 = 0; l_95042 < min_res_73721; l_95042++) { + int64_t x_95048 = add64((int64_t) 1, l_95042); + bool cond_95049 = slt64(x_95048, k_95047); + bool loop_cond_95050; + + if (cond_95049) { + bool y_95051 = slt64(l_95042, k2p2zq_73023); + bool index_certs_95052; + + if (!y_95051) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 524) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_95042; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_95053 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + l_95042 * + num_threads_125944)]; + double zt_res_95054 = 1.0e-7 * zt_arg_95053; + bool index_certs_95055; + + if (!y_95051) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 525) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_95056 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + l_95042 * + num_threads_125944]; + bool zl_res_95057 = zl_arg_95056 < zt_res_95054; + + loop_cond_95050 = zl_res_95057; + } else { + loop_cond_95050 = 0; + } + + bool y_95058 = slt64(l_95042, k2p2zq_73023); + int64_t upper_bound_95059 = sub64(k2p2zq_73023, x_95048); + bool loop_not_taken_95060 = !loop_cond_95050; + bool protect_assert_disj_95061 = y_95058 || + loop_not_taken_95060; + bool index_certs_95062; + + if (!protect_assert_disj_95061) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 526) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_95063; + + if (!protect_assert_disj_95061) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, -1, + 527) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_95042; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_95064; + + if (!protect_assert_disj_95061) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 528) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_95042; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_95065 = y_73099 || + loop_not_taken_95060; + bool index_certs_95066; + + if (!protect_assert_disj_95065) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 529) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_95067; + + if (!protect_assert_disj_95065) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 530) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_95068; + + if (!protect_assert_disj_95065) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 531) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_95069; + int64_t loopres_95074; + bool loop_while_95075; + int64_t k_95080; + + loop_while_95075 = loop_cond_95050; + k_95080 = k_95047; + while (loop_while_95075) { + for (int64_t i_95082 = 0; i_95082 < rp1_73709; i_95082++) { + bool index_certs_95084; + + if (!y_95058) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 532) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = i_95082; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_95085 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + i_95082 * + num_threads_125944)]; + + for (int64_t j0_95087 = 0; j0_95087 < upper_bound_95059; + j0_95087++) { + int64_t j_95089 = add64(x_95048, j0_95087); + bool x_95090 = sle64((int64_t) 0, j_95089); + bool y_95091 = slt64(j_95089, k2p2zq_73023); + bool bounds_check_95092 = x_95090 && y_95091; + bool index_certs_95093; + + if (!bounds_check_95092) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 533) == + -1) { + global_failure_args[0] = j_95089; + global_failure_args[1] = i_95082; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_95094 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (j_95089 * + (num_threads_125944 * + rp1_73709) + + i_95082 * + num_threads_125944)]; + int64_t i_95095 = sub64(j_95089, (int64_t) 1); + bool x_95096 = sle64((int64_t) 0, i_95095); + bool y_95097 = slt64(i_95095, k2p2zq_73023); + bool bounds_check_95098 = x_95096 && y_95097; + bool index_certs_95099; + + if (!bounds_check_95098) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 534) == + -1) { + global_failure_args[0] = i_95095; + global_failure_args[1] = i_95082; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_95095 * + (num_threads_125944 * + rp1_73709) + + i_95082 * + num_threads_125944)] = + lw_val_95094; + } + + bool index_certs_95101; + + if (!y_73099) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, + -1, 535) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = i_95082; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (m_73095 * + (num_threads_125944 * + rp1_73709) + + i_95082 * + num_threads_125944)] = + t_95085; + } + + int64_t i_95103 = ((__global + int64_t *) mem_122047)[phys_tid_94438 + + l_95042 * + num_threads_125944]; + double t_95104 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + l_95042 * + num_threads_125944]; + double tt_95105 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + l_95042 * + num_threads_125944]; + double ttt_95106 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + l_95042 * + num_threads_125944)]; + + for (int64_t j0_95110 = 0; j0_95110 < upper_bound_95059; + j0_95110++) { + int64_t j_95114 = add64(x_95048, j0_95110); + bool x_95115 = sle64((int64_t) 0, j_95114); + bool y_95116 = slt64(j_95114, k2p2zq_73023); + bool bounds_check_95117 = x_95115 && y_95116; + bool index_certs_95118; + + if (!bounds_check_95117) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 536) == -1) { + global_failure_args[0] = j_95114; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_95119 = ((__global + int64_t *) mem_122047)[phys_tid_94438 + + j_95114 * + num_threads_125944]; + int64_t i_95120 = sub64(j_95114, (int64_t) 1); + bool x_95121 = sle64((int64_t) 0, i_95120); + bool y_95122 = slt64(i_95120, k2p2zq_73023); + bool bounds_check_95123 = x_95121 && y_95122; + bool index_certs_95124; + + if (!bounds_check_95123) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 537) == -1) { + global_failure_args[0] = i_95120; + global_failure_args[1] = k2p2zq_73023; + ; + 
} + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_122047)[phys_tid_94438 + + i_95120 * + num_threads_125944] = + lw_val_95119; + + double lw_val_95126 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + j_95114 * + num_threads_125944]; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + i_95120 * + num_threads_125944] = + lw_val_95126; + + bool index_certs_95128; + + if (!bounds_check_95117) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 538) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_95114; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_95129 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + j_95114 * + num_threads_125944]; + bool index_certs_95130; + + if (!bounds_check_95123) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 539) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_95120; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + i_95120 * + num_threads_125944] = + lw_val_95129; + + bool index_certs_95132; + + if (!bounds_check_95117) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 540) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_95114; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_95133 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + j_95114 * + num_threads_125944)]; + bool index_certs_95134; + + if (!bounds_check_95123) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 541) == -1) { + 
global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_95120; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + i_95120 * + num_threads_125944)] = + lw_val_95133; + } + ((__global int64_t *) mem_122047)[phys_tid_94438 + m_73095 * + num_threads_125944] = + i_95103; + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + m_73095 * + num_threads_125944] = + t_95104; + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + m_73095 * + num_threads_125944] = + tt_95105; + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + m_73095 * + num_threads_125944)] = + ttt_95106; + + int64_t k_95140 = sub64(k_95080, (int64_t) 1); + bool cond_95141 = slt64(x_95048, k_95140); + bool loop_cond_95142; + + if (cond_95141) { + bool index_certs_95143; + + if (!y_95058) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 542) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_95042; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_95144 = ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (num_threads_125944 * + k2p2zq_73023 + + l_95042 * + num_threads_125944)]; + double zt_res_95145 = 1.0e-7 * zt_arg_95144; + bool index_certs_95146; + + if (!y_95058) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 543) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_95147 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + l_95042 * + num_threads_125944]; + bool zl_res_95148 = zl_arg_95147 < zt_res_95145; + + 
loop_cond_95142 = zl_res_95148; + } else { + loop_cond_95142 = 0; + } + + bool loop_while_tmp_127253 = loop_cond_95142; + int64_t k_tmp_127258 = k_95140; + + loop_while_95075 = loop_while_tmp_127253; + k_95080 = k_tmp_127258; + } + loopres_95069 = loop_while_95075; + loopres_95074 = k_95080; + + bool cond_95149 = x_95048 == rp1_73709; + int64_t j_m_i_95150 = sub64(rp1_73709, l_95042); + bool empty_slice_95154 = j_m_i_95150 == (int64_t) 0; + int64_t m_95155 = sub64(j_m_i_95150, (int64_t) 1); + int64_t i_p_m_t_s_95156 = add64(l_95042, m_95155); + bool zzero_leq_i_p_m_t_s_95157 = sle64((int64_t) 0, + i_p_m_t_s_95156); + bool i_p_m_t_s_leq_w_95158 = slt64(i_p_m_t_s_95156, rp1_73709); + bool i_lte_j_95159 = sle64(l_95042, rp1_73709); + bool y_95160 = zzero_leq_i_p_m_t_s_95157 && + i_p_m_t_s_leq_w_95158; + bool y_95161 = i_lte_j_95159 && y_95160; + bool ok_or_empty_95162 = empty_slice_95154 || y_95161; + bool index_ok_95163 = y_95058 && ok_or_empty_95162; + + if (cond_95149) { + for (int64_t i_127264 = 0; i_127264 < k2p2zq_73023; + i_127264++) { + ((__global double *) mem_125465)[phys_tid_94438 + + i_127264 * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + i_127264 * + num_threads_125944]; + } + for (int64_t i_127265 = 0; i_127265 < (int64_t) 2; + i_127265++) { + for (int64_t i_127266 = 0; i_127266 < k2p2zq_73023; + i_127266++) { + ((__global double *) mem_125463)[phys_tid_94438 + + (i_127265 * + (num_threads_125944 * + k2p2zq_73023) + + i_127266 * + num_threads_125944)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (i_127265 * + (num_threads_125944 * + k2p2zq_73023) + + i_127266 * + num_threads_125944)]; + } + } + for (int64_t i_127267 = 0; i_127267 < k2p2zq_73023; + i_127267++) { + for (int64_t i_127268 = 0; i_127268 < rp1_73709; + i_127268++) { + ((__global double *) mem_125505)[phys_tid_94438 + + (i_127267 * + (num_threads_125944 * + rp1_73709) + + i_127268 * + num_threads_125944)] = + ((__global 
+ double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_127267 * + (num_threads_125944 * + rp1_73709) + + i_127268 * + num_threads_125944)]; + } + } + } else { + bool index_certs_95164; + + if (!index_ok_95163) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 544) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = l_95042; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_95166; + double redout_119753 = 0.0; + + for (int64_t i_119754 = 0; i_119754 < j_m_i_95150; + i_119754++) { + int64_t slice_120014 = l_95042 + i_119754; + double x_95170 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + slice_120014 * + num_threads_125944)]; + double defunc_1_f_res_95171 = x_95170 * x_95170; + double defunc_1_op_res_95169 = defunc_1_f_res_95171 + + redout_119753; + double redout_tmp_127269 = defunc_1_op_res_95169; + + redout_119753 = redout_tmp_127269; + } + defunc_2_reduce_res_95166 = redout_119753; + + double sqrt_res_95172; + + sqrt_res_95172 = futrts_sqrt64(defunc_2_reduce_res_95166); + + bool zeze_res_95173 = sqrt_res_95172 == 0.0; + + if (zeze_res_95173) { + for (int64_t i_127270 = 0; i_127270 < k2p2zq_73023; + i_127270++) { + ((__global double *) mem_125250)[phys_tid_94438 + + i_127270 * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + i_127270 * + num_threads_125944]; + } + for (int64_t i_127271 = 0; i_127271 < (int64_t) 2; + i_127271++) { + for (int64_t i_127272 = 0; i_127272 < k2p2zq_73023; + i_127272++) { + ((__global + double *) mem_125248)[phys_tid_94438 + + (i_127271 * + (num_threads_125944 * + k2p2zq_73023) + + i_127272 * + num_threads_125944)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (i_127271 * + (num_threads_125944 * + k2p2zq_73023) + + i_127272 * + 
num_threads_125944)]; + } + } + for (int64_t i_127273 = 0; i_127273 < k2p2zq_73023; + i_127273++) { + for (int64_t i_127274 = 0; i_127274 < rp1_73709; + i_127274++) { + ((__global + double *) mem_125455)[phys_tid_94438 + + (i_127273 * + (num_threads_125944 * + rp1_73709) + + i_127274 * + num_threads_125944)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_127273 * + (num_threads_125944 * + rp1_73709) + + i_127274 * + num_threads_125944)]; + } + } + } else { + bool y_95177 = slt64(l_95042, rp1_73709); + bool index_ok_95178 = y_95058 && y_95177; + bool index_certs_95179; + + if (!index_ok_95178) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 545) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = l_95042; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_95180 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)]; + bool zeze_res_95181 = znze_arg_95180 == 0.0; + bool znze_res_95182 = !zeze_res_95181; + double nrmxl_95183; + + if (znze_res_95182) { + double abs_res_95184 = fabs(sqrt_res_95172); + double sgn_res_95185 = fsignum32(znze_arg_95180); + double zt_res_95186 = abs_res_95184 * sgn_res_95185; + + nrmxl_95183 = zt_res_95186; + } else { + nrmxl_95183 = sqrt_res_95172; + } + for (int64_t i0_95188 = 0; i0_95188 < j_m_i_95150; + i0_95188++) { + int64_t i_95190 = add64(l_95042, i0_95188); + bool x_95191 = sle64((int64_t) 0, i_95190); + bool y_95192 = slt64(i_95190, rp1_73709); + bool bounds_check_95193 = x_95191 && y_95192; + bool index_ok_95194 = y_95058 && bounds_check_95193; + bool index_certs_95195; + + if (!index_ok_95194) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 546) == + -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = i_95190; + global_failure_args[2] = 
k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_95196 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + i_95190 * + num_threads_125944)]; + double lw_val_95197 = x_95196 / nrmxl_95183; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + i_95190 * + num_threads_125944)] = + lw_val_95197; + } + + double zp_arg_95199 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)]; + double zp_res_95200 = 1.0 + zp_arg_95199; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)] = + zp_res_95200; + + bool bounds_invalid_upwards_95202 = slt64(k2p2zq_73023, + x_95048); + bool valid_95203 = !bounds_invalid_upwards_95202; + bool range_valid_c_95204; + + if (!valid_95203) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 547) == -1) { + global_failure_args[0] = x_95048; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_95205 = slt64((int64_t) 0, + upper_bound_95059); + bool loop_not_taken_95206 = !loop_nonempty_95205; + bool protect_assert_disj_95207 = index_ok_95178 || + loop_not_taken_95206; + bool index_certs_95208; + + if (!protect_assert_disj_95207) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 548) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = l_95042; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_95212 = 0; i_95212 < upper_bound_95059; + i_95212++) { + int64_t index_primexp_95216 = add64(x_95048, + i_95212); + bool x_95217 = sle64((int64_t) 0, 
+ index_primexp_95216); + bool y_95218 = slt64(index_primexp_95216, + k2p2zq_73023); + bool bounds_check_95219 = x_95217 && y_95218; + double t_95220; + double t_95222 = 0.0; + + for (int64_t i0_95221 = 0; i0_95221 < j_m_i_95150; + i0_95221++) { + int64_t i_95223 = add64(l_95042, i0_95221); + bool x_95224 = sle64((int64_t) 0, i_95223); + bool y_95225 = slt64(i_95223, rp1_73709); + bool bounds_check_95226 = x_95224 && y_95225; + bool index_ok_95227 = y_95058 && + bounds_check_95226; + bool index_certs_95228; + + if (!index_ok_95227) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 549) == + -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = i_95223; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_95229 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + i_95223 * + num_threads_125944)]; + bool index_ok_95230 = bounds_check_95219 && + bounds_check_95226; + bool index_certs_95231; + + if (!index_ok_95230) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 550) == + -1) { + global_failure_args[0] = + index_primexp_95216; + global_failure_args[1] = i_95223; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_95232 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (index_primexp_95216 * + (num_threads_125944 * + rp1_73709) + + i_95223 * + num_threads_125944)]; + double y_95233 = x_95229 * y_95232; + double loopres_95234 = t_95222 - y_95233; + double t_tmp_127279 = loopres_95234; + + t_95222 = t_tmp_127279; + } + t_95220 = t_95222; + + double y_95235 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)]; + double t_95236 = t_95220 / y_95235; + + 
for (int64_t i0_95238 = 0; i0_95238 < j_m_i_95150; + i0_95238++) { + int64_t i_95240 = add64(l_95042, i0_95238); + bool x_95241 = sle64((int64_t) 0, i_95240); + bool y_95242 = slt64(i_95240, rp1_73709); + bool bounds_check_95243 = x_95241 && y_95242; + bool index_ok_95244 = bounds_check_95219 && + bounds_check_95243; + bool index_certs_95245; + + if (!index_ok_95244) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 551) == + -1) { + global_failure_args[0] = + index_primexp_95216; + global_failure_args[1] = i_95240; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_95246 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (index_primexp_95216 * + (num_threads_125944 * + rp1_73709) + + i_95240 * + num_threads_125944)]; + bool index_ok_95247 = y_95058 && + bounds_check_95243; + bool index_certs_95248; + + if (!index_ok_95247) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 552) == + -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = i_95240; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_95249 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + i_95240 * + num_threads_125944)]; + double y_95250 = t_95236 * y_95249; + double lw_val_95251 = x_95246 + y_95250; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (index_primexp_95216 * + (num_threads_125944 * + rp1_73709) + + i_95240 * + num_threads_125944)] = + lw_val_95251; + } + + bool index_certs_95253; + + if (!bounds_check_95219) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 553) == + -1) { + global_failure_args[0] = + index_primexp_95216; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double 
zeze_arg_95254 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + index_primexp_95216 * + num_threads_125944]; + bool zeze_res_95255 = zeze_arg_95254 == 0.0; + + if (!zeze_res_95255) { + bool index_ok_95258 = y_95177 && + bounds_check_95219; + bool index_certs_95259; + + if (!index_ok_95258) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 554) == + -1) { + global_failure_args[0] = + index_primexp_95216; + global_failure_args[1] = l_95042; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_95260 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (index_primexp_95216 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)]; + double abs_res_95261 = fabs(abs_arg_95260); + double zs_res_95262 = abs_res_95261 / + zeze_arg_95254; + double ztzt_res_95263 = fpow64(zs_res_95262, + 2.0); + double zm_res_95264 = 1.0 - ztzt_res_95263; + double max_res_95265 = fmax64(0.0, + zm_res_95264); + double abs_res_95266 = fabs(max_res_95265); + bool zgze_res_95267 = 1.0e-6 <= abs_res_95266; + int64_t j_m_i_95268 = sub64(rp1_73709, x_95048); + + if (zgze_res_95267) { + double sqrt_res_95271; + + sqrt_res_95271 = + futrts_sqrt64(max_res_95265); + + double zt_res_95272 = zeze_arg_95254 * + sqrt_res_95271; + + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + index_primexp_95216 * + num_threads_125944] = + zt_res_95272; + } else { + bool empty_slice_95274 = j_m_i_95268 == + (int64_t) 0; + int64_t m_95275 = sub64(j_m_i_95268, + (int64_t) 1); + int64_t i_p_m_t_s_95276 = add64(x_95048, + m_95275); + bool zzero_leq_i_p_m_t_s_95277 = + sle64((int64_t) 0, i_p_m_t_s_95276); + bool i_p_m_t_s_leq_w_95278 = + slt64(i_p_m_t_s_95276, rp1_73709); + bool zzero_lte_i_95279 = sle64((int64_t) 0, + x_95048); + bool i_lte_j_95280 = sle64(x_95048, + rp1_73709); + bool y_95281 = i_p_m_t_s_leq_w_95278 && + 
zzero_lte_i_95279; + bool y_95282 = zzero_leq_i_p_m_t_s_95277 && + y_95281; + bool y_95283 = i_lte_j_95280 && y_95282; + bool forwards_ok_95284 = + zzero_lte_i_95279 && y_95283; + bool ok_or_empty_95285 = + empty_slice_95274 || forwards_ok_95284; + bool index_ok_95286 = bounds_check_95219 && + ok_or_empty_95285; + bool index_certs_95287; + + if (!index_ok_95286) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 555) == + -1) { + global_failure_args[0] = + index_primexp_95216; + global_failure_args[1] = + x_95048; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_95289; + double redout_119755 = 0.0; + + for (int64_t i_119756 = 0; i_119756 < + j_m_i_95268; i_119756++) { + int64_t slice_120015 = x_95048 + + i_119756; + double x_95293 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (index_primexp_95216 * + (num_threads_125944 * + rp1_73709) + + slice_120015 * + num_threads_125944)]; + double defunc_1_f_res_95294 = x_95293 * + x_95293; + double defunc_1_op_res_95292 = + defunc_1_f_res_95294 + + redout_119755; + double redout_tmp_127281 = + defunc_1_op_res_95292; + + redout_119755 = redout_tmp_127281; + } + defunc_2_reduce_res_95289 = redout_119755; + + double sqrt_res_95295; + + sqrt_res_95295 = + futrts_sqrt64(defunc_2_reduce_res_95289); + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + index_primexp_95216 * + num_threads_125944] = + sqrt_res_95295; + + bool index_certs_95297; + + if (!bounds_check_95219) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 556) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_95216; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127282 = 0; i_127282 < + (int64_t) 1; i_127282++) { + ((__global + 
double *) double_buffer_mem_125571)[phys_tid_94438 + + (index_primexp_95216 + + i_127282) * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + num_threads_125944 * + index_primexp_95216 + + i_127282 * + num_threads_125944]; + } + } + } + } + + bool index_certs_95300; + + if (!y_95058) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 557) == -1) { + global_failure_args[0] = l_95042; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127283 = 0; i_127283 < (int64_t) 1; + i_127283++) { + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + (l_95042 + + i_127283) * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + l_95042 * + (num_threads_125944 * + rp1_73709) + + num_threads_125944 * + l_95042 + + i_127283 * + num_threads_125944]; + } + + double zt_res_95303 = -1.0 * nrmxl_95183; + + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (l_95042 * + (num_threads_125944 * + rp1_73709) + + l_95042 * + num_threads_125944)] = + zt_res_95303; + for (int64_t i_127284 = 0; i_127284 < k2p2zq_73023; + i_127284++) { + ((__global double *) mem_125250)[phys_tid_94438 + + i_127284 * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + i_127284 * + num_threads_125944]; + } + for (int64_t i_127285 = 0; i_127285 < (int64_t) 2; + i_127285++) { + for (int64_t i_127286 = 0; i_127286 < k2p2zq_73023; + i_127286++) { + ((__global + double *) mem_125248)[phys_tid_94438 + + (i_127285 * + (num_threads_125944 * + k2p2zq_73023) + + i_127286 * + num_threads_125944)] = + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (i_127285 * + (num_threads_125944 * + k2p2zq_73023) + + i_127286 * + num_threads_125944)]; + } + } + for (int64_t i_127287 = 0; i_127287 < k2p2zq_73023; + i_127287++) { + for (int64_t i_127288 = 0; i_127288 < rp1_73709; + i_127288++) { 
+ ((__global + double *) mem_125455)[phys_tid_94438 + + (i_127287 * + (num_threads_125944 * + rp1_73709) + + i_127288 * + num_threads_125944)] = + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_127287 * + (num_threads_125944 * + rp1_73709) + + i_127288 * + num_threads_125944)]; + } + } + } + for (int64_t i_127289 = 0; i_127289 < k2p2zq_73023; + i_127289++) { + ((__global double *) mem_125465)[phys_tid_94438 + + i_127289 * + num_threads_125944] = + ((__global double *) mem_125250)[phys_tid_94438 + + i_127289 * + num_threads_125944]; + } + for (int64_t i_127290 = 0; i_127290 < (int64_t) 2; + i_127290++) { + for (int64_t i_127291 = 0; i_127291 < k2p2zq_73023; + i_127291++) { + ((__global double *) mem_125463)[phys_tid_94438 + + (i_127290 * + (num_threads_125944 * + k2p2zq_73023) + + i_127291 * + num_threads_125944)] = + ((__global + double *) mem_125248)[phys_tid_94438 + + (i_127290 * + (num_threads_125944 * + k2p2zq_73023) + + i_127291 * + num_threads_125944)]; + } + } + for (int64_t i_127292 = 0; i_127292 < k2p2zq_73023; + i_127292++) { + for (int64_t i_127293 = 0; i_127293 < rp1_73709; + i_127293++) { + ((__global double *) mem_125505)[phys_tid_94438 + + (i_127292 * + (num_threads_125944 * + rp1_73709) + + i_127293 * + num_threads_125944)] = + ((__global + double *) mem_125455)[phys_tid_94438 + + (i_127292 * + (num_threads_125944 * + rp1_73709) + + i_127293 * + num_threads_125944)]; + } + } + } + for (int64_t i_127294 = 0; i_127294 < k2p2zq_73023; + i_127294++) { + for (int64_t i_127295 = 0; i_127295 < rp1_73709; + i_127295++) { + ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_127294 * + (num_threads_125944 * + rp1_73709) + + i_127295 * + num_threads_125944)] = + ((__global double *) mem_125505)[phys_tid_94438 + + (i_127294 * + (num_threads_125944 * + rp1_73709) + + i_127295 * + num_threads_125944)]; + } + } + for (int64_t i_127296 = 0; i_127296 < k2p2zq_73023; + i_127296++) { + ((__global + double *) 
double_buffer_mem_125570)[phys_tid_94438 + + i_127296 * + num_threads_125944] = + ((__global double *) mem_125465)[phys_tid_94438 + + i_127296 * + num_threads_125944]; + } + for (int64_t i_127297 = 0; i_127297 < (int64_t) 2; i_127297++) { + for (int64_t i_127298 = 0; i_127298 < k2p2zq_73023; + i_127298++) { + ((__global + double *) double_buffer_mem_125571)[phys_tid_94438 + + (i_127297 * + (num_threads_125944 * + k2p2zq_73023) + + i_127298 * + num_threads_125944)] = + ((__global double *) mem_125463)[phys_tid_94438 + + (i_127297 * + (num_threads_125944 * + k2p2zq_73023) + + i_127298 * + num_threads_125944)]; + } + } + + int64_t k_tmp_127252 = loopres_95074; + + k_95047 = k_tmp_127252; + } + dqrdc2_res_95041 = k_95047; + + int64_t min_arg_95305 = sub64(dqrdc2_res_95041, (int64_t) 1); + int64_t min_res_95306 = smin64(rp1_73709, min_arg_95305); + + for (int64_t i_119759 = 0; i_119759 < k2p2zq_73023; i_119759++) { + int64_t x_95311 = add64((int64_t) 1, i_119759); + bool cond_f_res_95312 = slt64(min_res_95306, x_95311); + + for (int64_t i_119763 = 0; i_119763 < k2p2zq_73023; + i_119763++) { + int64_t x_95316 = add64((int64_t) 1, i_119763); + bool cond_95317 = slt64(min_res_95306, x_95316); + bool x_95318 = !cond_95317; + bool y_95319 = cond_f_res_95312 && x_95318; + bool cond_95320 = cond_95317 || y_95319; + double defunc_1_f_res_95321; + + if (cond_95320) { + defunc_1_f_res_95321 = NAN; + } else { + double x_95315 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (i_119759 * + (num_threads_125944 * + rp1_73709) + + i_119763 * + num_threads_125944)]; + + defunc_1_f_res_95321 = x_95315; + } + ((__global double *) mem_122382)[phys_tid_94438 + + (i_119759 * + (num_threads_125944 * + k2p2zq_73023) + + i_119763 * + num_threads_125944)] = + defunc_1_f_res_95321; + } + } + for (int64_t i_127301 = 0; i_127301 < k2p2zq_73023; i_127301++) { + ((__global double *) mem_122435)[phys_tid_94438 + i_127301 * + num_threads_125944] = 0.0; + } + for (int64_t i_119767 = 
0; i_119767 < k2p2zq_73023; i_119767++) { + for (int64_t i_127303 = 0; i_127303 < k2p2zq_73023; + i_127303++) { + ((__global double *) mem_122423)[phys_tid_94438 + + (i_119767 * + (num_threads_125944 * + k2p2zq_73023) + + i_127303 * + num_threads_125944)] = + ((__global double *) mem_122435)[phys_tid_94438 + + i_127303 * + num_threads_125944]; + } + for (int64_t i_95327 = 0; i_95327 < k2p2zq_73023; i_95327++) { + int64_t x_95329 = sub64(k2p2zq_73023, i_95327); + int64_t i_95330 = sub64(x_95329, (int64_t) 1); + bool x_95331 = sle64((int64_t) 0, i_95330); + bool y_95332 = slt64(i_95330, k2p2zq_73023); + bool bounds_check_95333 = x_95331 && y_95332; + int64_t j_m_i_95334 = sub64(k2p2zq_73023, x_95329); + bool empty_slice_95335 = j_m_i_95334 == (int64_t) 0; + int64_t m_95336 = sub64(j_m_i_95334, (int64_t) 1); + int64_t i_p_m_t_s_95337 = add64(x_95329, m_95336); + bool zzero_leq_i_p_m_t_s_95338 = sle64((int64_t) 0, + i_p_m_t_s_95337); + bool i_p_m_t_s_leq_w_95339 = slt64(i_p_m_t_s_95337, + k2p2zq_73023); + bool zzero_lte_i_95340 = sle64((int64_t) 0, x_95329); + bool i_lte_j_95341 = sle64(x_95329, k2p2zq_73023); + bool y_95342 = i_p_m_t_s_leq_w_95339 && zzero_lte_i_95340; + bool y_95343 = zzero_leq_i_p_m_t_s_95338 && y_95342; + bool y_95344 = i_lte_j_95341 && y_95343; + bool forwards_ok_95345 = zzero_lte_i_95340 && y_95344; + bool ok_or_empty_95346 = empty_slice_95335 || + forwards_ok_95345; + bool index_ok_95347 = bounds_check_95333 && + ok_or_empty_95346; + bool index_certs_95348; + + if (!index_ok_95347) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 558) == -1) { + global_failure_args[0] = i_95330; + global_failure_args[1] = x_95329; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + global_failure_args[4] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_95349; + + if (!ok_or_empty_95346) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 559) == -1) { + 
global_failure_args[0] = x_95329; + global_failure_args[1] = k2p2zq_73023; + global_failure_args[2] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_95352; + double redout_119769 = 0.0; + + for (int64_t i_119770 = 0; i_119770 < j_m_i_95334; + i_119770++) { + int64_t slice_120021 = x_95329 + i_119770; + double x_95357 = ((__global + double *) mem_122382)[phys_tid_94438 + + (slice_120021 * + (num_threads_125944 * + k2p2zq_73023) + + i_95330 * + num_threads_125944)]; + bool isnan_res_95358; + + isnan_res_95358 = futrts_isnan64(x_95357); + + double defunc_1_f_res_95359; + + if (isnan_res_95358) { + defunc_1_f_res_95359 = 0.0; + } else { + double x_95356 = ((__global + double *) mem_122423)[phys_tid_94438 + + (i_119767 * + (num_threads_125944 * + k2p2zq_73023) + + slice_120021 * + num_threads_125944)]; + double defunc_1_f_res_f_res_95360 = x_95356 * + x_95357; + + defunc_1_f_res_95359 = defunc_1_f_res_f_res_95360; + } + + double defunc_1_op_res_95355 = defunc_1_f_res_95359 + + redout_119769; + double redout_tmp_127305 = defunc_1_op_res_95355; + + redout_119769 = redout_tmp_127305; + } + defunc_2_reduce_res_95352 = redout_119769; + + bool index_ok_95361 = bounds_check_95333 && + bounds_check_95333; + bool index_certs_95362; + + if (!index_ok_95361) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 560) == -1) { + global_failure_args[0] = i_95330; + global_failure_args[1] = i_95330; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zs_arg_95363 = ((__global + double *) mem_122382)[phys_tid_94438 + + (i_95330 * + (num_threads_125944 * + k2p2zq_73023) + + i_95330 * + num_threads_125944)]; + bool index_certs_95364; + + if (!bounds_check_95333) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 561) == -1) { + global_failure_args[0] = i_95330; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure 
= true; + goto error_0; + } + } + + double zm_arg_95365 = ((__global + double *) mem_121938)[i_119767 * + k2p2zq_73023 + + i_95330]; + double zm_res_95366 = zm_arg_95365 - + defunc_2_reduce_res_95352; + double zs_res_95367 = zm_res_95366 / zs_arg_95363; + + ((__global double *) mem_122423)[phys_tid_94438 + + (i_119767 * + (num_threads_125944 * + k2p2zq_73023) + i_95330 * + num_threads_125944)] = + zs_res_95367; + } + } + for (int64_t i_119773 = 0; i_119773 < k2p2zq_73023; i_119773++) { + for (int64_t i_119777 = 0; i_119777 < k2p2zq_73023; + i_119777++) { + double defunc_2_reduce_res_95374; + double redout_119779 = 0.0; + + for (int64_t i_119780 = 0; i_119780 < k2p2zq_73023; + i_119780++) { + double x_95378 = ((__global + double *) mem_122423)[phys_tid_94438 + + (i_119780 * + (num_threads_125944 * + k2p2zq_73023) + + i_119773 * + num_threads_125944)]; + double x_95379 = ((__global + double *) mem_122423)[phys_tid_94438 + + (i_119780 * + (num_threads_125944 * + k2p2zq_73023) + + i_119777 * + num_threads_125944)]; + double defunc_1_f_res_95380 = x_95378 * x_95379; + double defunc_1_op_res_95377 = defunc_1_f_res_95380 + + redout_119779; + double redout_tmp_127308 = defunc_1_op_res_95377; + + redout_119779 = redout_tmp_127308; + } + defunc_2_reduce_res_95374 = redout_119779; + ((__global double *) mem_122464)[phys_tid_94438 + + (i_119773 * + (num_threads_125944 * + k2p2zq_73023) + + i_119777 * + num_threads_125944)] = + defunc_2_reduce_res_95374; + } + } + + int64_t min_res_95381 = smin64(r_73698, min_res_95306); + + for (int64_t i_127309 = 0; i_127309 < rp1_73709; i_127309++) { + ((__global double *) double_buffer_mem_125582)[phys_tid_94438 + + i_127309 * + num_threads_125944] = + ((__global double *) mem_122017)[gtid_94437 + i_127309 * + m_73008]; + } + for (int64_t j_95383 = 0; j_95383 < min_res_95381; j_95383++) { + bool y_95385 = slt64(j_95383, k2p2zq_73023); + bool index_certs_95386; + + if (!y_95385) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 
562) == -1) { + global_failure_args[0] = j_95383; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_95387 = ((__global + double *) double_buffer_mem_125570)[phys_tid_94438 + + j_95383 * + num_threads_125944]; + bool zeze_res_95388 = zeze_arg_95387 == 0.0; + + if (zeze_res_95388) { + for (int64_t i_127311 = 0; i_127311 < rp1_73709; + i_127311++) { + ((__global double *) mem_125258)[phys_tid_94438 + + i_127311 * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_127311 * + num_threads_125944]; + } + } else { + bool y_95390 = slt64(j_95383, rp1_73709); + bool index_certs_95391; + + if (!y_95390) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 563) == -1) { + global_failure_args[0] = j_95383; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_95392 = ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + j_95383 * + num_threads_125944]; + double negate_arg_95393 = zeze_arg_95387 * y_95392; + double t_95394 = 0.0 - negate_arg_95393; + int64_t x_95395 = sub64(rp1_73709, j_95383); + int64_t upper_bound_95396 = sub64(x_95395, (int64_t) 1); + double t_95397; + double t_95399 = t_95394; + + for (int64_t i0_95398 = 0; i0_95398 < upper_bound_95396; + i0_95398++) { + int64_t x_95400 = add64(j_95383, i0_95398); + int64_t i_95401 = add64((int64_t) 1, x_95400); + bool x_95402 = sle64((int64_t) 0, i_95401); + bool y_95403 = slt64(i_95401, rp1_73709); + bool bounds_check_95404 = x_95402 && y_95403; + bool index_ok_95405 = y_95385 && bounds_check_95404; + bool index_certs_95406; + + if (!index_ok_95405) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 564) == -1) { + global_failure_args[0] = j_95383; + global_failure_args[1] = i_95401; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double 
x_95407 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (j_95383 * + (num_threads_125944 * + rp1_73709) + + i_95401 * + num_threads_125944)]; + bool index_certs_95408; + + if (!bounds_check_95404) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 565) == -1) { + global_failure_args[0] = i_95401; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_95409 = ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_95401 * + num_threads_125944]; + double y_95410 = x_95407 * y_95409; + double loopres_95411 = t_95399 - y_95410; + double t_tmp_127312 = loopres_95411; + + t_95399 = t_tmp_127312; + } + t_95397 = t_95399; + + double t_95412 = t_95397 / zeze_arg_95387; + double y_95413 = zeze_arg_95387 * t_95412; + double lw_val_95414 = y_95392 + y_95413; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + j_95383 * + num_threads_125944] = + lw_val_95414; + for (int64_t i0_95417 = 0; i0_95417 < upper_bound_95396; + i0_95417++) { + int64_t x_95419 = add64(j_95383, i0_95417); + int64_t i_95420 = add64((int64_t) 1, x_95419); + bool x_95421 = sle64((int64_t) 0, i_95420); + bool y_95422 = slt64(i_95420, rp1_73709); + bool bounds_check_95423 = x_95421 && y_95422; + bool index_certs_95424; + + if (!bounds_check_95423) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 566) == -1) { + global_failure_args[0] = i_95420; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_95425 = ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_95420 * + num_threads_125944]; + bool index_ok_95426 = y_95385 && bounds_check_95423; + bool index_certs_95427; + + if (!index_ok_95426) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 567) == -1) { + global_failure_args[0] = j_95383; + global_failure_args[1] = i_95420; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; 
+ } + local_failure = true; + goto error_0; + } + } + + double y_95428 = ((__global + double *) double_buffer_mem_125569)[phys_tid_94438 + + (j_95383 * + (num_threads_125944 * + rp1_73709) + + i_95420 * + num_threads_125944)]; + double y_95429 = t_95412 * y_95428; + double lw_val_95430 = x_95425 + y_95429; + + ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_95420 * + num_threads_125944] = + lw_val_95430; + } + for (int64_t i_127314 = 0; i_127314 < rp1_73709; + i_127314++) { + ((__global double *) mem_125258)[phys_tid_94438 + + i_127314 * + num_threads_125944] = + ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_127314 * + num_threads_125944]; + } + } + for (int64_t i_127315 = 0; i_127315 < rp1_73709; i_127315++) { + ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_127315 * + num_threads_125944] = + ((__global double *) mem_125258)[phys_tid_94438 + + i_127315 * + num_threads_125944]; + } + } + for (int64_t i_127316 = 0; i_127316 < k2p2zq_73023; i_127316++) { + ((__global double *) mem_122537)[phys_tid_94438 + i_127316 * + num_threads_125944] = 0.0; + } + for (int64_t write_iter_119781 = 0; write_iter_119781 < + k2p2zq_73023; write_iter_119781++) { + int64_t write_iv_119784 = ((__global + int64_t *) mem_122047)[phys_tid_94438 + + write_iter_119781 * + num_threads_125944]; + double defunc_2_reduce_res_95437; + double redout_119791 = 0.0; + + for (int64_t i_119792 = 0; i_119792 < k2p2zq_73023; + i_119792++) { + double x_95441 = ((__global + double *) double_buffer_mem_125582)[phys_tid_94438 + + i_119792 * + num_threads_125944]; + double x_95442 = ((__global + double *) mem_122423)[phys_tid_94438 + + (i_119792 * + (num_threads_125944 * + k2p2zq_73023) + + write_iter_119781 * + num_threads_125944)]; + double defunc_1_f_res_95443 = x_95441 * x_95442; + double defunc_1_op_res_95440 = defunc_1_f_res_95443 + + redout_119791; + double redout_tmp_127318 = defunc_1_op_res_95440; + + redout_119791 = 
redout_tmp_127318; + } + defunc_2_reduce_res_95437 = redout_119791; + + bool less_than_zzero_119785 = slt64(write_iv_119784, + (int64_t) 0); + bool greater_than_sizze_119786 = sle64(k2p2zq_73023, + write_iv_119784); + bool outside_bounds_dim_119787 = less_than_zzero_119785 || + greater_than_sizze_119786; + + if (!outside_bounds_dim_119787) { + ((__global double *) mem_122537)[phys_tid_94438 + + write_iv_119784 * + num_threads_125944] = + defunc_2_reduce_res_95437; + } + } + for (int64_t i_119795 = 0; i_119795 < k2p2zq_73023; i_119795++) { + int64_t x_95446 = ((__global + int64_t *) mem_122047)[phys_tid_94438 + + i_119795 * + num_threads_125944]; + + for (int64_t i_127320 = 0; i_127320 < k2p2zq_73023; + i_127320++) { + ((__global int64_t *) mem_122564)[phys_tid_94438 + + i_127320 * + num_threads_125944] = + x_95446; + } + for (int64_t i_127321 = 0; i_127321 < k2p2zq_73023; + i_127321++) { + ((__global int64_t *) mem_122552)[phys_tid_94438 + + (i_119795 * + (num_threads_125944 * + k2p2zq_73023) + + i_127321 * + num_threads_125944)] = + ((__global int64_t *) mem_122564)[phys_tid_94438 + + i_127321 * + num_threads_125944]; + } + } + for (int64_t i_127322 = 0; i_127322 < k2p2zq_73023; i_127322++) { + for (int64_t i_127323 = 0; i_127323 < k2p2zq_73023; + i_127323++) { + ((__global double *) mem_122575)[phys_tid_94438 + + (i_127322 * + (num_threads_125944 * + k2p2zq_73023) + + i_127323 * + num_threads_125944)] = + 0.0; + } + } + for (int64_t write_iter_119797 = 0; write_iter_119797 < + binop_x_120251; write_iter_119797++) { + int64_t new_index_120022 = squot64(write_iter_119797, + k2p2zq_73023); + int64_t binop_y_120024 = k2p2zq_73023 * new_index_120022; + int64_t new_index_120025 = write_iter_119797 - binop_y_120024; + int64_t write_iv_119799 = ((__global + int64_t *) mem_122552)[phys_tid_94438 + + (new_index_120022 * + (num_threads_125944 * + k2p2zq_73023) + + new_index_120025 * + num_threads_125944)]; + int64_t write_iv_119800 = ((__global + int64_t *) 
mem_122047)[phys_tid_94438 + + new_index_120025 * + num_threads_125944]; + bool less_than_zzero_119802 = slt64(write_iv_119799, + (int64_t) 0); + bool greater_than_sizze_119803 = sle64(k2p2zq_73023, + write_iv_119799); + bool outside_bounds_dim_119804 = less_than_zzero_119802 || + greater_than_sizze_119803; + bool less_than_zzero_119805 = slt64(write_iv_119800, + (int64_t) 0); + bool greater_than_sizze_119806 = sle64(k2p2zq_73023, + write_iv_119800); + bool outside_bounds_dim_119807 = less_than_zzero_119805 || + greater_than_sizze_119806; + bool outside_bounds_119809 = outside_bounds_dim_119804 || + outside_bounds_dim_119807; + + if (!outside_bounds_119809) { + for (int64_t i_127325 = 0; i_127325 < (int64_t) 1; + i_127325++) { + ((__global double *) mem_122575)[phys_tid_94438 + + (write_iv_119799 * + (num_threads_125944 * + k2p2zq_73023) + + (write_iv_119800 + + i_127325) * + num_threads_125944)] = + ((__global double *) mem_122464)[phys_tid_94438 + + new_index_120022 * + (num_threads_125944 * + k2p2zq_73023) + + num_threads_125944 * + new_index_120025 + + i_127325 * + num_threads_125944]; + } + } + } + for (int64_t i_119816 = 0; i_119816 < k2p2zq_73023; i_119816++) { + double x_95459 = ((__global + double *) mem_122537)[phys_tid_94438 + + i_119816 * + num_threads_125944]; + + for (int64_t i_119821 = 0; i_119821 < k2p2zq_73023; + i_119821++) { + double x_95461 = ((__global + double *) mem_122575)[phys_tid_94438 + + (i_119816 * + (num_threads_125944 * + k2p2zq_73023) + + i_119821 * + num_threads_125944)]; + bool isnan_res_95462; + + isnan_res_95462 = futrts_isnan64(x_95461); + + double defunc_0_f_res_95463; + + if (isnan_res_95462) { + defunc_0_f_res_95463 = 0.0; + } else { + defunc_0_f_res_95463 = x_95461; + } + ((__global double *) mem_122598)[phys_tid_94438 + + (i_119816 * + (num_threads_125944 * + k2p2zq_73023) + + i_119821 * + num_threads_125944)] = + defunc_0_f_res_95463; + } + + bool isnan_res_95464; + + isnan_res_95464 = futrts_isnan64(x_95459); + + double 
defunc_0_f_res_95465; + + if (isnan_res_95464) { + defunc_0_f_res_95465 = 0.0; + } else { + defunc_0_f_res_95465 = x_95459; + } + ((__global double *) mem_122595)[phys_tid_94438 + i_119816 * + num_threads_125944] = + defunc_0_f_res_95465; + } + + bool isnan_res_95466; + + isnan_res_95466 = futrts_isnan64(recresid_r_95014); + + bool cond_95467 = !isnan_res_95466; + bool cond_t_res_95468 = x_94984 == k2p2zq_73023; + bool x_95469 = cond_95467 && cond_t_res_95468; + bool nona_t_res_95470 = min_res_95306 == k2p2zq_73023; + bool x_95471 = x_95469 && nona_t_res_95470; + bool complement_arg_95472; + + if (x_95471) { + double defunc_2_reduce_res_95473; + double redout_119823 = 0.0; + + for (int64_t i_119824 = 0; i_119824 < k2p2zq_73023; + i_119824++) { + double x_95477 = ((__global + double *) mem_122028)[phys_tid_94438 + + i_119824 * + num_threads_125944]; + double x_95479 = ((__global double *) mem_122014)[i_119824 * + m_73008 + + gtid_94437]; + double x_95480 = ((__global + double *) mem_122595)[phys_tid_94438 + + i_119824 * + num_threads_125944]; + double defunc_0_f_res_95481; + double redout_119825 = 0.0; + + for (int64_t i_119826 = 0; i_119826 < k2p2zq_73023; + i_119826++) { + double x_95485 = ((__global + double *) mem_122028)[phys_tid_94438 + + i_119826 * + num_threads_125944]; + double x_95486 = ((__global + double *) mem_122011)[i_119824 * + (m_73008 * + k2p2zq_73023) + + i_119826 * + m_73008 + + gtid_94437]; + double x_95487 = ((__global + double *) mem_120246)[i_119826 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_94437 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_95488 = x_95477 * x_95485; + double y_95489 = x_95488 / fr_95003; + double defunc_1_f_res_95490 = x_95486 - y_95489; + double defunc_1_f_res_95491 = x_95487 * + defunc_1_f_res_95490; + double defunc_1_op_res_95484 = defunc_1_f_res_95491 + + redout_119825; + double redout_tmp_127330 = defunc_1_op_res_95484; + + redout_119825 = redout_tmp_127330; + } + defunc_0_f_res_95481 = 
redout_119825; + + double defunc_0_g_res_95492 = resid_95012 * + defunc_0_f_res_95481; + double defunc_1_f_res_95493 = x_95479 + + defunc_0_g_res_95492; + double defunc_1_f_res_95494 = x_95480 - + defunc_1_f_res_95493; + double defunc_0_f_res_95495 = fabs(defunc_1_f_res_95494); + double defunc_1_op_res_95476 = defunc_0_f_res_95495 + + redout_119823; + double redout_tmp_127329 = defunc_1_op_res_95476; + + redout_119823 = redout_tmp_127329; + } + defunc_2_reduce_res_95473 = redout_119823; + + double i64_res_95496 = sitofp_i64_f64(k2p2zq_73023); + double mean_abs_res_95497 = defunc_2_reduce_res_95473 / + i64_res_95496; + bool approx_equal_res_95498 = mean_abs_res_95497 <= tol_73201; + + complement_arg_95472 = approx_equal_res_95498; + } else { + complement_arg_95472 = 0; + } + + bool check_95499 = !complement_arg_95472; + bool check_95500; + + if (check_95499) { + bool defunc_2_reduce_res_95501; + bool redout_119827 = 1; + + for (int64_t i_119828 = 0; i_119828 < defunc_2_reduce_res_73132; + i_119828++) { + double x_95505 = ((__global double *) mem_121941)[i_119828 * + m_73008 + + gtid_94437]; + bool defunc_0_f_res_95506; + + defunc_0_f_res_95506 = futrts_isnan64(x_95505); + + bool x_95504 = defunc_0_f_res_95506 && redout_119827; + bool redout_tmp_127331 = x_95504; + + redout_119827 = redout_tmp_127331; + } + defunc_2_reduce_res_95501 = redout_119827; + + bool check_t_res_95507 = !defunc_2_reduce_res_95501; + + check_95500 = check_t_res_95507; + } else { + check_95500 = 0; + } + ((__global bool *) mem_122650)[gtid_94437] = check_95500; + for (int64_t i_127332 = 0; i_127332 < k2p2zq_73023; i_127332++) { + for (int64_t i_127333 = 0; i_127333 < k2p2zq_73023; + i_127333++) { + ((__global double *) mem_122654)[i_127332 * (m_73008 * + k2p2zq_73023) + + i_127333 * m_73008 + + gtid_94437] = ((__global + double *) mem_122598)[phys_tid_94438 + + (i_127332 * + (num_threads_125944 * + k2p2zq_73023) + + i_127333 * + num_threads_125944)]; + } + } + for (int64_t i_127334 = 0; 
i_127334 < k2p2zq_73023; i_127334++) { + ((__global double *) mem_122657)[i_127334 * m_73008 + + gtid_94437] = ((__global + double *) mem_122595)[phys_tid_94438 + + i_127334 * + num_threads_125944]; + } + ((__global int64_t *) mem_122659)[gtid_94437] = min_res_95306; + ((__global double *) mem_122661)[gtid_94437] = recresid_r_95014; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_94973 +} +__kernel void mainMagnitudezisegmap_95535(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + double tol_73201, int64_t r_73698, + __global unsigned char *mem_120246, + __global unsigned char *mem_121941, + __global + unsigned char *mem_param_121972, + __global unsigned char *mem_122674, + __global unsigned char *mem_122680, + __global unsigned char *mem_122682, + __global unsigned char *mem_123135, + __global unsigned char *mem_123699, + __global unsigned char *mem_123702, + __global unsigned char *mem_123705, + __global unsigned char *mem_123708, + __global unsigned char *mem_123711) +{ + #define segmap_group_sizze_98196 (mainMagnitudezisegmap_group_sizze_95537) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127966; + int32_t local_tid_127967; + int64_t group_sizze_127970; + int32_t wave_sizze_127969; + int32_t group_tid_127968; + + global_tid_127966 = get_global_id(0); + local_tid_127967 = get_local_id(0); + group_sizze_127970 = get_local_size(0); + wave_sizze_127969 = LOCKSTEP_WIDTH; + group_tid_127968 = get_group_id(0); + + int32_t phys_tid_95535; + + phys_tid_95535 = global_tid_127966; + + int64_t gtid_95534; + + gtid_95534 = sext_i32_i64(group_tid_127968) * segmap_group_sizze_98196 + + sext_i32_i64(local_tid_127967); + if (slt64(gtid_95534, m_73008)) { + int64_t x_98202 = ((__global int64_t *) mem_param_121972)[gtid_95534]; + double recresid_r_98207 = 
((__global double *) mem_122682)[gtid_95534]; + int64_t min_res_98208 = ((__global int64_t *) mem_123135)[gtid_95534]; + bool isnan_res_98210; + + isnan_res_98210 = futrts_isnan64(recresid_r_98207); + + bool cond_98211 = !isnan_res_98210; + bool cond_t_res_98212 = x_98202 == k2p2zq_73023; + bool x_98213 = cond_98211 && cond_t_res_98212; + bool nona_t_res_98214 = min_res_98208 == k2p2zq_73023; + bool x_98215 = x_98213 && nona_t_res_98214; + bool complement_arg_98216; + + if (x_98215) { + double fr_98205 = ((__global double *) mem_122674)[gtid_95534]; + double resid_98206 = ((__global double *) mem_122680)[gtid_95534]; + double defunc_2_reduce_res_98217; + double redout_119860 = 0.0; + + for (int64_t i_119861 = 0; i_119861 < k2p2zq_73023; i_119861++) { + double x_98221 = ((__global double *) mem_123705)[i_119861 * + m_73008 + + gtid_95534]; + double x_98223 = ((__global double *) mem_123702)[i_119861 * + m_73008 + + gtid_95534]; + double x_98224 = ((__global double *) mem_123708)[i_119861 * + m_73008 + + gtid_95534]; + double defunc_0_f_res_98225; + double redout_119862 = 0.0; + + for (int64_t i_119863 = 0; i_119863 < k2p2zq_73023; + i_119863++) { + double x_98229 = ((__global double *) mem_123705)[i_119863 * + m_73008 + + gtid_95534]; + double x_98230 = ((__global double *) mem_123699)[i_119861 * + (m_73008 * + k2p2zq_73023) + + i_119863 * + m_73008 + + gtid_95534]; + double x_98231 = ((__global double *) mem_120246)[i_119863 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_95534 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_98232 = x_98221 * x_98229; + double y_98233 = x_98232 / fr_98205; + double defunc_1_f_res_98234 = x_98230 - y_98233; + double defunc_1_f_res_98235 = x_98231 * + defunc_1_f_res_98234; + double defunc_1_op_res_98228 = defunc_1_f_res_98235 + + redout_119862; + double redout_tmp_127972 = defunc_1_op_res_98228; + + redout_119862 = redout_tmp_127972; + } + defunc_0_f_res_98225 = redout_119862; + + double defunc_0_g_res_98236 = resid_98206 
* + defunc_0_f_res_98225; + double defunc_1_f_res_98237 = x_98223 + defunc_0_g_res_98236; + double defunc_1_f_res_98238 = x_98224 - defunc_1_f_res_98237; + double defunc_0_f_res_98239 = fabs(defunc_1_f_res_98238); + double defunc_1_op_res_98220 = defunc_0_f_res_98239 + + redout_119860; + double redout_tmp_127971 = defunc_1_op_res_98220; + + redout_119860 = redout_tmp_127971; + } + defunc_2_reduce_res_98217 = redout_119860; + + double i64_res_98240 = sitofp_i64_f64(k2p2zq_73023); + double mean_abs_res_98241 = defunc_2_reduce_res_98217 / + i64_res_98240; + bool approx_equal_res_98242 = mean_abs_res_98241 <= tol_73201; + + complement_arg_98216 = approx_equal_res_98242; + } else { + complement_arg_98216 = 0; + } + + bool check_98243 = !complement_arg_98216; + bool check_98244; + + if (check_98243) { + bool defunc_2_reduce_res_98245; + bool redout_119864 = 1; + + for (int64_t i_119865 = 0; i_119865 < defunc_2_reduce_res_73132; + i_119865++) { + double x_98249 = ((__global double *) mem_121941)[i_119865 * + m_73008 + + gtid_95534]; + bool defunc_0_f_res_98250; + + defunc_0_f_res_98250 = futrts_isnan64(x_98249); + + bool x_98248 = defunc_0_f_res_98250 && redout_119864; + bool redout_tmp_127973 = x_98248; + + redout_119864 = redout_tmp_127973; + } + defunc_2_reduce_res_98245 = redout_119864; + + bool check_t_res_98251 = !defunc_2_reduce_res_98245; + + check_98244 = check_t_res_98251; + } else { + check_98244 = 0; + } + ((__global bool *) mem_123711)[gtid_95534] = check_98244; + } + + error_0: + return; + #undef segmap_group_sizze_98196 +} +__kernel void mainMagnitudezisegmap_95607(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_122003, + __global unsigned char *mem_123695) +{ + #define segmap_group_sizze_98187 (mainMagnitudezisegmap_group_sizze_95610) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127961; + int32_t 
local_tid_127962; + int64_t group_sizze_127965; + int32_t wave_sizze_127964; + int32_t group_tid_127963; + + global_tid_127961 = get_global_id(0); + local_tid_127962 = get_local_id(0); + group_sizze_127965 = get_local_size(0); + wave_sizze_127964 = LOCKSTEP_WIDTH; + group_tid_127963 = get_group_id(0); + + int32_t phys_tid_95607; + + phys_tid_95607 = global_tid_127961; + + int64_t gtid_95605; + + gtid_95605 = squot64(sext_i32_i64(group_tid_127963) * + segmap_group_sizze_98187 + + sext_i32_i64(local_tid_127962), k2p2zq_73023); + + int64_t gtid_95606; + + gtid_95606 = sext_i32_i64(group_tid_127963) * segmap_group_sizze_98187 + + sext_i32_i64(local_tid_127962) - + squot64(sext_i32_i64(group_tid_127963) * segmap_group_sizze_98187 + + sext_i32_i64(local_tid_127962), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_95605, m_73008) && slt64(gtid_95606, k2p2zq_73023)) { + double x_98190 = ((__global double *) mem_122003)[gtid_95605 * + k2p2zq_73023 + + gtid_95606]; + bool isnan_res_98191; + + isnan_res_98191 = futrts_isnan64(x_98190); + + double defunc_0_f_res_98192; + + if (isnan_res_98191) { + defunc_0_f_res_98192 = 0.0; + } else { + defunc_0_f_res_98192 = x_98190; + } + ((__global double *) mem_123695)[gtid_95605 * k2p2zq_73023 + + gtid_95606] = defunc_0_f_res_98192; + } + + error_0: + return; + #undef segmap_group_sizze_98187 +} +__kernel void mainMagnitudezisegmap_95629(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_122007, + __global unsigned char *mem_123691) +{ + #define segmap_group_sizze_98178 (mainMagnitudezisegmap_group_sizze_95633) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127956; + int32_t local_tid_127957; + int64_t group_sizze_127960; + int32_t wave_sizze_127959; + int32_t group_tid_127958; + + global_tid_127956 = get_global_id(0); + local_tid_127957 = get_local_id(0); + group_sizze_127960 = 
get_local_size(0); + wave_sizze_127959 = LOCKSTEP_WIDTH; + group_tid_127958 = get_group_id(0); + + int32_t phys_tid_95629; + + phys_tid_95629 = global_tid_127956; + + int64_t gtid_95626; + + gtid_95626 = squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957), k2p2zq_73023 * + k2p2zq_73023); + + int64_t gtid_95627; + + gtid_95627 = squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957) - + squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + + int64_t gtid_95628; + + gtid_95628 = sext_i32_i64(group_tid_127958) * segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957) - + squot64(sext_i32_i64(group_tid_127958) * segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957), k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957) - + squot64(sext_i32_i64(group_tid_127958) * + segmap_group_sizze_98178 + + sext_i32_i64(local_tid_127957), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + if ((slt64(gtid_95626, m_73008) && slt64(gtid_95627, k2p2zq_73023)) && + slt64(gtid_95628, k2p2zq_73023)) { + double x_98181 = ((__global double *) mem_122007)[gtid_95626 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_95627 * + k2p2zq_73023 + + gtid_95628]; + bool isnan_res_98182; + + isnan_res_98182 = futrts_isnan64(x_98181); + + double defunc_0_f_res_98183; + + if (isnan_res_98182) { + defunc_0_f_res_98183 = 0.0; + } else { + defunc_0_f_res_98183 = x_98181; + } + ((__global double *) mem_123691)[gtid_95626 * (k2p2zq_73023 * + k2p2zq_73023) + + gtid_95627 * k2p2zq_73023 + + gtid_95628] = defunc_0_f_res_98183; + } + + error_0: + return; + #undef segmap_group_sizze_98178 +} +__kernel void 
mainMagnitudezisegmap_95677(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t binop_x_120251, __global + unsigned char *mem_122007, __global + unsigned char *defunc_3_map_res_r_mem_123630, + __global unsigned char *mem_123685) +{ + #define segmap_group_sizze_98159 (mainMagnitudezisegmap_group_sizze_95680) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127951; + int32_t local_tid_127952; + int64_t group_sizze_127955; + int32_t wave_sizze_127954; + int32_t group_tid_127953; + + global_tid_127951 = get_global_id(0); + local_tid_127952 = get_local_id(0); + group_sizze_127955 = get_local_size(0); + wave_sizze_127954 = LOCKSTEP_WIDTH; + group_tid_127953 = get_group_id(0); + + int32_t phys_tid_95677; + + phys_tid_95677 = global_tid_127951; + + int64_t gtid_95675; + + gtid_95675 = squot64(sext_i32_i64(group_tid_127953) * + segmap_group_sizze_98159 + + sext_i32_i64(local_tid_127952), binop_x_120251); + + int64_t gtid_95676; + + gtid_95676 = sext_i32_i64(group_tid_127953) * segmap_group_sizze_98159 + + sext_i32_i64(local_tid_127952) - + squot64(sext_i32_i64(group_tid_127953) * segmap_group_sizze_98159 + + sext_i32_i64(local_tid_127952), binop_x_120251) * + binop_x_120251; + if (slt64(gtid_95675, m_73008) && slt64(gtid_95676, binop_x_120251)) { + int64_t binop_x_115166 = gtid_95675 * binop_x_120251; + int64_t binop_x_115167 = gtid_95676 + binop_x_115166; + int64_t new_index_115169 = squot64(binop_x_115167, binop_x_120251); + int64_t binop_y_115177 = new_index_115169 * binop_x_120251; + int64_t binop_x_115178 = binop_x_115167 - binop_y_115177; + int64_t new_index_115179 = squot64(binop_x_115178, k2p2zq_73023); + int64_t write_index_98162 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_73023 + + new_index_115179]; + int64_t binop_y_115234 = k2p2zq_73023 * new_index_115179; + int64_t new_index_115235 = binop_x_115178 - 
binop_y_115234; + int64_t write_index_98163 = ((__global + int64_t *) mem_123685)[new_index_115169 * + k2p2zq_73023 + + new_index_115235]; + double write_value_98164 = ((__global + double *) defunc_3_map_res_r_mem_123630)[new_index_115169 * + (k2p2zq_73023 * + k2p2zq_73023) + + new_index_115179 * + k2p2zq_73023 + + new_index_115235]; + + if (((sle64((int64_t) 0, gtid_95675) && slt64(gtid_95675, m_73008)) && + (sle64((int64_t) 0, write_index_98162) && slt64(write_index_98162, + k2p2zq_73023))) && + (sle64((int64_t) 0, write_index_98163) && slt64(write_index_98163, + k2p2zq_73023))) { + ((__global double *) mem_122007)[gtid_95675 * (k2p2zq_73023 * + k2p2zq_73023) + + write_index_98162 * k2p2zq_73023 + + write_index_98163] = + write_value_98164; + } + } + + error_0: + return; + #undef segmap_group_sizze_98159 +} +__kernel void mainMagnitudezisegmap_95834(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t rp1_73709, + int64_t binop_x_120251, __global + unsigned char *mem_122003, __global + unsigned char *mem_123133, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123681) +{ + #define segmap_group_sizze_98088 (mainMagnitudezisegmap_group_sizze_95837) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127945; + int32_t local_tid_127946; + int64_t group_sizze_127949; + int32_t wave_sizze_127948; + int32_t group_tid_127947; + + global_tid_127945 = get_global_id(0); + local_tid_127946 = get_local_id(0); + group_sizze_127949 = get_local_size(0); + wave_sizze_127948 = LOCKSTEP_WIDTH; + group_tid_127947 = get_group_id(0); + + int32_t phys_tid_95834; + + phys_tid_95834 = global_tid_127945; + + int64_t gtid_95832; + + gtid_95832 = squot64(sext_i32_i64(group_tid_127947) * + segmap_group_sizze_98088 + + sext_i32_i64(local_tid_127946), k2p2zq_73023); + + int64_t gtid_95833; + + gtid_95833 = 
sext_i32_i64(group_tid_127947) * segmap_group_sizze_98088 + + sext_i32_i64(local_tid_127946) - + squot64(sext_i32_i64(group_tid_127947) * segmap_group_sizze_98088 + + sext_i32_i64(local_tid_127946), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_95832, m_73008) && slt64(gtid_95833, k2p2zq_73023)) { + int64_t write_index_98093 = ((__global + int64_t *) mem_123133)[gtid_95833 * + m_73008 + + gtid_95832]; + double defunc_2_reduce_res_98094; + double redout_119858 = 0.0; + + for (int64_t i_119859 = 0; i_119859 < k2p2zq_73023; i_119859++) { + double x_98098 = ((__global double *) mem_123681)[gtid_95832 * + rp1_73709 + + i_119859]; + double x_98099 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_95832 * + binop_x_120251 + + i_119859 * + k2p2zq_73023 + + gtid_95833]; + double defunc_1_f_res_98100 = x_98098 * x_98099; + double defunc_1_op_res_98097 = defunc_1_f_res_98100 + redout_119858; + double redout_tmp_127950 = defunc_1_op_res_98097; + + redout_119858 = redout_tmp_127950; + } + defunc_2_reduce_res_98094 = redout_119858; + if ((sle64((int64_t) 0, gtid_95832) && slt64(gtid_95832, m_73008)) && + (sle64((int64_t) 0, write_index_98093) && slt64(write_index_98093, + k2p2zq_73023))) { + ((__global double *) mem_122003)[gtid_95832 * k2p2zq_73023 + + write_index_98093] = + defunc_2_reduce_res_98094; + } + } + + error_0: + return; + #undef segmap_group_sizze_98088 +} +__kernel void mainMagnitudezisegmap_95889(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t r_73698, int64_t rp1_73709, + int64_t num_groups_98010, + int64_t num_threads_126006, __global + unsigned char *mem_123130, __global + unsigned char *mem_123135, __global + unsigned char *mem_123633, __global + unsigned char *mem_123641, __global + unsigned char *mem_123678, __global + unsigned char *mem_125341, __global + unsigned char *double_buffer_mem_125599) +{ + #define segmap_group_sizze_98009 
(mainMagnitudezisegmap_group_sizze_95891) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127929; + int32_t local_tid_127930; + int64_t group_sizze_127933; + int32_t wave_sizze_127932; + int32_t group_tid_127931; + + global_tid_127929 = get_global_id(0); + local_tid_127930 = get_local_id(0); + group_sizze_127933 = get_local_size(0); + wave_sizze_127932 = LOCKSTEP_WIDTH; + group_tid_127931 = get_group_id(0); + + int32_t phys_tid_95889; + + phys_tid_95889 = global_tid_127929; + + int32_t phys_group_id_127934; + + phys_group_id_127934 = get_group_id(0); + for (int32_t i_127935 = 0; i_127935 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_98009)) - + phys_group_id_127934, sext_i64_i32(num_groups_98010)); + i_127935++) { + int32_t virt_group_id_127936 = phys_group_id_127934 + i_127935 * + sext_i64_i32(num_groups_98010); + int64_t gtid_95888 = sext_i32_i64(virt_group_id_127936) * + segmap_group_sizze_98009 + sext_i32_i64(local_tid_127930); + + if (slt64(gtid_95888, m_73008)) { + int64_t min_res_98016 = ((__global + int64_t *) mem_123135)[gtid_95888]; + int64_t min_res_98017 = smin64(r_73698, min_res_98016); + + for (int64_t i_127937 = 0; i_127937 < rp1_73709; i_127937++) { + ((__global double *) double_buffer_mem_125599)[phys_tid_95889 + + i_127937 * + num_threads_126006] = + ((__global double *) mem_123633)[gtid_95888 + i_127937 * + m_73008]; + } + for (int64_t j_98019 = 0; j_98019 < min_res_98017; j_98019++) { + bool y_98021 = slt64(j_98019, k2p2zq_73023); + bool index_certs_98022; + + if (!y_98021) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 606) == -1) { + global_failure_args[0] = j_98019; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + 
} + + double zeze_arg_98023 = ((__global + double *) mem_123130)[j_98019 * + m_73008 + + gtid_95888]; + bool zeze_res_98024 = zeze_arg_98023 == 0.0; + + if (zeze_res_98024) { + for (int64_t i_127939 = 0; i_127939 < rp1_73709; + i_127939++) { + ((__global double *) mem_125341)[phys_tid_95889 + + i_127939 * + num_threads_126006] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_127939 * + num_threads_126006]; + } + } else { + bool y_98026 = slt64(j_98019, rp1_73709); + bool index_certs_98027; + + if (!y_98026) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 607) == -1) { + global_failure_args[0] = j_98019; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_98028 = ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + j_98019 * + num_threads_126006]; + double negate_arg_98029 = zeze_arg_98023 * y_98028; + double t_98030 = 0.0 - negate_arg_98029; + int64_t x_98031 = sub64(rp1_73709, j_98019); + int64_t upper_bound_98032 = sub64(x_98031, (int64_t) 1); + double t_98033; + double t_98035 = t_98030; + + for (int64_t i0_98034 = 0; i0_98034 < upper_bound_98032; + i0_98034++) { + int64_t x_98036 = add64(j_98019, i0_98034); + int64_t i_98037 = add64((int64_t) 1, x_98036); + bool x_98038 = sle64((int64_t) 0, i_98037); + bool y_98039 = slt64(i_98037, rp1_73709); + bool bounds_check_98040 = x_98038 && y_98039; + bool index_ok_98041 = y_98021 && bounds_check_98040; + bool index_certs_98042; + + if (!index_ok_98041) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 608) == -1) { + global_failure_args[0] = j_98019; + global_failure_args[1] = i_98037; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_98043 = ((__global + double *) mem_123641)[i_98037 * + (m_73008 * + k2p2zq_73023) + + j_98019 * + m_73008 + + gtid_95888]; + bool index_certs_98044; + + if 
(!bounds_check_98040) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 609) == -1) { + global_failure_args[0] = i_98037; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_98045 = ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_98037 * + num_threads_126006]; + double y_98046 = x_98043 * y_98045; + double loopres_98047 = t_98035 - y_98046; + double t_tmp_127940 = loopres_98047; + + t_98035 = t_tmp_127940; + } + t_98033 = t_98035; + + double t_98048 = t_98033 / zeze_arg_98023; + double y_98049 = zeze_arg_98023 * t_98048; + double lw_val_98050 = y_98028 + y_98049; + + ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + j_98019 * + num_threads_126006] = + lw_val_98050; + for (int64_t i0_98053 = 0; i0_98053 < upper_bound_98032; + i0_98053++) { + int64_t x_98055 = add64(j_98019, i0_98053); + int64_t i_98056 = add64((int64_t) 1, x_98055); + bool x_98057 = sle64((int64_t) 0, i_98056); + bool y_98058 = slt64(i_98056, rp1_73709); + bool bounds_check_98059 = x_98057 && y_98058; + bool index_certs_98060; + + if (!bounds_check_98059) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 610) == -1) { + global_failure_args[0] = i_98056; + global_failure_args[1] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_98061 = ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_98056 * + num_threads_126006]; + bool index_ok_98062 = y_98021 && bounds_check_98059; + bool index_certs_98063; + + if (!index_ok_98062) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 611) == -1) { + global_failure_args[0] = j_98019; + global_failure_args[1] = i_98056; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_98064 = ((__global + double *) mem_123641)[i_98056 * + (m_73008 * + k2p2zq_73023) + + j_98019 * + m_73008 + + gtid_95888]; + double 
y_98065 = t_98048 * y_98064; + double lw_val_98066 = x_98061 + y_98065; + + ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_98056 * + num_threads_126006] = + lw_val_98066; + } + for (int64_t i_127942 = 0; i_127942 < rp1_73709; + i_127942++) { + ((__global double *) mem_125341)[phys_tid_95889 + + i_127942 * + num_threads_126006] = + ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_127942 * + num_threads_126006]; + } + } + for (int64_t i_127943 = 0; i_127943 < rp1_73709; i_127943++) { + ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_127943 * + num_threads_126006] = + ((__global double *) mem_125341)[phys_tid_95889 + + i_127943 * + num_threads_126006]; + } + } + for (int64_t i_127944 = 0; i_127944 < rp1_73709; i_127944++) { + ((__global double *) mem_123678)[i_127944 * m_73008 + + gtid_95888] = ((__global + double *) double_buffer_mem_125599)[phys_tid_95889 + + i_127944 * + num_threads_126006]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_98009 +} +__kernel void mainMagnitudezisegmap_95953(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t num_groups_97951, + int64_t binop_x_120251, + int64_t num_threads_126002, __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global unsigned char *mem_123396, + __global unsigned char *mem_123399, + __global unsigned char *mem_123415) +{ + #define segmap_group_sizze_97950 (mainMagnitudezisegmap_group_sizze_95956) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127786; + int32_t local_tid_127787; + int64_t group_sizze_127790; + int32_t wave_sizze_127789; + int32_t group_tid_127788; + + global_tid_127786 = get_global_id(0); + local_tid_127787 = get_local_id(0); + group_sizze_127790 = get_local_size(0); + wave_sizze_127789 = LOCKSTEP_WIDTH; + group_tid_127788 = 
get_group_id(0); + + int32_t phys_tid_95953; + + phys_tid_95953 = global_tid_127786; + + int32_t phys_group_id_127791; + + phys_group_id_127791 = get_group_id(0); + for (int32_t i_127792 = 0; i_127792 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + segmap_group_sizze_97950)) - + phys_group_id_127791, sext_i64_i32(num_groups_97951)); + i_127792++) { + int32_t virt_group_id_127793 = phys_group_id_127791 + i_127792 * + sext_i64_i32(num_groups_97951); + int64_t gtid_95951 = squot64(sext_i32_i64(virt_group_id_127793) * + segmap_group_sizze_97950 + + sext_i32_i64(local_tid_127787), + k2p2zq_73023); + int64_t gtid_95952 = sext_i32_i64(virt_group_id_127793) * + segmap_group_sizze_97950 + sext_i32_i64(local_tid_127787) - + squot64(sext_i32_i64(virt_group_id_127793) * + segmap_group_sizze_97950 + + sext_i32_i64(local_tid_127787), k2p2zq_73023) * + k2p2zq_73023; + + if (slt64(gtid_95951, m_73008) && slt64(gtid_95952, k2p2zq_73023)) { + for (int64_t i_119854 = 0; i_119854 < k2p2zq_73023; i_119854++) { + double defunc_2_reduce_res_97962; + double redout_119856 = 0.0; + + for (int64_t i_119857 = 0; i_119857 < k2p2zq_73023; + i_119857++) { + double x_97966 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_95951 * + binop_x_120251 + + i_119857 * + k2p2zq_73023 + + gtid_95952]; + double x_97967 = ((__global + double *) mem_123396)[gtid_95951 * + (k2p2zq_73023 * + k2p2zq_73023) + + i_119854 * + k2p2zq_73023 + + i_119857]; + double defunc_1_f_res_97968 = x_97966 * x_97967; + double defunc_1_op_res_97965 = defunc_1_f_res_97968 + + redout_119856; + double redout_tmp_127795 = defunc_1_op_res_97965; + + redout_119856 = redout_tmp_127795; + } + defunc_2_reduce_res_97962 = redout_119856; + ((__global double *) mem_123399)[phys_tid_95953 + i_119854 * + num_threads_126002] = + defunc_2_reduce_res_97962; + } + for (int64_t i_127796 = 0; i_127796 < k2p2zq_73023; i_127796++) { + ((__global double *) mem_123415)[i_127796 * (k2p2zq_73023 * + m_73008) + + gtid_95951 * 
k2p2zq_73023 + + gtid_95952] = ((__global + double *) mem_123399)[phys_tid_95953 + + i_127796 * + num_threads_126002]; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_97950 +} +__kernel void mainMagnitudezisegmap_96169(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t x_97825, int64_t i_97826, + int64_t j_m_i_97830, + int64_t num_groups_97858, + int64_t num_threads_125994, __global + unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123241, __global + unsigned char *mem_123259, __global + unsigned char *mem_123263, __global + unsigned char *mem_123275, __global + unsigned char *mem_123287) +{ + #define segmap_group_sizze_97857 (mainMagnitudezisegmap_group_sizze_96171) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127683; + int32_t local_tid_127684; + int64_t group_sizze_127687; + int32_t wave_sizze_127686; + int32_t group_tid_127685; + + global_tid_127683 = get_global_id(0); + local_tid_127684 = get_local_id(0); + group_sizze_127687 = get_local_size(0); + wave_sizze_127686 = LOCKSTEP_WIDTH; + group_tid_127685 = get_group_id(0); + + int32_t phys_tid_96169; + + phys_tid_96169 = global_tid_127683; + + int32_t phys_group_id_127688; + + phys_group_id_127688 = get_group_id(0); + for (int32_t i_127689 = 0; i_127689 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_97857)) - + phys_group_id_127688, sext_i64_i32(num_groups_97858)); + i_127689++) { + int32_t virt_group_id_127690 = phys_group_id_127688 + i_127689 * + sext_i64_i32(num_groups_97858); + int64_t gtid_96168 = sext_i32_i64(virt_group_id_127690) * + segmap_group_sizze_97857 + sext_i32_i64(local_tid_127684); + + if (slt64(gtid_96168, m_73008)) { + double defunc_3_map_res_r_transformed_row_97863 = ((__global + double *) mem_123143)[gtid_96168 * + 
(k2p2zq_73023 * + k2p2zq_73023) + + i_97826 * + k2p2zq_73023 + + i_97826]; + + for (int64_t i_119843 = 0; i_119843 < k2p2zq_73023; i_119843++) { + for (int64_t i_127692 = 0; i_127692 < k2p2zq_73023; + i_127692++) { + ((__global double *) mem_123275)[phys_tid_96169 + i_127692 * + num_threads_125994] = + ((__global double *) mem_123259)[i_119843 * (m_73008 * + k2p2zq_73023) + + gtid_96168 + i_127692 * + m_73008]; + } + + double defunc_2_map_res_transformed_row_97868 = ((__global + double *) mem_121938)[i_119843 * + k2p2zq_73023 + + i_97826]; + double defunc_2_reduce_res_97869; + double redout_119846 = 0.0; + + for (int64_t i_119847 = 0; i_119847 < j_m_i_97830; i_119847++) { + int64_t slice_120040 = x_97825 + i_119847; + double x_97874 = ((__global + double *) mem_123241)[slice_120040 * + (k2p2zq_73023 * + m_73008) + + gtid_96168 * + k2p2zq_73023 + + i_97826]; + bool isnan_res_97875; + + isnan_res_97875 = futrts_isnan64(x_97874); + + double defunc_1_f_res_97876; + + if (isnan_res_97875) { + defunc_1_f_res_97876 = 0.0; + } else { + double x_97873 = ((__global + double *) mem_123259)[i_119843 * + (m_73008 * + k2p2zq_73023) + + slice_120040 * + m_73008 + + gtid_96168]; + double defunc_1_f_res_f_res_97877 = x_97873 * x_97874; + + defunc_1_f_res_97876 = defunc_1_f_res_f_res_97877; + } + + double defunc_1_op_res_97872 = defunc_1_f_res_97876 + + redout_119846; + double redout_tmp_127693 = defunc_1_op_res_97872; + + redout_119846 = redout_tmp_127693; + } + defunc_2_reduce_res_97869 = redout_119846; + + double zm_res_97878 = defunc_2_map_res_transformed_row_97868 - + defunc_2_reduce_res_97869; + double zs_res_97879 = zm_res_97878 / + defunc_3_map_res_r_transformed_row_97863; + + ((__global double *) mem_123275)[phys_tid_96169 + i_97826 * + num_threads_125994] = + zs_res_97879; + for (int64_t i_127694 = 0; i_127694 < k2p2zq_73023; + i_127694++) { + ((__global double *) mem_123263)[phys_tid_96169 + + (i_119843 * + (num_threads_125994 * + k2p2zq_73023) + + i_127694 * + 
num_threads_125994)] = + ((__global double *) mem_123275)[phys_tid_96169 + + i_127694 * + num_threads_125994]; + } + } + for (int64_t i_127695 = 0; i_127695 < k2p2zq_73023; i_127695++) { + for (int64_t i_127696 = 0; i_127696 < k2p2zq_73023; + i_127696++) { + ((__global double *) mem_123287)[i_127695 * (m_73008 * + k2p2zq_73023) + + i_127696 * m_73008 + + gtid_96168] = ((__global + double *) mem_123263)[phys_tid_96169 + + (i_127695 * + (num_threads_125994 * + k2p2zq_73023) + + i_127696 * + num_threads_125994)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_97857 +} +__kernel void mainMagnitudezisegmap_96257(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t i_97826, + int64_t binop_x_120251, __global + unsigned char *mem_param_123252, + __global unsigned char *mem_123342) +{ + #define segmap_group_sizze_97939 (mainMagnitudezisegmap_group_sizze_96261) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127778; + int32_t local_tid_127779; + int64_t group_sizze_127782; + int32_t wave_sizze_127781; + int32_t group_tid_127780; + + global_tid_127778 = get_global_id(0); + local_tid_127779 = get_local_id(0); + group_sizze_127782 = get_local_size(0); + wave_sizze_127781 = LOCKSTEP_WIDTH; + group_tid_127780 = get_group_id(0); + + int32_t phys_tid_96257; + + phys_tid_96257 = global_tid_127778; + + int64_t gtid_96254; + + gtid_96254 = squot64(sext_i32_i64(group_tid_127780) * + segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779), k2p2zq_73023); + + int64_t gtid_96255; + + gtid_96255 = sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779) - + squot64(sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779), k2p2zq_73023) * k2p2zq_73023; + + int64_t gtid_96256; + + gtid_96256 = 
sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779) - + squot64(sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779), k2p2zq_73023) * k2p2zq_73023 - + (sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779) - + squot64(sext_i32_i64(group_tid_127780) * segmap_group_sizze_97939 + + sext_i32_i64(local_tid_127779), k2p2zq_73023) * k2p2zq_73023); + if ((slt64(gtid_96254, m_73008) && slt64(gtid_96255, k2p2zq_73023)) && + slt64(gtid_96256, (int64_t) 1)) { + double zs_res_97942 = ((__global double *) mem_123342)[gtid_96254 * + k2p2zq_73023 + + gtid_96255]; + + if (((sle64((int64_t) 0, gtid_96254) && slt64(gtid_96254, m_73008)) && + (sle64((int64_t) 0, gtid_96255) && slt64(gtid_96255, + k2p2zq_73023))) && + (sle64((int64_t) 0, i_97826) && slt64(i_97826, k2p2zq_73023))) { + ((__global double *) mem_param_123252)[gtid_96254 * binop_x_120251 + + gtid_96255 * k2p2zq_73023 + + i_97826] = zs_res_97942; + } + } + + error_0: + return; + #undef segmap_group_sizze_97939 +} +__kernel void mainMagnitudezisegmap_96269(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t i_97826, __global + unsigned char *mem_121938, __global + unsigned char *mem_123143, __global + unsigned char *mem_123338, __global + unsigned char *mem_123342) +{ + #define segmap_group_sizze_97928 (mainMagnitudezisegmap_group_sizze_96272) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127773; + int32_t local_tid_127774; + int64_t group_sizze_127777; + int32_t wave_sizze_127776; + int32_t group_tid_127775; + + global_tid_127773 = get_global_id(0); + local_tid_127774 = get_local_id(0); + group_sizze_127777 = get_local_size(0); + wave_sizze_127776 = LOCKSTEP_WIDTH; + group_tid_127775 = get_group_id(0); + + int32_t phys_tid_96269; + + phys_tid_96269 = global_tid_127773; + + 
int64_t gtid_96267; + + gtid_96267 = squot64(sext_i32_i64(group_tid_127775) * + segmap_group_sizze_97928 + + sext_i32_i64(local_tid_127774), k2p2zq_73023); + + int64_t gtid_96268; + + gtid_96268 = sext_i32_i64(group_tid_127775) * segmap_group_sizze_97928 + + sext_i32_i64(local_tid_127774) - + squot64(sext_i32_i64(group_tid_127775) * segmap_group_sizze_97928 + + sext_i32_i64(local_tid_127774), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_96267, m_73008) && slt64(gtid_96268, k2p2zq_73023)) { + double defunc_3_map_res_r_transformed_row_97931 = ((__global + double *) mem_123143)[gtid_96267 * + (k2p2zq_73023 * + k2p2zq_73023) + + i_97826 * + k2p2zq_73023 + + i_97826]; + double defunc_2_map_res_transformed_row_97932 = ((__global + double *) mem_121938)[gtid_96268 * + k2p2zq_73023 + + i_97826]; + double defunc_2_reduce_res_97933 = ((__global + double *) mem_123338)[gtid_96267 * + k2p2zq_73023 + + gtid_96268]; + double zm_res_97934 = defunc_2_map_res_transformed_row_97932 - + defunc_2_reduce_res_97933; + double zs_res_97935 = zm_res_97934 / + defunc_3_map_res_r_transformed_row_97931; + + ((__global double *) mem_123342)[gtid_96267 * k2p2zq_73023 + + gtid_96268] = zs_res_97935; + } + + error_0: + return; + #undef segmap_group_sizze_97928 +} +__kernel void mainMagnitudezisegmap_96596(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t rp1_73709, __global + unsigned char *mem_123127, __global + unsigned char *mem_123135, __global + unsigned char *mem_123138, __global + unsigned char *mem_123143) +{ + #define segmap_group_sizze_97735 (mainMagnitudezisegmap_group_sizze_96600) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127650; + int32_t local_tid_127651; + int64_t group_sizze_127654; + int32_t wave_sizze_127653; + int32_t group_tid_127652; + + global_tid_127650 = get_global_id(0); + local_tid_127651 = get_local_id(0); + group_sizze_127654 = 
get_local_size(0); + wave_sizze_127653 = LOCKSTEP_WIDTH; + group_tid_127652 = get_group_id(0); + + int32_t phys_tid_96596; + + phys_tid_96596 = global_tid_127650; + + int64_t gtid_96593; + + gtid_96593 = squot64(sext_i32_i64(group_tid_127652) * + segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651), k2p2zq_73023 * + k2p2zq_73023); + + int64_t gtid_96594; + + gtid_96594 = squot64(sext_i32_i64(group_tid_127652) * + segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651) - + squot64(sext_i32_i64(group_tid_127652) * + segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + + int64_t gtid_96595; + + gtid_96595 = sext_i32_i64(group_tid_127652) * segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651) - + squot64(sext_i32_i64(group_tid_127652) * segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651), k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - squot64(sext_i32_i64(group_tid_127652) * + segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651) - + squot64(sext_i32_i64(group_tid_127652) * + segmap_group_sizze_97735 + + sext_i32_i64(local_tid_127651), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + if ((slt64(gtid_96593, m_73008) && slt64(gtid_96594, k2p2zq_73023)) && + slt64(gtid_96595, k2p2zq_73023)) { + int64_t min_res_97738 = ((__global int64_t *) mem_123135)[gtid_96593]; + bool cond_f_res_97739 = ((__global bool *) mem_123138)[gtid_96593 * + k2p2zq_73023 + + gtid_96594]; + int64_t x_97742 = add64((int64_t) 1, gtid_96595); + bool cond_97743 = slt64(min_res_97738, x_97742); + bool x_97744 = !cond_97743; + bool y_97745 = cond_f_res_97739 && x_97744; + bool cond_97746 = cond_97743 || y_97745; + double defunc_1_f_res_97747; + + if (cond_97746) { + defunc_1_f_res_97747 = NAN; + } else { + double x_97741 = ((__global double *) mem_123127)[gtid_96594 * + (m_73008 * + rp1_73709) + + gtid_96595 * + 
m_73008 + + gtid_96593]; + + defunc_1_f_res_97747 = x_97741; + } + ((__global double *) mem_123143)[gtid_96593 * (k2p2zq_73023 * + k2p2zq_73023) + + gtid_96594 * k2p2zq_73023 + + gtid_96595] = defunc_1_f_res_97747; + } + + error_0: + return; + #undef segmap_group_sizze_97735 +} +__kernel void mainMagnitudezisegmap_96631(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global unsigned char *mem_123135, + __global unsigned char *mem_123138) +{ + #define segmap_group_sizze_97720 (mainMagnitudezisegmap_group_sizze_96634) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127645; + int32_t local_tid_127646; + int64_t group_sizze_127649; + int32_t wave_sizze_127648; + int32_t group_tid_127647; + + global_tid_127645 = get_global_id(0); + local_tid_127646 = get_local_id(0); + group_sizze_127649 = get_local_size(0); + wave_sizze_127648 = LOCKSTEP_WIDTH; + group_tid_127647 = get_group_id(0); + + int32_t phys_tid_96631; + + phys_tid_96631 = global_tid_127645; + + int64_t gtid_96629; + + gtid_96629 = squot64(sext_i32_i64(group_tid_127647) * + segmap_group_sizze_97720 + + sext_i32_i64(local_tid_127646), k2p2zq_73023); + + int64_t gtid_96630; + + gtid_96630 = sext_i32_i64(group_tid_127647) * segmap_group_sizze_97720 + + sext_i32_i64(local_tid_127646) - + squot64(sext_i32_i64(group_tid_127647) * segmap_group_sizze_97720 + + sext_i32_i64(local_tid_127646), k2p2zq_73023) * k2p2zq_73023; + if (slt64(gtid_96629, m_73008) && slt64(gtid_96630, k2p2zq_73023)) { + int64_t min_res_97723 = ((__global int64_t *) mem_123135)[gtid_96629]; + int64_t x_97725 = add64((int64_t) 1, gtid_96630); + bool cond_f_res_97726 = slt64(min_res_97723, x_97725); + + ((__global bool *) mem_123138)[gtid_96629 * k2p2zq_73023 + gtid_96630] = + cond_f_res_97726; + } + + error_0: + return; + #undef segmap_group_sizze_97720 +} +__kernel void mainMagnitudezisegmap_96689(__global int 
*global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t m_73095, + unsigned char y_73099, + int64_t k_73214, int64_t rp1_73709, + int64_t min_res_73721, + int64_t num_groups_97425, + int64_t num_threads_125979, __global + unsigned char *mem_120248, __global + unsigned char *mem_122793, __global + unsigned char *mem_122796, __global + unsigned char *mem_122800, __global + unsigned char *mem_122803, __global + unsigned char *mem_123127, __global + unsigned char *mem_123130, __global + unsigned char *mem_123133, __global + unsigned char *mem_123135, __global + unsigned char *mem_125265, __global + unsigned char *mem_125267, __global + unsigned char *mem_125472, __global + unsigned char *mem_125480, __global + unsigned char *mem_125482, __global + unsigned char *mem_125512, __global + unsigned char *double_buffer_mem_125586, + __global + unsigned char *double_buffer_mem_125587, + __global + unsigned char *double_buffer_mem_125588) +{ + #define segmap_group_sizze_97424 (mainMagnitudezisegmap_group_sizze_96691) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127576; + int32_t local_tid_127577; + int64_t group_sizze_127580; + int32_t wave_sizze_127579; + int32_t group_tid_127578; + + global_tid_127576 = get_global_id(0); + local_tid_127577 = get_local_id(0); + group_sizze_127580 = get_local_size(0); + wave_sizze_127579 = LOCKSTEP_WIDTH; + group_tid_127578 = get_group_id(0); + + int32_t phys_tid_96689; + + phys_tid_96689 = global_tid_127576; + + int32_t phys_group_id_127581; + + phys_group_id_127581 = get_group_id(0); + for (int32_t i_127582 = 0; i_127582 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_97424)) - + 
phys_group_id_127581, sext_i64_i32(num_groups_97425)); + i_127582++) { + int32_t virt_group_id_127583 = phys_group_id_127581 + i_127582 * + sext_i64_i32(num_groups_97425); + int64_t gtid_96688 = sext_i32_i64(virt_group_id_127583) * + segmap_group_sizze_97424 + sext_i32_i64(local_tid_127577); + + if (slt64(gtid_96688, m_73008)) { + for (int64_t i_127584 = 0; i_127584 < k2p2zq_73023; i_127584++) { + ((__global int64_t *) mem_122803)[phys_tid_96689 + i_127584 * + num_threads_125979] = + ((__global int64_t *) mem_120248)[i_127584]; + } + for (int64_t i_127585 = 0; i_127585 < k2p2zq_73023; i_127585++) { + for (int64_t i_127586 = 0; i_127586 < rp1_73709; i_127586++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127585 * + (num_threads_125979 * + rp1_73709) + + i_127586 * + num_threads_125979)] = + ((__global double *) mem_122793)[gtid_96688 + + (i_127585 * (m_73008 * + rp1_73709) + + i_127586 * m_73008)]; + } + } + for (int64_t i_127587 = 0; i_127587 < k2p2zq_73023; i_127587++) { + ((__global double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127587 * + num_threads_125979] = + ((__global double *) mem_122796)[gtid_96688 + i_127587 * + m_73008]; + } + for (int64_t i_127588 = 0; i_127588 < (int64_t) 2; i_127588++) { + for (int64_t i_127589 = 0; i_127589 < k2p2zq_73023; + i_127589++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (i_127588 * + (num_threads_125979 * + k2p2zq_73023) + + i_127589 * + num_threads_125979)] = + ((__global double *) mem_122800)[gtid_96688 + + (i_127588 * (m_73008 * + k2p2zq_73023) + + i_127589 * m_73008)]; + } + } + + int64_t dqrdc2_res_97439; + int64_t k_97445 = k_73214; + + for (int64_t l_97440 = 0; l_97440 < min_res_73721; l_97440++) { + int64_t x_97446 = add64((int64_t) 1, l_97440); + bool cond_97447 = slt64(x_97446, k_97445); + bool loop_cond_97448; + + if (cond_97447) { + bool y_97449 = slt64(l_97440, k2p2zq_73023); + bool index_certs_97450; + + if (!y_97449) { + { + if 
(atomic_cmpxchg_i32_global(global_failure, -1, + 568) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_97440; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_97451 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + l_97440 * + num_threads_125979)]; + double zt_res_97452 = 1.0e-7 * zt_arg_97451; + bool index_certs_97453; + + if (!y_97449) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 569) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_97454 = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + l_97440 * + num_threads_125979]; + bool zl_res_97455 = zl_arg_97454 < zt_res_97452; + + loop_cond_97448 = zl_res_97455; + } else { + loop_cond_97448 = 0; + } + + bool y_97456 = slt64(l_97440, k2p2zq_73023); + int64_t upper_bound_97457 = sub64(k2p2zq_73023, x_97446); + bool loop_not_taken_97458 = !loop_cond_97448; + bool protect_assert_disj_97459 = y_97456 || + loop_not_taken_97458; + bool index_certs_97460; + + if (!protect_assert_disj_97459) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 570) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_97461; + + if (!protect_assert_disj_97459) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 571) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = l_97440; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_97462; + + if (!protect_assert_disj_97459) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 572) == -1) { + 
global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_97440; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool protect_assert_disj_97463 = y_73099 || + loop_not_taken_97458; + bool index_certs_97464; + + if (!protect_assert_disj_97463) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 573) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_97465; + + if (!protect_assert_disj_97463) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 574) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool index_certs_97466; + + if (!protect_assert_disj_97463) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 575) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = m_73095; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loopres_97467; + int64_t loopres_97472; + bool loop_while_97473; + int64_t k_97478; + + loop_while_97473 = loop_cond_97448; + k_97478 = k_97445; + while (loop_while_97473) { + for (int64_t i_97480 = 0; i_97480 < rp1_73709; i_97480++) { + bool index_certs_97482; + + if (!y_97456) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 576) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = i_97480; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double t_97483 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + i_97480 * + num_threads_125979)]; 
+ + for (int64_t j0_97485 = 0; j0_97485 < upper_bound_97457; + j0_97485++) { + int64_t j_97487 = add64(x_97446, j0_97485); + bool x_97488 = sle64((int64_t) 0, j_97487); + bool y_97489 = slt64(j_97487, k2p2zq_73023); + bool bounds_check_97490 = x_97488 && y_97489; + bool index_certs_97491; + + if (!bounds_check_97490) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 577) == + -1) { + global_failure_args[0] = j_97487; + global_failure_args[1] = i_97480; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_97492 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (j_97487 * + (num_threads_125979 * + rp1_73709) + + i_97480 * + num_threads_125979)]; + int64_t i_97493 = sub64(j_97487, (int64_t) 1); + bool x_97494 = sle64((int64_t) 0, i_97493); + bool y_97495 = slt64(i_97493, k2p2zq_73023); + bool bounds_check_97496 = x_97494 && y_97495; + bool index_certs_97497; + + if (!bounds_check_97496) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 578) == + -1) { + global_failure_args[0] = i_97493; + global_failure_args[1] = i_97480; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_97493 * + (num_threads_125979 * + rp1_73709) + + i_97480 * + num_threads_125979)] = + lw_val_97492; + } + + bool index_certs_97499; + + if (!y_73099) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 579) == -1) { + global_failure_args[0] = m_73095; + global_failure_args[1] = i_97480; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (m_73095 * + (num_threads_125979 * + rp1_73709) + + i_97480 * + num_threads_125979)] = + t_97483; + } + + int64_t 
i_97501 = ((__global + int64_t *) mem_122803)[phys_tid_96689 + + l_97440 * + num_threads_125979]; + double t_97502 = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + l_97440 * + num_threads_125979]; + double tt_97503 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + l_97440 * + num_threads_125979]; + double ttt_97504 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + l_97440 * + num_threads_125979)]; + + for (int64_t j0_97508 = 0; j0_97508 < upper_bound_97457; + j0_97508++) { + int64_t j_97512 = add64(x_97446, j0_97508); + bool x_97513 = sle64((int64_t) 0, j_97512); + bool y_97514 = slt64(j_97512, k2p2zq_73023); + bool bounds_check_97515 = x_97513 && y_97514; + bool index_certs_97516; + + if (!bounds_check_97515) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 580) == -1) { + global_failure_args[0] = j_97512; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + int64_t lw_val_97517 = ((__global + int64_t *) mem_122803)[phys_tid_96689 + + j_97512 * + num_threads_125979]; + int64_t i_97518 = sub64(j_97512, (int64_t) 1); + bool x_97519 = sle64((int64_t) 0, i_97518); + bool y_97520 = slt64(i_97518, k2p2zq_73023); + bool bounds_check_97521 = x_97519 && y_97520; + bool index_certs_97522; + + if (!bounds_check_97521) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 581) == -1) { + global_failure_args[0] = i_97518; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global int64_t *) mem_122803)[phys_tid_96689 + + i_97518 * + num_threads_125979] = + lw_val_97517; + + double lw_val_97524 = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + j_97512 * + num_threads_125979]; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_97518 * + num_threads_125979] = + lw_val_97524; + + bool index_certs_97526; + + if 
(!bounds_check_97515) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 582) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = j_97512; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_97527 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + j_97512 * + num_threads_125979]; + bool index_certs_97528; + + if (!bounds_check_97521) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 583) == -1) { + global_failure_args[0] = (int64_t) 0; + global_failure_args[1] = i_97518; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + i_97518 * + num_threads_125979] = + lw_val_97527; + + bool index_certs_97530; + + if (!bounds_check_97515) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 584) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = j_97512; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double lw_val_97531 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + j_97512 * + num_threads_125979)]; + bool index_certs_97532; + + if (!bounds_check_97521) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 585) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = i_97518; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + i_97518 * + num_threads_125979)] = + lw_val_97531; + } + ((__global int64_t *) mem_122803)[phys_tid_96689 + m_73095 * + 
num_threads_125979] = + i_97501; + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + m_73095 * + num_threads_125979] = + t_97502; + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + m_73095 * + num_threads_125979] = + tt_97503; + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + m_73095 * + num_threads_125979)] = + ttt_97504; + + int64_t k_97538 = sub64(k_97478, (int64_t) 1); + bool cond_97539 = slt64(x_97446, k_97538); + bool loop_cond_97540; + + if (cond_97539) { + bool index_certs_97541; + + if (!y_97456) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 586) == -1) { + global_failure_args[0] = (int64_t) 1; + global_failure_args[1] = l_97440; + global_failure_args[2] = (int64_t) 2; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zt_arg_97542 = ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (num_threads_125979 * + k2p2zq_73023 + + l_97440 * + num_threads_125979)]; + double zt_res_97543 = 1.0e-7 * zt_arg_97542; + bool index_certs_97544; + + if (!y_97456) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 587) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zl_arg_97545 = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + l_97440 * + num_threads_125979]; + bool zl_res_97546 = zl_arg_97545 < zt_res_97543; + + loop_cond_97540 = zl_res_97546; + } else { + loop_cond_97540 = 0; + } + + bool loop_while_tmp_127595 = loop_cond_97540; + int64_t k_tmp_127600 = k_97538; + + loop_while_97473 = loop_while_tmp_127595; + k_97478 = k_tmp_127600; + } + loopres_97467 = loop_while_97473; + loopres_97472 = k_97478; + + bool cond_97547 = x_97446 == rp1_73709; + int64_t j_m_i_97548 = sub64(rp1_73709, l_97440); + bool empty_slice_97552 = j_m_i_97548 == (int64_t) 0; + int64_t 
m_97553 = sub64(j_m_i_97548, (int64_t) 1); + int64_t i_p_m_t_s_97554 = add64(l_97440, m_97553); + bool zzero_leq_i_p_m_t_s_97555 = sle64((int64_t) 0, + i_p_m_t_s_97554); + bool i_p_m_t_s_leq_w_97556 = slt64(i_p_m_t_s_97554, rp1_73709); + bool i_lte_j_97557 = sle64(l_97440, rp1_73709); + bool y_97558 = zzero_leq_i_p_m_t_s_97555 && + i_p_m_t_s_leq_w_97556; + bool y_97559 = i_lte_j_97557 && y_97558; + bool ok_or_empty_97560 = empty_slice_97552 || y_97559; + bool index_ok_97561 = y_97456 && ok_or_empty_97560; + + if (cond_97547) { + for (int64_t i_127606 = 0; i_127606 < k2p2zq_73023; + i_127606++) { + ((__global double *) mem_125482)[phys_tid_96689 + + i_127606 * + num_threads_125979] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127606 * + num_threads_125979]; + } + for (int64_t i_127607 = 0; i_127607 < (int64_t) 2; + i_127607++) { + for (int64_t i_127608 = 0; i_127608 < k2p2zq_73023; + i_127608++) { + ((__global double *) mem_125480)[phys_tid_96689 + + (i_127607 * + (num_threads_125979 * + k2p2zq_73023) + + i_127608 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (i_127607 * + (num_threads_125979 * + k2p2zq_73023) + + i_127608 * + num_threads_125979)]; + } + } + for (int64_t i_127609 = 0; i_127609 < k2p2zq_73023; + i_127609++) { + for (int64_t i_127610 = 0; i_127610 < rp1_73709; + i_127610++) { + ((__global double *) mem_125512)[phys_tid_96689 + + (i_127609 * + (num_threads_125979 * + rp1_73709) + + i_127610 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127609 * + (num_threads_125979 * + rp1_73709) + + i_127610 * + num_threads_125979)]; + } + } + } else { + bool index_certs_97562; + + if (!index_ok_97561) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 588) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = l_97440; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + 
local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_97564; + double redout_119833 = 0.0; + + for (int64_t i_119834 = 0; i_119834 < j_m_i_97548; + i_119834++) { + int64_t slice_120035 = l_97440 + i_119834; + double x_97568 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + slice_120035 * + num_threads_125979)]; + double defunc_1_f_res_97569 = x_97568 * x_97568; + double defunc_1_op_res_97567 = defunc_1_f_res_97569 + + redout_119833; + double redout_tmp_127611 = defunc_1_op_res_97567; + + redout_119833 = redout_tmp_127611; + } + defunc_2_reduce_res_97564 = redout_119833; + + double sqrt_res_97570; + + sqrt_res_97570 = futrts_sqrt64(defunc_2_reduce_res_97564); + + bool zeze_res_97571 = sqrt_res_97570 == 0.0; + + if (zeze_res_97571) { + for (int64_t i_127612 = 0; i_127612 < k2p2zq_73023; + i_127612++) { + ((__global double *) mem_125267)[phys_tid_96689 + + i_127612 * + num_threads_125979] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127612 * + num_threads_125979]; + } + for (int64_t i_127613 = 0; i_127613 < (int64_t) 2; + i_127613++) { + for (int64_t i_127614 = 0; i_127614 < k2p2zq_73023; + i_127614++) { + ((__global + double *) mem_125265)[phys_tid_96689 + + (i_127613 * + (num_threads_125979 * + k2p2zq_73023) + + i_127614 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (i_127613 * + (num_threads_125979 * + k2p2zq_73023) + + i_127614 * + num_threads_125979)]; + } + } + for (int64_t i_127615 = 0; i_127615 < k2p2zq_73023; + i_127615++) { + for (int64_t i_127616 = 0; i_127616 < rp1_73709; + i_127616++) { + ((__global + double *) mem_125472)[phys_tid_96689 + + (i_127615 * + (num_threads_125979 * + rp1_73709) + + i_127616 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127615 * + (num_threads_125979 * + rp1_73709) + + i_127616 * + num_threads_125979)]; 
+ } + } + } else { + bool y_97575 = slt64(l_97440, rp1_73709); + bool index_ok_97576 = y_97456 && y_97575; + bool index_certs_97577; + + if (!index_ok_97576) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 589) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = l_97440; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double znze_arg_97578 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)]; + bool zeze_res_97579 = znze_arg_97578 == 0.0; + bool znze_res_97580 = !zeze_res_97579; + double nrmxl_97581; + + if (znze_res_97580) { + double abs_res_97582 = fabs(sqrt_res_97570); + double sgn_res_97583 = fsignum32(znze_arg_97578); + double zt_res_97584 = abs_res_97582 * sgn_res_97583; + + nrmxl_97581 = zt_res_97584; + } else { + nrmxl_97581 = sqrt_res_97570; + } + for (int64_t i0_97586 = 0; i0_97586 < j_m_i_97548; + i0_97586++) { + int64_t i_97588 = add64(l_97440, i0_97586); + bool x_97589 = sle64((int64_t) 0, i_97588); + bool y_97590 = slt64(i_97588, rp1_73709); + bool bounds_check_97591 = x_97589 && y_97590; + bool index_ok_97592 = y_97456 && bounds_check_97591; + bool index_certs_97593; + + if (!index_ok_97592) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 590) == + -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = i_97588; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_97594 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + i_97588 * + num_threads_125979)]; + double lw_val_97595 = x_97594 / nrmxl_97581; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + i_97588 * + 
num_threads_125979)] = + lw_val_97595; + } + + double zp_arg_97597 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)]; + double zp_res_97598 = 1.0 + zp_arg_97597; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)] = + zp_res_97598; + + bool bounds_invalid_upwards_97600 = slt64(k2p2zq_73023, + x_97446); + bool valid_97601 = !bounds_invalid_upwards_97600; + bool range_valid_c_97602; + + if (!valid_97601) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 591) == -1) { + global_failure_args[0] = x_97446; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + bool loop_nonempty_97603 = slt64((int64_t) 0, + upper_bound_97457); + bool loop_not_taken_97604 = !loop_nonempty_97603; + bool protect_assert_disj_97605 = index_ok_97576 || + loop_not_taken_97604; + bool index_certs_97606; + + if (!protect_assert_disj_97605) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 592) == -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = l_97440; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_97610 = 0; i_97610 < upper_bound_97457; + i_97610++) { + int64_t index_primexp_97614 = add64(x_97446, + i_97610); + bool x_97615 = sle64((int64_t) 0, + index_primexp_97614); + bool y_97616 = slt64(index_primexp_97614, + k2p2zq_73023); + bool bounds_check_97617 = x_97615 && y_97616; + double t_97618; + double t_97620 = 0.0; + + for (int64_t i0_97619 = 0; i0_97619 < j_m_i_97548; + i0_97619++) { + int64_t i_97621 = add64(l_97440, i0_97619); + bool x_97622 = sle64((int64_t) 0, i_97621); + bool y_97623 = slt64(i_97621, rp1_73709); + bool bounds_check_97624 = x_97622 && y_97623; + bool index_ok_97625 = y_97456 && 
+ bounds_check_97624; + bool index_certs_97626; + + if (!index_ok_97625) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 593) == + -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = i_97621; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_97627 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + i_97621 * + num_threads_125979)]; + bool index_ok_97628 = bounds_check_97617 && + bounds_check_97624; + bool index_certs_97629; + + if (!index_ok_97628) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 594) == + -1) { + global_failure_args[0] = + index_primexp_97614; + global_failure_args[1] = i_97621; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_97630 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (index_primexp_97614 * + (num_threads_125979 * + rp1_73709) + + i_97621 * + num_threads_125979)]; + double y_97631 = x_97627 * y_97630; + double loopres_97632 = t_97620 - y_97631; + double t_tmp_127621 = loopres_97632; + + t_97620 = t_tmp_127621; + } + t_97618 = t_97620; + + double y_97633 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)]; + double t_97634 = t_97618 / y_97633; + + for (int64_t i0_97636 = 0; i0_97636 < j_m_i_97548; + i0_97636++) { + int64_t i_97638 = add64(l_97440, i0_97636); + bool x_97639 = sle64((int64_t) 0, i_97638); + bool y_97640 = slt64(i_97638, rp1_73709); + bool bounds_check_97641 = x_97639 && y_97640; + bool index_ok_97642 = bounds_check_97617 && + bounds_check_97641; + bool index_certs_97643; + + if (!index_ok_97642) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 595) == + -1) { + 
global_failure_args[0] = + index_primexp_97614; + global_failure_args[1] = i_97638; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_97644 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (index_primexp_97614 * + (num_threads_125979 * + rp1_73709) + + i_97638 * + num_threads_125979)]; + bool index_ok_97645 = y_97456 && + bounds_check_97641; + bool index_certs_97646; + + if (!index_ok_97645) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 596) == + -1) { + global_failure_args[0] = l_97440; + global_failure_args[1] = i_97638; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_97647 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + i_97638 * + num_threads_125979)]; + double y_97648 = t_97634 * y_97647; + double lw_val_97649 = x_97644 + y_97648; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (index_primexp_97614 * + (num_threads_125979 * + rp1_73709) + + i_97638 * + num_threads_125979)] = + lw_val_97649; + } + + bool index_certs_97651; + + if (!bounds_check_97617) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 597) == + -1) { + global_failure_args[0] = + index_primexp_97614; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double zeze_arg_97652 = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + index_primexp_97614 * + num_threads_125979]; + bool zeze_res_97653 = zeze_arg_97652 == 0.0; + + if (!zeze_res_97653) { + bool index_ok_97656 = y_97575 && + bounds_check_97617; + bool index_certs_97657; + + if (!index_ok_97656) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 598) == + -1) { + global_failure_args[0] = + index_primexp_97614; + 
global_failure_args[1] = l_97440; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double abs_arg_97658 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (index_primexp_97614 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)]; + double abs_res_97659 = fabs(abs_arg_97658); + double zs_res_97660 = abs_res_97659 / + zeze_arg_97652; + double ztzt_res_97661 = fpow64(zs_res_97660, + 2.0); + double zm_res_97662 = 1.0 - ztzt_res_97661; + double max_res_97663 = fmax64(0.0, + zm_res_97662); + double abs_res_97664 = fabs(max_res_97663); + bool zgze_res_97665 = 1.0e-6 <= abs_res_97664; + int64_t j_m_i_97666 = sub64(rp1_73709, x_97446); + + if (zgze_res_97665) { + double sqrt_res_97669; + + sqrt_res_97669 = + futrts_sqrt64(max_res_97663); + + double zt_res_97670 = zeze_arg_97652 * + sqrt_res_97669; + + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + index_primexp_97614 * + num_threads_125979] = + zt_res_97670; + } else { + bool empty_slice_97672 = j_m_i_97666 == + (int64_t) 0; + int64_t m_97673 = sub64(j_m_i_97666, + (int64_t) 1); + int64_t i_p_m_t_s_97674 = add64(x_97446, + m_97673); + bool zzero_leq_i_p_m_t_s_97675 = + sle64((int64_t) 0, i_p_m_t_s_97674); + bool i_p_m_t_s_leq_w_97676 = + slt64(i_p_m_t_s_97674, rp1_73709); + bool zzero_lte_i_97677 = sle64((int64_t) 0, + x_97446); + bool i_lte_j_97678 = sle64(x_97446, + rp1_73709); + bool y_97679 = i_p_m_t_s_leq_w_97676 && + zzero_lte_i_97677; + bool y_97680 = zzero_leq_i_p_m_t_s_97675 && + y_97679; + bool y_97681 = i_lte_j_97678 && y_97680; + bool forwards_ok_97682 = + zzero_lte_i_97677 && y_97681; + bool ok_or_empty_97683 = + empty_slice_97672 || forwards_ok_97682; + bool index_ok_97684 = bounds_check_97617 && + ok_or_empty_97683; + bool index_certs_97685; + + if (!index_ok_97684) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 599) == + -1) { + 
global_failure_args[0] = + index_primexp_97614; + global_failure_args[1] = + x_97446; + global_failure_args[2] = + k2p2zq_73023; + global_failure_args[3] = + rp1_73709; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_2_reduce_res_97687; + double redout_119835 = 0.0; + + for (int64_t i_119836 = 0; i_119836 < + j_m_i_97666; i_119836++) { + int64_t slice_120036 = x_97446 + + i_119836; + double x_97691 = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (index_primexp_97614 * + (num_threads_125979 * + rp1_73709) + + slice_120036 * + num_threads_125979)]; + double defunc_1_f_res_97692 = x_97691 * + x_97691; + double defunc_1_op_res_97690 = + defunc_1_f_res_97692 + + redout_119835; + double redout_tmp_127623 = + defunc_1_op_res_97690; + + redout_119835 = redout_tmp_127623; + } + defunc_2_reduce_res_97687 = redout_119835; + + double sqrt_res_97693; + + sqrt_res_97693 = + futrts_sqrt64(defunc_2_reduce_res_97687); + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + index_primexp_97614 * + num_threads_125979] = + sqrt_res_97693; + + bool index_certs_97695; + + if (!bounds_check_97617) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, + 600) == + -1) { + global_failure_args[0] = + (int64_t) 0; + global_failure_args[1] = + index_primexp_97614; + global_failure_args[2] = + (int64_t) 2; + global_failure_args[3] = + k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127624 = 0; i_127624 < + (int64_t) 1; i_127624++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (index_primexp_97614 + + i_127624) * + num_threads_125979] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + num_threads_125979 * + index_primexp_97614 + + i_127624 * + num_threads_125979]; + } + } + } + } + + bool index_certs_97698; + + if (!y_97456) { + { + if (atomic_cmpxchg_i32_global(global_failure, + -1, 601) == -1) { + global_failure_args[0] = l_97440; + 
global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + for (int64_t i_127625 = 0; i_127625 < (int64_t) 1; + i_127625++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + (l_97440 + + i_127625) * + num_threads_125979] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + l_97440 * + (num_threads_125979 * + rp1_73709) + + num_threads_125979 * + l_97440 + + i_127625 * + num_threads_125979]; + } + + double zt_res_97701 = -1.0 * nrmxl_97581; + + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (l_97440 * + (num_threads_125979 * + rp1_73709) + + l_97440 * + num_threads_125979)] = + zt_res_97701; + for (int64_t i_127626 = 0; i_127626 < k2p2zq_73023; + i_127626++) { + ((__global double *) mem_125267)[phys_tid_96689 + + i_127626 * + num_threads_125979] = + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127626 * + num_threads_125979]; + } + for (int64_t i_127627 = 0; i_127627 < (int64_t) 2; + i_127627++) { + for (int64_t i_127628 = 0; i_127628 < k2p2zq_73023; + i_127628++) { + ((__global + double *) mem_125265)[phys_tid_96689 + + (i_127627 * + (num_threads_125979 * + k2p2zq_73023) + + i_127628 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (i_127627 * + (num_threads_125979 * + k2p2zq_73023) + + i_127628 * + num_threads_125979)]; + } + } + for (int64_t i_127629 = 0; i_127629 < k2p2zq_73023; + i_127629++) { + for (int64_t i_127630 = 0; i_127630 < rp1_73709; + i_127630++) { + ((__global + double *) mem_125472)[phys_tid_96689 + + (i_127629 * + (num_threads_125979 * + rp1_73709) + + i_127630 * + num_threads_125979)] = + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127629 * + (num_threads_125979 * + rp1_73709) + + i_127630 * + num_threads_125979)]; + } + } + } + for (int64_t i_127631 = 0; i_127631 < k2p2zq_73023; + i_127631++) { + ((__global double *) mem_125482)[phys_tid_96689 + + 
i_127631 * + num_threads_125979] = + ((__global double *) mem_125267)[phys_tid_96689 + + i_127631 * + num_threads_125979]; + } + for (int64_t i_127632 = 0; i_127632 < (int64_t) 2; + i_127632++) { + for (int64_t i_127633 = 0; i_127633 < k2p2zq_73023; + i_127633++) { + ((__global double *) mem_125480)[phys_tid_96689 + + (i_127632 * + (num_threads_125979 * + k2p2zq_73023) + + i_127633 * + num_threads_125979)] = + ((__global + double *) mem_125265)[phys_tid_96689 + + (i_127632 * + (num_threads_125979 * + k2p2zq_73023) + + i_127633 * + num_threads_125979)]; + } + } + for (int64_t i_127634 = 0; i_127634 < k2p2zq_73023; + i_127634++) { + for (int64_t i_127635 = 0; i_127635 < rp1_73709; + i_127635++) { + ((__global double *) mem_125512)[phys_tid_96689 + + (i_127634 * + (num_threads_125979 * + rp1_73709) + + i_127635 * + num_threads_125979)] = + ((__global + double *) mem_125472)[phys_tid_96689 + + (i_127634 * + (num_threads_125979 * + rp1_73709) + + i_127635 * + num_threads_125979)]; + } + } + } + for (int64_t i_127636 = 0; i_127636 < k2p2zq_73023; + i_127636++) { + for (int64_t i_127637 = 0; i_127637 < rp1_73709; + i_127637++) { + ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127636 * + (num_threads_125979 * + rp1_73709) + + i_127637 * + num_threads_125979)] = + ((__global double *) mem_125512)[phys_tid_96689 + + (i_127636 * + (num_threads_125979 * + rp1_73709) + + i_127637 * + num_threads_125979)]; + } + } + for (int64_t i_127638 = 0; i_127638 < k2p2zq_73023; + i_127638++) { + ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127638 * + num_threads_125979] = + ((__global double *) mem_125482)[phys_tid_96689 + + i_127638 * + num_threads_125979]; + } + for (int64_t i_127639 = 0; i_127639 < (int64_t) 2; i_127639++) { + for (int64_t i_127640 = 0; i_127640 < k2p2zq_73023; + i_127640++) { + ((__global + double *) double_buffer_mem_125588)[phys_tid_96689 + + (i_127639 * + (num_threads_125979 * + k2p2zq_73023) + + i_127640 * + 
num_threads_125979)] = + ((__global double *) mem_125480)[phys_tid_96689 + + (i_127639 * + (num_threads_125979 * + k2p2zq_73023) + + i_127640 * + num_threads_125979)]; + } + } + + int64_t k_tmp_127594 = loopres_97472; + + k_97445 = k_tmp_127594; + } + dqrdc2_res_97439 = k_97445; + + int64_t min_arg_97703 = sub64(dqrdc2_res_97439, (int64_t) 1); + int64_t min_res_97704 = smin64(rp1_73709, min_arg_97703); + + for (int64_t i_127641 = 0; i_127641 < k2p2zq_73023; i_127641++) { + for (int64_t i_127642 = 0; i_127642 < rp1_73709; i_127642++) { + ((__global double *) mem_123127)[i_127641 * (m_73008 * + rp1_73709) + + i_127642 * m_73008 + + gtid_96688] = ((__global + double *) double_buffer_mem_125586)[phys_tid_96689 + + (i_127641 * + (num_threads_125979 * + rp1_73709) + + i_127642 * + num_threads_125979)]; + } + } + for (int64_t i_127643 = 0; i_127643 < k2p2zq_73023; i_127643++) { + ((__global double *) mem_123130)[i_127643 * m_73008 + + gtid_96688] = ((__global + double *) double_buffer_mem_125587)[phys_tid_96689 + + i_127643 * + num_threads_125979]; + } + for (int64_t i_127644 = 0; i_127644 < k2p2zq_73023; i_127644++) { + ((__global int64_t *) mem_123133)[i_127644 * m_73008 + + gtid_96688] = ((__global + int64_t *) mem_122803)[phys_tid_96689 + + i_127644 * + num_threads_125979]; + } + ((__global int64_t *) mem_123135)[gtid_96688] = min_res_97704; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_97424 +} +__kernel void mainMagnitudezisegmap_96979(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t rp1_73709, int64_t j_97356, + int64_t num_groups_97367, __global + unsigned char *mem_122686, __global + unsigned char *mem_122715, __global + unsigned char *mem_122719, __global + unsigned char *mem_122723, __global + unsigned char *mem_122727) +{ + #define segmap_group_sizze_97366 (mainMagnitudezisegmap_group_sizze_96981) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const 
int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127476; + int32_t local_tid_127477; + int64_t group_sizze_127480; + int32_t wave_sizze_127479; + int32_t group_tid_127478; + + global_tid_127476 = get_global_id(0); + local_tid_127477 = get_local_id(0); + group_sizze_127480 = get_local_size(0); + wave_sizze_127479 = LOCKSTEP_WIDTH; + group_tid_127478 = get_group_id(0); + + int32_t phys_tid_96979; + + phys_tid_96979 = global_tid_127476; + + int32_t phys_group_id_127481; + + phys_group_id_127481 = get_group_id(0); + for (int32_t i_127482 = 0; i_127482 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_97366)) - + phys_group_id_127481, sext_i64_i32(num_groups_97367)); + i_127482++) { + int32_t virt_group_id_127483 = phys_group_id_127481 + i_127482 * + sext_i64_i32(num_groups_97367); + int64_t gtid_96978 = sext_i32_i64(virt_group_id_127483) * + segmap_group_sizze_97366 + sext_i32_i64(local_tid_127477); + + if (slt64(gtid_96978, m_73008)) { + double defunc_2_reduce_res_97374; + double redout_119831 = 0.0; + + for (int64_t i_119832 = 0; i_119832 < rp1_73709; i_119832++) { + double x_97378 = ((__global double *) mem_122686)[i_119832 * + (k2p2zq_73023 * + m_73008) + + gtid_96978 * + k2p2zq_73023 + + j_97356]; + double defunc_1_f_res_97379 = x_97378 * x_97378; + double defunc_1_op_res_97377 = defunc_1_f_res_97379 + + redout_119831; + double redout_tmp_127484 = defunc_1_op_res_97377; + + redout_119831 = redout_tmp_127484; + } + defunc_2_reduce_res_97374 = redout_119831; + + double sqrt_res_97380; + + sqrt_res_97380 = futrts_sqrt64(defunc_2_reduce_res_97374); + ((__global double *) mem_122715)[gtid_96978 + j_97356 * m_73008] = + sqrt_res_97380; + ((__global double *) mem_122719)[gtid_96978 + j_97356 * m_73008] = + sqrt_res_97380; + + bool zeze_res_97383 = sqrt_res_97380 == 0.0; + double lw_val_97384; + + if (zeze_res_97383) { + lw_val_97384 = 1.0; + } else { + lw_val_97384 = sqrt_res_97380; + } + ((__global double *) 
mem_122719)[gtid_96978 + (m_73008 * + k2p2zq_73023 + + j_97356 * m_73008)] = + lw_val_97384; + for (int64_t i_127485 = 0; i_127485 < k2p2zq_73023; i_127485++) { + ((__global double *) mem_122723)[i_127485 * m_73008 + + gtid_96978] = ((__global + double *) mem_122715)[gtid_96978 + + i_127485 * + m_73008]; + } + for (int64_t i_127486 = 0; i_127486 < (int64_t) 2; i_127486++) { + for (int64_t i_127487 = 0; i_127487 < k2p2zq_73023; + i_127487++) { + ((__global double *) mem_122727)[i_127486 * (m_73008 * + k2p2zq_73023) + + i_127487 * m_73008 + + gtid_96978] = ((__global + double *) mem_122719)[gtid_96978 + + (i_127486 * + (m_73008 * + k2p2zq_73023) + + i_127487 * + m_73008)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_97366 +} +__kernel void mainMagnitudezisegmap_97024(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t j_97356, + int64_t num_groups_97412, + int64_t num_threads_115503, + int64_t per_chunk_115510, __global + unsigned char *mem_122733, __global + unsigned char *mem_122738, __global + unsigned char *mem_122743, __global + unsigned char *mem_122748) +{ + #define segmap_group_sizze_97411 (mainMagnitudezisegmap_group_sizze_97026) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127559; + int32_t local_tid_127560; + int64_t group_sizze_127563; + int32_t wave_sizze_127562; + int32_t group_tid_127561; + + global_tid_127559 = get_global_id(0); + local_tid_127560 = get_local_id(0); + group_sizze_127563 = get_local_size(0); + wave_sizze_127562 = LOCKSTEP_WIDTH; + group_tid_127561 = get_group_id(0); + + int32_t phys_tid_97024; + + phys_tid_97024 = global_tid_127559; + + int32_t phys_group_id_127564; + + phys_group_id_127564 = get_group_id(0); + for (int32_t i_127565 = 0; i_127565 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_97411)) - + 
phys_group_id_127564, sext_i64_i32(num_groups_97412)); + i_127565++) { + int32_t virt_group_id_127566 = phys_group_id_127564 + i_127565 * + sext_i64_i32(num_groups_97412); + int64_t gtid_97023 = sext_i32_i64(virt_group_id_127566) * + segmap_group_sizze_97411 + sext_i32_i64(local_tid_127560); + + if (slt64(gtid_97023, m_73008)) { + double sqrt_res_97416 = ((__global + double *) mem_122733)[gtid_97023]; + + for (int64_t i_127567 = 0; i_127567 < (int64_t) 1; i_127567++) { + ((__global double *) mem_122738)[gtid_97023 + (j_97356 + + i_127567) * + m_73008] = ((__global + double *) mem_122743)[(gtid_97023 + + i_127567 - + squot64(gtid_97023 + + i_127567, + per_chunk_115510) * + per_chunk_115510) * + num_threads_115503 + + squot64(gtid_97023 + + i_127567, + per_chunk_115510)]; + } + + bool zeze_res_97418 = sqrt_res_97416 == 0.0; + double lw_val_97419; + + if (zeze_res_97418) { + lw_val_97419 = 1.0; + } else { + lw_val_97419 = sqrt_res_97416; + } + ((__global double *) mem_122738)[gtid_97023 + (m_73008 * + k2p2zq_73023 + + j_97356 * m_73008)] = + lw_val_97419; + for (int64_t i_127568 = 0; i_127568 < (int64_t) 2; i_127568++) { + for (int64_t i_127569 = 0; i_127569 < k2p2zq_73023; + i_127569++) { + ((__global double *) mem_122748)[i_127568 * (m_73008 * + k2p2zq_73023) + + i_127569 * m_73008 + + gtid_97023] = ((__global + double *) mem_122738)[gtid_97023 + + (i_127568 * + (m_73008 * + k2p2zq_73023) + + i_127569 * + m_73008)]; + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_97411 +} +__kernel void mainMagnitudezisegmap_97039(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t j_97356, __global + unsigned char *mem_param_122694, + __global unsigned char *mem_122733) +{ + #define segmap_group_sizze_97406 (mainMagnitudezisegmap_group_sizze_97042) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + 
int32_t global_tid_127553; + int32_t local_tid_127554; + int64_t group_sizze_127557; + int32_t wave_sizze_127556; + int32_t group_tid_127555; + + global_tid_127553 = get_global_id(0); + local_tid_127554 = get_local_id(0); + group_sizze_127557 = get_local_size(0); + wave_sizze_127556 = LOCKSTEP_WIDTH; + group_tid_127555 = get_group_id(0); + + int32_t phys_tid_97039; + + phys_tid_97039 = global_tid_127553; + + int64_t gtid_97037; + + gtid_97037 = sext_i32_i64(group_tid_127555) * segmap_group_sizze_97406 + + sext_i32_i64(local_tid_127554); + + int64_t gtid_97038; + + gtid_97038 = sext_i32_i64(group_tid_127555) * segmap_group_sizze_97406 + + sext_i32_i64(local_tid_127554) - (sext_i32_i64(group_tid_127555) * + segmap_group_sizze_97406 + + sext_i32_i64(local_tid_127554)); + if (slt64(gtid_97037, m_73008) && slt64(gtid_97038, (int64_t) 1)) { + double sqrt_res_97409 = ((__global double *) mem_122733)[gtid_97037]; + + if ((sle64((int64_t) 0, gtid_97037) && slt64(gtid_97037, m_73008)) && + (sle64((int64_t) 0, j_97356) && slt64(j_97356, k2p2zq_73023))) { + ((__global double *) mem_param_122694)[gtid_97037 * k2p2zq_73023 + + j_97356] = sqrt_res_97409; + } + } + + error_0: + return; + #undef segmap_group_sizze_97406 +} +__kernel void mainMagnitudezisegmap_97048(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_122730, __global + unsigned char *mem_122733) +{ + #define segmap_group_sizze_97399 (mainMagnitudezisegmap_group_sizze_97050) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127548; + int32_t local_tid_127549; + int64_t group_sizze_127552; + int32_t wave_sizze_127551; + int32_t group_tid_127550; + + global_tid_127548 = get_global_id(0); + local_tid_127549 = get_local_id(0); + group_sizze_127552 = get_local_size(0); + wave_sizze_127551 = LOCKSTEP_WIDTH; + group_tid_127550 = get_group_id(0); + + int32_t phys_tid_97048; + + phys_tid_97048 = 
global_tid_127548; + + int64_t gtid_97047; + + gtid_97047 = sext_i32_i64(group_tid_127550) * segmap_group_sizze_97399 + + sext_i32_i64(local_tid_127549); + if (slt64(gtid_97047, m_73008)) { + double defunc_2_reduce_res_97402 = ((__global + double *) mem_122730)[gtid_97047]; + double sqrt_res_97403; + + sqrt_res_97403 = futrts_sqrt64(defunc_2_reduce_res_97402); + ((__global double *) mem_122733)[gtid_97047] = sqrt_res_97403; + } + + error_0: + return; + #undef segmap_group_sizze_97399 +} +__kernel void mainMagnitudezisegmap_97197(__global int *global_failure, + int64_t m_73008, int64_t n_73011, + int64_t r_73698, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_122674, + __global unsigned char *mem_122677, + __global unsigned char *mem_122680, + __global unsigned char *mem_122682) +{ + #define segmap_group_sizze_97308 (mainMagnitudezisegmap_group_sizze_97199) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127462; + int32_t local_tid_127463; + int64_t group_sizze_127466; + int32_t wave_sizze_127465; + int32_t group_tid_127464; + + global_tid_127462 = get_global_id(0); + local_tid_127463 = get_local_id(0); + group_sizze_127466 = get_local_size(0); + wave_sizze_127465 = LOCKSTEP_WIDTH; + group_tid_127464 = get_group_id(0); + + int32_t phys_tid_97197; + + phys_tid_97197 = global_tid_127462; + + int64_t gtid_97196; + + gtid_97196 = sext_i32_i64(group_tid_127464) * segmap_group_sizze_97308 + + sext_i32_i64(local_tid_127463); + if (slt64(gtid_97196, m_73008)) { + double fr_97312 = ((__global double *) mem_122674)[gtid_97196]; + double x_97313 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_97196 * + n_73011 + + r_73698]; + double defunc_0_f_res_97314 = ((__global + double *) mem_122677)[gtid_97196]; + double resid_97315 = x_97313 - defunc_0_f_res_97314; + double sqrt_res_97316; + + sqrt_res_97316 = futrts_sqrt64(fr_97312); + + 
double recresid_r_97317 = resid_97315 / sqrt_res_97316; + + ((__global double *) mem_122680)[gtid_97196] = resid_97315; + ((__global double *) mem_122682)[gtid_97196] = recresid_r_97317; + } + + error_0: + return; + #undef segmap_group_sizze_97308 +} +__kernel void mainMagnitudezisegmap_97228(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_122668, __global + unsigned char *mem_122674) +{ + #define segmap_group_sizze_97285 (mainMagnitudezisegmap_group_sizze_97230) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127397; + int32_t local_tid_127398; + int64_t group_sizze_127401; + int32_t wave_sizze_127400; + int32_t group_tid_127399; + + global_tid_127397 = get_global_id(0); + local_tid_127398 = get_local_id(0); + group_sizze_127401 = get_local_size(0); + wave_sizze_127400 = LOCKSTEP_WIDTH; + group_tid_127399 = get_group_id(0); + + int32_t phys_tid_97228; + + phys_tid_97228 = global_tid_127397; + + int64_t gtid_97227; + + gtid_97227 = sext_i32_i64(group_tid_127399) * segmap_group_sizze_97285 + + sext_i32_i64(local_tid_127398); + if (slt64(gtid_97227, m_73008)) { + double defunc_0_f_res_97290 = ((__global + double *) mem_122668)[gtid_97227]; + double fr_97291 = 1.0 + defunc_0_f_res_97290; + + ((__global double *) mem_122674)[gtid_97227] = fr_97291; + } + + error_0: + return; + #undef segmap_group_sizze_97285 +} +__kernel void mainMagnitudezisegmap_98306(__global int *global_failure, + int64_t m_73008, int64_t n_73011, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98364, + int64_t num_threads_126015, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_120246, + __global unsigned char *mem_123798, + __global unsigned char *mem_123801, + __global unsigned char *mem_123804, + __global unsigned char *mem_123818, + __global unsigned char 
*mem_123821, + __global unsigned char *mem_123840, + __global unsigned char *mem_123869, + __global unsigned char *mem_123872, + __global unsigned char *mem_123874) +{ + #define segmap_group_sizze_98363 (mainMagnitudezisegmap_group_sizze_98308) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128015; + int32_t local_tid_128016; + int64_t group_sizze_128019; + int32_t wave_sizze_128018; + int32_t group_tid_128017; + + global_tid_128015 = get_global_id(0); + local_tid_128016 = get_local_id(0); + group_sizze_128019 = get_local_size(0); + wave_sizze_128018 = LOCKSTEP_WIDTH; + group_tid_128017 = get_group_id(0); + + int32_t phys_tid_98306; + + phys_tid_98306 = global_tid_128015; + + int32_t phys_group_id_128020; + + phys_group_id_128020 = get_group_id(0); + for (int32_t i_128021 = 0; i_128021 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_98363)) - + phys_group_id_128020, sext_i64_i32(num_groups_98364)); + i_128021++) { + int32_t virt_group_id_128022 = phys_group_id_128020 + i_128021 * + sext_i64_i32(num_groups_98364); + int64_t gtid_98305 = sext_i32_i64(virt_group_id_128022) * + segmap_group_sizze_98363 + sext_i32_i64(local_tid_128016); + + if (slt64(gtid_98305, m_73008)) { + double defunc_11_internal_map_res_transformed_row_98372 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_98305 * + n_73011 + + index_primexp_74309]; + double defunc_0_f_res_98373; + double redout_119867 = 0.0; + + for (int64_t i_119869 = 0; i_119869 < k2p2zq_73023; i_119869++) { + double x_98379 = ((__global double *) mem_120246)[i_119869 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98305 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double defunc_0_f_res_98380; + double redout_119871 = 0.0; + + for (int64_t i_119872 = 0; i_119872 < k2p2zq_73023; + i_119872++) { + double x_98384 = ((__global double *) mem_120246)[i_119872 * + (defunc_2_reduce_res_73132 
* + m_73008) + + gtid_98305 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98385 = ((__global double *) mem_123798)[i_119869 * + (m_73008 * + k2p2zq_73023) + + i_119872 * + m_73008 + + gtid_98305]; + double defunc_1_f_res_98386 = x_98384 * x_98385; + double defunc_1_op_res_98383 = defunc_1_f_res_98386 + + redout_119871; + double redout_tmp_128025 = defunc_1_op_res_98383; + + redout_119871 = redout_tmp_128025; + } + defunc_0_f_res_98380 = redout_119871; + + double defunc_1_f_res_98387 = x_98379 * defunc_0_f_res_98380; + double defunc_1_op_res_98377 = defunc_1_f_res_98387 + + redout_119867; + + ((__global double *) mem_123804)[phys_tid_98306 + i_119869 * + num_threads_126015] = + defunc_0_f_res_98380; + + double redout_tmp_128023 = defunc_1_op_res_98377; + + redout_119867 = redout_tmp_128023; + } + defunc_0_f_res_98373 = redout_119867; + + double fr_98388 = 1.0 + defunc_0_f_res_98373; + double defunc_0_f_res_98389; + double redout_119873 = 0.0; + + for (int64_t i_119874 = 0; i_119874 < k2p2zq_73023; i_119874++) { + double x_98393 = ((__global double *) mem_120246)[i_119874 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98305 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98394 = ((__global double *) mem_123801)[i_119874 * + m_73008 + + gtid_98305]; + double defunc_1_f_res_98395 = x_98393 * x_98394; + double defunc_1_op_res_98392 = defunc_1_f_res_98395 + + redout_119873; + double redout_tmp_128026 = defunc_1_op_res_98392; + + redout_119873 = redout_tmp_128026; + } + defunc_0_f_res_98389 = redout_119873; + + double resid_98396 = + defunc_11_internal_map_res_transformed_row_98372 - + defunc_0_f_res_98389; + double sqrt_res_98397; + + sqrt_res_98397 = futrts_sqrt64(fr_98388); + + double recresid_r_98398 = resid_98396 / sqrt_res_98397; + + for (int64_t i_119879 = 0; i_119879 < k2p2zq_73023; i_119879++) { + double x_98401 = ((__global + double *) mem_123804)[phys_tid_98306 + + i_119879 * + num_threads_126015]; + double x_98403 
= ((__global double *) mem_123801)[i_119879 * + m_73008 + + gtid_98305]; + double defunc_0_f_res_98404; + double redout_119883 = 0.0; + + for (int64_t i_119885 = 0; i_119885 < k2p2zq_73023; + i_119885++) { + double x_98409 = ((__global + double *) mem_123804)[phys_tid_98306 + + i_119885 * + num_threads_126015]; + double x_98410 = ((__global double *) mem_123798)[i_119879 * + (m_73008 * + k2p2zq_73023) + + i_119885 * + m_73008 + + gtid_98305]; + double x_98411 = ((__global double *) mem_120246)[i_119885 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98305 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98412 = x_98401 * x_98409; + double y_98413 = x_98412 / fr_98388; + double defunc_1_f_res_98414 = x_98410 - y_98413; + double defunc_1_f_res_98415 = x_98411 * + defunc_1_f_res_98414; + double defunc_1_op_res_98408 = defunc_1_f_res_98415 + + redout_119883; + + ((__global double *) mem_123840)[phys_tid_98306 + i_119885 * + num_threads_126015] = + defunc_1_f_res_98414; + + double redout_tmp_128029 = defunc_1_op_res_98408; + + redout_119883 = redout_tmp_128029; + } + defunc_0_f_res_98404 = redout_119883; + + double defunc_0_g_res_98416 = resid_98396 * + defunc_0_f_res_98404; + double defunc_1_f_res_98417 = x_98403 + defunc_0_g_res_98416; + + ((__global double *) mem_123818)[phys_tid_98306 + i_119879 * + num_threads_126015] = + defunc_1_f_res_98417; + for (int64_t i_128031 = 0; i_128031 < k2p2zq_73023; + i_128031++) { + ((__global double *) mem_123821)[phys_tid_98306 + + (i_119879 * + (num_threads_126015 * + k2p2zq_73023) + + i_128031 * + num_threads_126015)] = + ((__global double *) mem_123840)[phys_tid_98306 + + i_128031 * + num_threads_126015]; + } + } + for (int64_t i_128032 = 0; i_128032 < k2p2zq_73023; i_128032++) { + for (int64_t i_128033 = 0; i_128033 < k2p2zq_73023; + i_128033++) { + ((__global double *) mem_123869)[i_128032 * (m_73008 * + k2p2zq_73023) + + i_128033 * m_73008 + + gtid_98305] = ((__global + double *) 
mem_123821)[phys_tid_98306 + + (i_128032 * + (num_threads_126015 * + k2p2zq_73023) + + i_128033 * + num_threads_126015)]; + } + } + for (int64_t i_128034 = 0; i_128034 < k2p2zq_73023; i_128034++) { + ((__global double *) mem_123872)[i_128034 * m_73008 + + gtid_98305] = ((__global + double *) mem_123818)[phys_tid_98306 + + i_128034 * + num_threads_126015]; + } + ((__global double *) mem_123874)[gtid_98305] = recresid_r_98398; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_98363 +} +__kernel void mainMagnitudezisegmap_98483(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98717, + int64_t num_threads_126025, __global + unsigned char *mem_120246, __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123901, + __global unsigned char *mem_123907, + __global unsigned char *mem_123910, + __global unsigned char *mem_123916, + __global unsigned char *mem_123921, + __global unsigned char *mem_123937, + __global unsigned char *mem_123940) +{ + #define segmap_group_sizze_98716 (mainMagnitudezisegmap_group_sizze_98486) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128200; + int32_t local_tid_128201; + int64_t group_sizze_128204; + int32_t wave_sizze_128203; + int32_t group_tid_128202; + + global_tid_128200 = get_global_id(0); + local_tid_128201 = get_local_id(0); + group_sizze_128204 = get_local_size(0); + wave_sizze_128203 = LOCKSTEP_WIDTH; + group_tid_128202 = get_group_id(0); + + int32_t phys_tid_98483; + + phys_tid_98483 = global_tid_128200; + + int32_t phys_group_id_128205; + + phys_group_id_128205 = get_group_id(0); + for (int32_t i_128206 = 0; i_128206 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + segmap_group_sizze_98716)) - + phys_group_id_128205, 
sext_i64_i32(num_groups_98717)); + i_128206++) { + int32_t virt_group_id_128207 = phys_group_id_128205 + i_128206 * + sext_i64_i32(num_groups_98717); + int64_t gtid_98481 = squot64(sext_i32_i64(virt_group_id_128207) * + segmap_group_sizze_98716 + + sext_i32_i64(local_tid_128201), + k2p2zq_73023); + int64_t gtid_98482 = sext_i32_i64(virt_group_id_128207) * + segmap_group_sizze_98716 + sext_i32_i64(local_tid_128201) - + squot64(sext_i32_i64(virt_group_id_128207) * + segmap_group_sizze_98716 + + sext_i32_i64(local_tid_128201), k2p2zq_73023) * + k2p2zq_73023; + + if (slt64(gtid_98481, m_73008) && slt64(gtid_98482, k2p2zq_73023)) { + double fr_98728 = ((__global double *) mem_123910)[gtid_98481]; + double resid_98729 = ((__global double *) mem_123916)[gtid_98481]; + double x_98730 = ((__global double *) mem_123907)[gtid_98481 * + k2p2zq_73023 + + gtid_98482]; + double x_98732 = ((__global double *) mem_param_123786)[gtid_98481 * + k2p2zq_73023 + + gtid_98482]; + double defunc_0_f_res_98733; + double redout_119892 = 0.0; + + for (int64_t i_119894 = 0; i_119894 < k2p2zq_73023; i_119894++) { + double x_98738 = ((__global double *) mem_123907)[gtid_98481 * + k2p2zq_73023 + + i_119894]; + double x_98739 = ((__global double *) mem_123901)[i_119894 * + (k2p2zq_73023 * + m_73008) + + gtid_98481 * + k2p2zq_73023 + + gtid_98482]; + double x_98740 = ((__global double *) mem_120246)[i_119894 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98481 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98741 = x_98730 * x_98738; + double y_98742 = x_98741 / fr_98728; + double defunc_1_f_res_98743 = x_98739 - y_98742; + double defunc_1_f_res_98744 = x_98740 * defunc_1_f_res_98743; + double defunc_1_op_res_98737 = defunc_1_f_res_98744 + + redout_119892; + + ((__global double *) mem_123921)[phys_tid_98483 + i_119894 * + num_threads_126025] = + defunc_1_f_res_98743; + + double redout_tmp_128208 = defunc_1_op_res_98737; + + redout_119892 = redout_tmp_128208; + } + 
defunc_0_f_res_98733 = redout_119892; + + double defunc_0_g_res_98745 = resid_98729 * defunc_0_f_res_98733; + double defunc_1_f_res_98746 = x_98732 + defunc_0_g_res_98745; + + for (int64_t i_128210 = 0; i_128210 < k2p2zq_73023; i_128210++) { + ((__global double *) mem_123937)[i_128210 * (k2p2zq_73023 * + m_73008) + + gtid_98481 * k2p2zq_73023 + + gtid_98482] = ((__global + double *) mem_123921)[phys_tid_98483 + + i_128210 * + num_threads_126025]; + } + ((__global double *) mem_123940)[gtid_98481 * k2p2zq_73023 + + gtid_98482] = defunc_1_f_res_98746; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_98716 +} +__kernel void mainMagnitudezisegmap_98521(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global + unsigned char *mem_param_123786, + __global unsigned char *mem_123916, + __global unsigned char *mem_123944, + __global unsigned char *mem_123952) +{ + #define segmap_group_sizze_98773 (mainMagnitudezisegmap_group_sizze_98524) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128271; + int32_t local_tid_128272; + int64_t group_sizze_128275; + int32_t wave_sizze_128274; + int32_t group_tid_128273; + + global_tid_128271 = get_global_id(0); + local_tid_128272 = get_local_id(0); + group_sizze_128275 = get_local_size(0); + wave_sizze_128274 = LOCKSTEP_WIDTH; + group_tid_128273 = get_group_id(0); + + int32_t phys_tid_98521; + + phys_tid_98521 = global_tid_128271; + + int64_t gtid_98519; + + gtid_98519 = squot64(sext_i32_i64(group_tid_128273) * + segmap_group_sizze_98773 + + sext_i32_i64(local_tid_128272), k2p2zq_73023); + + int64_t gtid_98520; + + gtid_98520 = sext_i32_i64(group_tid_128273) * segmap_group_sizze_98773 + + sext_i32_i64(local_tid_128272) - + squot64(sext_i32_i64(group_tid_128273) * segmap_group_sizze_98773 + + sext_i32_i64(local_tid_128272), k2p2zq_73023) * 
k2p2zq_73023; + if (slt64(gtid_98519, m_73008) && slt64(gtid_98520, k2p2zq_73023)) { + double resid_98776 = ((__global double *) mem_123916)[gtid_98519]; + double x_98777 = ((__global double *) mem_param_123786)[gtid_98519 * + k2p2zq_73023 + + gtid_98520]; + double defunc_0_f_res_98778 = ((__global + double *) mem_123944)[gtid_98519 * + k2p2zq_73023 + + gtid_98520]; + double defunc_0_g_res_98779 = resid_98776 * defunc_0_f_res_98778; + double defunc_1_f_res_98780 = x_98777 + defunc_0_g_res_98779; + + ((__global double *) mem_123952)[gtid_98519 * k2p2zq_73023 + + gtid_98520] = defunc_1_f_res_98780; + } + + error_0: + return; + #undef segmap_group_sizze_98773 +} +__kernel void mainMagnitudezisegmap_98598(__global int *global_failure, + int64_t m_73008, int64_t n_73011, + int64_t index_primexp_74309, __global + unsigned char *defunc_3_map_res_mem_120231, + __global unsigned char *mem_123910, + __global unsigned char *mem_123913, + __global unsigned char *mem_123916, + __global unsigned char *mem_123918) +{ + #define segmap_group_sizze_98704 (mainMagnitudezisegmap_group_sizze_98600) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128195; + int32_t local_tid_128196; + int64_t group_sizze_128199; + int32_t wave_sizze_128198; + int32_t group_tid_128197; + + global_tid_128195 = get_global_id(0); + local_tid_128196 = get_local_id(0); + group_sizze_128199 = get_local_size(0); + wave_sizze_128198 = LOCKSTEP_WIDTH; + group_tid_128197 = get_group_id(0); + + int32_t phys_tid_98598; + + phys_tid_98598 = global_tid_128195; + + int64_t gtid_98597; + + gtid_98597 = sext_i32_i64(group_tid_128197) * segmap_group_sizze_98704 + + sext_i32_i64(local_tid_128196); + if (slt64(gtid_98597, m_73008)) { + double defunc_11_internal_map_res_transformed_row_98708 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_98597 * + n_73011 + + index_primexp_74309]; + double fr_98709 = ((__global 
double *) mem_123910)[gtid_98597]; + double defunc_0_f_res_98710 = ((__global + double *) mem_123913)[gtid_98597]; + double resid_98711 = defunc_11_internal_map_res_transformed_row_98708 - + defunc_0_f_res_98710; + double sqrt_res_98712; + + sqrt_res_98712 = futrts_sqrt64(fr_98709); + + double recresid_r_98713 = resid_98711 / sqrt_res_98712; + + ((__global double *) mem_123916)[gtid_98597] = resid_98711; + ((__global double *) mem_123918)[gtid_98597] = recresid_r_98713; + } + + error_0: + return; + #undef segmap_group_sizze_98704 +} +__kernel void mainMagnitudezisegmap_98629(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_123904, __global + unsigned char *mem_123910) +{ + #define segmap_group_sizze_98684 (mainMagnitudezisegmap_group_sizze_98631) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128130; + int32_t local_tid_128131; + int64_t group_sizze_128134; + int32_t wave_sizze_128133; + int32_t group_tid_128132; + + global_tid_128130 = get_global_id(0); + local_tid_128131 = get_local_id(0); + group_sizze_128134 = get_local_size(0); + wave_sizze_128133 = LOCKSTEP_WIDTH; + group_tid_128132 = get_group_id(0); + + int32_t phys_tid_98629; + + phys_tid_98629 = global_tid_128130; + + int64_t gtid_98628; + + gtid_98628 = sext_i32_i64(group_tid_128132) * segmap_group_sizze_98684 + + sext_i32_i64(local_tid_128131); + if (slt64(gtid_98628, m_73008)) { + double defunc_0_f_res_98687 = ((__global + double *) mem_123904)[gtid_98628]; + double fr_98688 = 1.0 + defunc_0_f_res_98687; + + ((__global double *) mem_123910)[gtid_98628] = fr_98688; + } + + error_0: + return; + #undef segmap_group_sizze_98684 +} +__kernel void mainMagnitudezisegmap_98804(__global int *global_failure, + int failure_is_an_option, __global + int64_t *global_failure_args, + int64_t m_73008, int64_t k2p2zq_73023, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + 
int64_t num_groups_98855, + int64_t num_threads_126029, __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_121934, + __global unsigned char *mem_124009, + __global unsigned char *mem_124024, + __global unsigned char *mem_124026) +{ + #define segmap_group_sizze_98854 (mainMagnitudezisegmap_group_sizze_98806) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128283; + int32_t local_tid_128284; + int64_t group_sizze_128287; + int32_t wave_sizze_128286; + int32_t group_tid_128285; + + global_tid_128283 = get_global_id(0); + local_tid_128284 = get_local_id(0); + group_sizze_128287 = get_local_size(0); + wave_sizze_128286 = LOCKSTEP_WIDTH; + group_tid_128285 = get_group_id(0); + + int32_t phys_tid_98804; + + phys_tid_98804 = global_tid_128283; + + int32_t phys_group_id_128288; + + phys_group_id_128288 = get_group_id(0); + for (int32_t i_128289 = 0; i_128289 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, segmap_group_sizze_98854)) - + phys_group_id_128288, sext_i64_i32(num_groups_98855)); + i_128289++) { + int32_t virt_group_id_128290 = phys_group_id_128288 + i_128289 * + sext_i64_i32(num_groups_98855); + int64_t gtid_98803 = sext_i32_i64(virt_group_id_128290) * + segmap_group_sizze_98854 + sext_i32_i64(local_tid_128284); + + if (slt64(gtid_98803, m_73008)) { + int64_t x_98860 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_98803]; + int64_t n_98861 = sub64(x_98860, k2p2zq_73023); + double i64_res_98862 = sitofp_i64_f64(n_98861); + double defunc_2_reduce_res_98863; + double redout_119896 = 0.0; + + for (int64_t i_119897 = 0; i_119897 < num_recresids_padded_73681; + i_119897++) { + double x_98871 = ((__global double *) mem_121934)[i_119897 * + m_73008 + + gtid_98803]; + bool 
isnan_res_98866; + + isnan_res_98866 = futrts_isnan64(redout_119896); + + double defunc_1_op_res_98867; + + if (isnan_res_98866) { + defunc_1_op_res_98867 = x_98871; + } else { + bool isnan_res_98868; + + isnan_res_98868 = futrts_isnan64(x_98871); + + double defunc_1_op_res_f_res_98869; + + if (isnan_res_98868) { + defunc_1_op_res_f_res_98869 = redout_119896; + } else { + double defunc_1_op_res_f_res_f_res_98870 = x_98871 + + redout_119896; + + defunc_1_op_res_f_res_98869 = + defunc_1_op_res_f_res_f_res_98870; + } + defunc_1_op_res_98867 = defunc_1_op_res_f_res_98869; + } + + double redout_tmp_128291 = defunc_1_op_res_98867; + + redout_119896 = redout_tmp_128291; + } + defunc_2_reduce_res_98863 = redout_119896; + + double x_mean_98872 = defunc_2_reduce_res_98863 / i64_res_98862; + double defunc_2_reduce_res_98873; + double redout_119898 = 0.0; + + for (int64_t i_119899 = 0; i_119899 < num_recresids_padded_73681; + i_119899++) { + double x_98877 = ((__global double *) mem_121934)[i_119899 * + m_73008 + + gtid_98803]; + bool isnan_res_98878; + + isnan_res_98878 = futrts_isnan64(x_98877); + + double defunc_0_f_res_98879; + + if (isnan_res_98878) { + defunc_0_f_res_98879 = 0.0; + } else { + double x_98880 = x_98877 - x_mean_98872; + double defunc_0_f_res_f_res_98881 = fpow64(x_98880, 2.0); + + defunc_0_f_res_98879 = defunc_0_f_res_f_res_98881; + } + + double defunc_1_op_res_98876 = defunc_0_f_res_98879 + + redout_119898; + double redout_tmp_128292 = defunc_1_op_res_98876; + + redout_119898 = redout_tmp_128292; + } + defunc_2_reduce_res_98873 = redout_119898; + + double y_98882 = i64_res_98862 - 1.0; + double binop_p_98883 = defunc_2_reduce_res_98873 / y_98882; + double defunc_0_f_res_98884; + + defunc_0_f_res_98884 = futrts_sqrt64(binop_p_98883); + + double sqrt_res_98885; + + sqrt_res_98885 = futrts_sqrt64(i64_res_98862); + + double fr_98886 = defunc_0_f_res_98884 * sqrt_res_98885; + double discard_119905; + double scanacc_119901 = 0.0; + + for (int64_t i_119903 = 0; 
i_119903 < Nmk_74408; i_119903++) { + bool cond_98892 = i_119903 == (int64_t) 0; + double defunc_0_f_res_98893; + + if (cond_98892) { + defunc_0_f_res_98893 = 0.0; + } else { + int64_t i_98894 = sub64(i_119903, (int64_t) 1); + bool x_98895 = sle64((int64_t) 0, i_98894); + bool y_98896 = slt64(i_98894, num_recresids_padded_73681); + bool bounds_check_98897 = x_98895 && y_98896; + bool index_certs_98898; + + if (!bounds_check_98897) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 612) == -1) { + global_failure_args[0] = i_98894; + global_failure_args[1] = + num_recresids_padded_73681; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_98899 = ((__global double *) mem_121934)[i_98894 * + m_73008 + + gtid_98803]; + double defunc_0_f_res_f_res_98900 = x_98899 / fr_98886; + + defunc_0_f_res_98893 = defunc_0_f_res_f_res_98900; + } + + double defunc_1_op_res_98890 = defunc_0_f_res_98893 + + scanacc_119901; + + ((__global double *) mem_124009)[phys_tid_98804 + i_119903 * + num_threads_126029] = + defunc_1_op_res_98890; + + double scanacc_tmp_128293 = defunc_1_op_res_98890; + + scanacc_119901 = scanacc_tmp_128293; + } + discard_119905 = scanacc_119901; + for (int64_t i_128295 = 0; i_128295 < Nmk_74408; i_128295++) { + ((__global double *) mem_124024)[i_128295 * m_73008 + + gtid_98803] = ((__global + double *) mem_124009)[phys_tid_98804 + + i_128295 * + num_threads_126029]; + } + ((__global int64_t *) mem_124026)[gtid_98803] = n_98861; + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_0: + return; + #undef segmap_group_sizze_98854 +} +__kernel void mainMagnitudezisegmap_98980(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_124040, __global + unsigned char *mem_124054, __global + unsigned char *mem_124057) +{ + #define segmap_group_sizze_99098 (mainMagnitudezisegmap_group_sizze_98982) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure 
>= 0) + return; + + int32_t global_tid_128463; + int32_t local_tid_128464; + int64_t group_sizze_128467; + int32_t wave_sizze_128466; + int32_t group_tid_128465; + + global_tid_128463 = get_global_id(0); + local_tid_128464 = get_local_id(0); + group_sizze_128467 = get_local_size(0); + wave_sizze_128466 = LOCKSTEP_WIDTH; + group_tid_128465 = get_group_id(0); + + int32_t phys_tid_98980; + + phys_tid_98980 = global_tid_128463; + + int64_t gtid_98979; + + gtid_98979 = sext_i32_i64(group_tid_128465) * segmap_group_sizze_99098 + + sext_i32_i64(local_tid_128464); + if (slt64(gtid_98979, m_73008)) { + double i64_res_99101 = ((__global double *) mem_124040)[gtid_98979]; + double defunc_2_reduce_res_99102 = ((__global + double *) mem_124054)[gtid_98979]; + double y_99103 = i64_res_99101 - 1.0; + double binop_p_99104 = defunc_2_reduce_res_99102 / y_99103; + double defunc_0_f_res_99105; + + defunc_0_f_res_99105 = futrts_sqrt64(binop_p_99104); + + double sqrt_res_99106; + + sqrt_res_99106 = futrts_sqrt64(i64_res_99101); + + double fr_99107 = defunc_0_f_res_99105 * sqrt_res_99106; + + ((__global double *) mem_124057)[gtid_98979] = fr_99107; + } + + error_0: + return; + #undef segmap_group_sizze_99098 +} +__kernel void mainMagnitudezisegmap_99015(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_124040, __global + unsigned char *mem_124048, __global + unsigned char *mem_124051) +{ + #define segmap_group_sizze_99075 (mainMagnitudezisegmap_group_sizze_99017) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128398; + int32_t local_tid_128399; + int64_t group_sizze_128402; + int32_t wave_sizze_128401; + int32_t group_tid_128400; + + global_tid_128398 = get_global_id(0); + local_tid_128399 = get_local_id(0); + group_sizze_128402 = get_local_size(0); + wave_sizze_128401 = LOCKSTEP_WIDTH; + group_tid_128400 = get_group_id(0); + + int32_t phys_tid_99015; + + 
phys_tid_99015 = global_tid_128398; + + int64_t gtid_99014; + + gtid_99014 = sext_i32_i64(group_tid_128400) * segmap_group_sizze_99075 + + sext_i32_i64(local_tid_128399); + if (slt64(gtid_99014, m_73008)) { + double i64_res_99078 = ((__global double *) mem_124040)[gtid_99014]; + double defunc_2_reduce_res_99079 = ((__global + double *) mem_124048)[gtid_99014]; + double x_mean_99080 = defunc_2_reduce_res_99079 / i64_res_99078; + + ((__global double *) mem_124051)[gtid_99014] = x_mean_99080; + } + + error_0: + return; + #undef segmap_group_sizze_99075 +} +__kernel void mainMagnitudezisegmap_99043(__global int *global_failure, + int64_t m_73008, int64_t k2p2zq_73023, + __global + unsigned char *defunc_3_map_res_mem_120230, + __global unsigned char *mem_124040, + __global unsigned char *mem_124042) +{ + #define segmap_group_sizze_99052 (mainMagnitudezisegmap_group_sizze_99045) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128323; + int32_t local_tid_128324; + int64_t group_sizze_128327; + int32_t wave_sizze_128326; + int32_t group_tid_128325; + + global_tid_128323 = get_global_id(0); + local_tid_128324 = get_local_id(0); + group_sizze_128327 = get_local_size(0); + wave_sizze_128326 = LOCKSTEP_WIDTH; + group_tid_128325 = get_group_id(0); + + int32_t phys_tid_99043; + + phys_tid_99043 = global_tid_128323; + + int64_t gtid_99042; + + gtid_99042 = sext_i32_i64(group_tid_128325) * segmap_group_sizze_99052 + + sext_i32_i64(local_tid_128324); + if (slt64(gtid_99042, m_73008)) { + int64_t x_99056 = ((__global + int64_t *) defunc_3_map_res_mem_120230)[gtid_99042]; + int64_t n_99057 = sub64(x_99056, k2p2zq_73023); + double i64_res_99058 = sitofp_i64_f64(n_99057); + + ((__global double *) mem_124040)[gtid_99042] = i64_res_99058; + ((__global int64_t *) mem_124042)[gtid_99042] = n_99057; + } + + error_0: + return; + #undef segmap_group_sizze_99052 +} +__kernel void 
mainMagnitudezisegmap_99145(__global int *global_failure, + int64_t m_73008, double conf_73017, + int64_t Nmk_74408, __global + unsigned char *mem_124072, __global + unsigned char *mem_124074, __global + unsigned char *mem_124078) +{ + #define segmap_group_sizze_99201 (mainMagnitudezisegmap_group_sizze_99148) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128531; + int32_t local_tid_128532; + int64_t group_sizze_128535; + int32_t wave_sizze_128534; + int32_t group_tid_128533; + + global_tid_128531 = get_global_id(0); + local_tid_128532 = get_local_id(0); + group_sizze_128535 = get_local_size(0); + wave_sizze_128534 = LOCKSTEP_WIDTH; + group_tid_128533 = get_group_id(0); + + int32_t phys_tid_99145; + + phys_tid_99145 = global_tid_128531; + + int64_t gtid_99143; + + gtid_99143 = squot64(sext_i32_i64(group_tid_128533) * + segmap_group_sizze_99201 + + sext_i32_i64(local_tid_128532), Nmk_74408); + + int64_t gtid_99144; + + gtid_99144 = sext_i32_i64(group_tid_128533) * segmap_group_sizze_99201 + + sext_i32_i64(local_tid_128532) - + squot64(sext_i32_i64(group_tid_128533) * segmap_group_sizze_99201 + + sext_i32_i64(local_tid_128532), Nmk_74408) * Nmk_74408; + if (slt64(gtid_99143, m_73008) && slt64(gtid_99144, Nmk_74408)) { + int64_t n_99204 = ((__global int64_t *) mem_124072)[gtid_99143]; + bool cond_99207 = slt64(gtid_99144, n_99204); + double defunc_0_f_res_99208; + + if (cond_99207) { + double div_99205 = ((__global double *) mem_124074)[gtid_99143]; + double x_99209 = 2.0 * conf_73017; + double i64_res_99210 = sitofp_i64_f64(gtid_99144); + double x_99211 = x_99209 * i64_res_99210; + double y_99212 = x_99211 / div_99205; + double defunc_0_f_res_t_res_99213 = conf_73017 + y_99212; + + defunc_0_f_res_99208 = defunc_0_f_res_t_res_99213; + } else { + defunc_0_f_res_99208 = NAN; + } + ((__global double *) mem_124078)[gtid_99143 * Nmk_74408 + gtid_99144] = + 
defunc_0_f_res_99208; + } + + error_0: + return; + #undef segmap_group_sizze_99201 +} +__kernel void mainMagnitudezisegmap_99177(__global int *global_failure, + int64_t m_73008, __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124072, + __global unsigned char *mem_124074) +{ + #define segmap_group_sizze_99187 (mainMagnitudezisegmap_group_sizze_99179) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128526; + int32_t local_tid_128527; + int64_t group_sizze_128530; + int32_t wave_sizze_128529; + int32_t group_tid_128528; + + global_tid_128526 = get_global_id(0); + local_tid_128527 = get_local_id(0); + group_sizze_128530 = get_local_size(0); + wave_sizze_128529 = LOCKSTEP_WIDTH; + group_tid_128528 = get_group_id(0); + + int32_t phys_tid_99177; + + phys_tid_99177 = global_tid_128526; + + int64_t gtid_99176; + + gtid_99176 = sext_i32_i64(group_tid_128528) * segmap_group_sizze_99187 + + sext_i32_i64(local_tid_128527); + if (slt64(gtid_99176, m_73008)) { + int64_t x_99191 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_99176]; + int64_t n_99192 = add64((int64_t) 1, x_99191); + double i64_res_99193 = sitofp_i64_f64(n_99192); + double div_99194 = i64_res_99193 - 1.0; + + ((__global int64_t *) mem_124072)[gtid_99176] = n_99192; + ((__global double *) mem_124074)[gtid_99176] = div_99194; + } + + error_0: + return; + #undef segmap_group_sizze_99187 +} +__kernel void mainMagnitudezisegmap_99641(__global int *global_failure, + int64_t m_73008, double level_73014, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124127, + __global unsigned char *mem_124130, + __global unsigned char *mem_124133) +{ + #define segmap_group_sizze_99959 (mainMagnitudezisegmap_group_sizze_99643) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; 
+ + int32_t global_tid_128693; + int32_t local_tid_128694; + int64_t group_sizze_128697; + int32_t wave_sizze_128696; + int32_t group_tid_128695; + + global_tid_128693 = get_global_id(0); + local_tid_128694 = get_local_id(0); + group_sizze_128697 = get_local_size(0); + wave_sizze_128696 = LOCKSTEP_WIDTH; + group_tid_128695 = get_group_id(0); + + int32_t phys_tid_99641; + + phys_tid_99641 = global_tid_128693; + + int64_t gtid_99640; + + gtid_99640 = sext_i32_i64(group_tid_128695) * segmap_group_sizze_99959 + + sext_i32_i64(local_tid_128694); + if (slt64(gtid_99640, m_73008)) { + double pval_brownian_motion_max_res_99963 = ((__global + double *) mem_124127)[gtid_99640]; + int64_t defunc_0_f_res_99964 = ((__global + int64_t *) mem_124130)[gtid_99640]; + bool isnan_res_99965; + + isnan_res_99965 = futrts_isnan64(pval_brownian_motion_max_res_99963); + + bool cond_99966 = !isnan_res_99965; + bool cond_t_res_99967 = pval_brownian_motion_max_res_99963 < + level_73014; + bool x_99968 = cond_99966 && cond_t_res_99967; + bool chk_t_res_99969 = defunc_0_f_res_99964 == + (int64_t) 9223372036854775807; + bool chk_t_res_99970 = !chk_t_res_99969; + bool x_99971 = x_99968 && chk_t_res_99970; + int64_t y_start_99972; + + if (x_99971) { + int64_t x_99962 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_99640]; + int64_t y_start_t_res_99973 = sub64(x_99962, defunc_0_f_res_99964); + + y_start_99972 = y_start_t_res_99973; + } else { + y_start_99972 = (int64_t) 0; + } + ((__global int64_t *) mem_124133)[gtid_99640] = y_start_99972; + } + + error_0: + return; + #undef segmap_group_sizze_99959 +} +__kernel void mainMagnitudezisegmap_99681(__global int *global_failure, + int64_t m_73008, __global + unsigned char *mem_124124, __global + unsigned char *mem_124127) +{ + #define segmap_group_sizze_99842 (mainMagnitudezisegmap_group_sizze_99683) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t 
global_tid_128628; + int32_t local_tid_128629; + int64_t group_sizze_128632; + int32_t wave_sizze_128631; + int32_t group_tid_128630; + + global_tid_128628 = get_global_id(0); + local_tid_128629 = get_local_id(0); + group_sizze_128632 = get_local_size(0); + wave_sizze_128631 = LOCKSTEP_WIDTH; + group_tid_128630 = get_group_id(0); + + int32_t phys_tid_99681; + + phys_tid_99681 = global_tid_128628; + + int64_t gtid_99680; + + gtid_99680 = sext_i32_i64(group_tid_128630) * segmap_group_sizze_99842 + + sext_i32_i64(local_tid_128629); + if (slt64(gtid_99680, m_73008)) { + double defunc_2_reduce_res_99845 = ((__global + double *) mem_124124)[gtid_99680]; + double defunc_0_Q_arg_99846 = 3.0 * defunc_2_reduce_res_99845; + double zs_res_99847 = defunc_0_Q_arg_99846 / 1.4142135623730951; + double abs_res_99848 = fabs(zs_res_99847); + double zs_res_99849 = abs_res_99848 / 2.0; + double zp_res_99850 = 1.0 + zs_res_99849; + double zs_res_99851 = 1.0 / zp_res_99850; + double zt_res_99852 = zs_res_99851 * zs_res_99851; + double zt_res_99853 = zs_res_99851 * zt_res_99852; + double zt_res_99854 = zt_res_99852 * zt_res_99852; + double zt_res_99855 = zt_res_99852 * zt_res_99853; + double zt_res_99856 = zt_res_99853 * zt_res_99853; + double zt_res_99857 = zt_res_99853 * zt_res_99854; + double zt_res_99858 = zt_res_99854 * zt_res_99854; + double zt_res_99859 = zt_res_99854 * zt_res_99855; + double zt_res_99860 = 0.17087277 * zt_res_99859; + double zt_res_99861 = 0.82215223 * zt_res_99858; + double zt_res_99862 = 1.48851587 * zt_res_99857; + double zt_res_99863 = 1.13520398 * zt_res_99856; + double zt_res_99864 = 0.27886807 * zt_res_99855; + double zt_res_99865 = 0.18628806 * zt_res_99854; + double zt_res_99866 = 9.678418e-2 * zt_res_99853; + double zt_res_99867 = 0.37409196 * zt_res_99852; + double zt_res_99868 = 1.00002368 * zs_res_99851; + double zt_res_99869 = zs_res_99847 * zs_res_99847; + double zm_res_99870 = 0.0 - zt_res_99869; + double zm_res_99871 = zm_res_99870 - 1.26551223; + 
double zp_res_99872 = zt_res_99868 + zm_res_99871; + double zp_res_99873 = zt_res_99867 + zp_res_99872; + double zp_res_99874 = zt_res_99866 + zp_res_99873; + double zm_res_99875 = zp_res_99874 - zt_res_99865; + double zp_res_99876 = zt_res_99864 + zm_res_99875; + double zm_res_99877 = zp_res_99876 - zt_res_99863; + double zp_res_99878 = zt_res_99862 + zm_res_99877; + double zm_res_99879 = zp_res_99878 - zt_res_99861; + double zp_res_99880 = zt_res_99860 + zm_res_99879; + double exp_res_99881; + + exp_res_99881 = futrts_exp64(zp_res_99880); + + double zt_res_99882 = zs_res_99851 * exp_res_99881; + bool zgze_res_99883 = 0.0 <= zs_res_99847; + double erf_res_99884; + + if (zgze_res_99883) { + double zm_res_99885 = 1.0 - zt_res_99882; + + erf_res_99884 = zm_res_99885; + } else { + double zm_res_99886 = zt_res_99882 - 1.0; + + erf_res_99884 = zm_res_99886; + } + + double zp_res_99887 = 1.0 + erf_res_99884; + double zs_res_99888 = zp_res_99887 / 2.0; + double defunc_0_Q_res_99889 = 1.0 - zs_res_99888; + double y_99890 = fpow64(defunc_2_reduce_res_99845, 2.0); + double negate_arg_99891 = 4.0 * y_99890; + double defunc_0_exp_arg_99892 = 0.0 - negate_arg_99891; + double defunc_0_exp_res_99893 = fpow64(2.718281828459045, + defunc_0_exp_arg_99892); + double x_99894 = defunc_0_Q_res_99889 + defunc_0_exp_res_99893; + double zs_res_99895 = defunc_2_reduce_res_99845 / 1.4142135623730951; + double abs_res_99896 = fabs(zs_res_99895); + double zs_res_99897 = abs_res_99896 / 2.0; + double zp_res_99898 = 1.0 + zs_res_99897; + double zs_res_99899 = 1.0 / zp_res_99898; + double zt_res_99900 = zs_res_99899 * zs_res_99899; + double zt_res_99901 = zs_res_99899 * zt_res_99900; + double zt_res_99902 = zt_res_99900 * zt_res_99900; + double zt_res_99903 = zt_res_99900 * zt_res_99901; + double zt_res_99904 = zt_res_99901 * zt_res_99901; + double zt_res_99905 = zt_res_99901 * zt_res_99902; + double zt_res_99906 = zt_res_99902 * zt_res_99902; + double zt_res_99907 = zt_res_99902 * zt_res_99903; 
+ double zt_res_99908 = 0.17087277 * zt_res_99907; + double zt_res_99909 = 0.82215223 * zt_res_99906; + double zt_res_99910 = 1.48851587 * zt_res_99905; + double zt_res_99911 = 1.13520398 * zt_res_99904; + double zt_res_99912 = 0.27886807 * zt_res_99903; + double zt_res_99913 = 0.18628806 * zt_res_99902; + double zt_res_99914 = 9.678418e-2 * zt_res_99901; + double zt_res_99915 = 0.37409196 * zt_res_99900; + double zt_res_99916 = 1.00002368 * zs_res_99899; + double zt_res_99917 = zs_res_99895 * zs_res_99895; + double zm_res_99918 = 0.0 - zt_res_99917; + double zm_res_99919 = zm_res_99918 - 1.26551223; + double zp_res_99920 = zt_res_99916 + zm_res_99919; + double zp_res_99921 = zt_res_99915 + zp_res_99920; + double zp_res_99922 = zt_res_99914 + zp_res_99921; + double zm_res_99923 = zp_res_99922 - zt_res_99913; + double zp_res_99924 = zt_res_99912 + zm_res_99923; + double zm_res_99925 = zp_res_99924 - zt_res_99911; + double zp_res_99926 = zt_res_99910 + zm_res_99925; + double zm_res_99927 = zp_res_99926 - zt_res_99909; + double zp_res_99928 = zt_res_99908 + zm_res_99927; + double exp_res_99929; + + exp_res_99929 = futrts_exp64(zp_res_99928); + + double zt_res_99930 = zs_res_99899 * exp_res_99929; + bool zgze_res_99931 = 0.0 <= zs_res_99895; + double erf_res_99932; + + if (zgze_res_99931) { + double zm_res_99933 = 1.0 - zt_res_99930; + + erf_res_99932 = zm_res_99933; + } else { + double zm_res_99934 = zt_res_99930 - 1.0; + + erf_res_99932 = zm_res_99934; + } + + double zp_res_99935 = 1.0 + erf_res_99932; + double zs_res_99936 = zp_res_99935 / 2.0; + double defunc_0_Q_res_99937 = 1.0 - zs_res_99936; + double y_99938 = defunc_0_exp_res_99893 * defunc_0_Q_res_99937; + double y_99939 = x_99894 - y_99938; + double pval_brownian_motion_max_res_99940 = 2.0 * y_99939; + + ((__global double *) mem_124127)[gtid_99680] = + pval_brownian_motion_max_res_99940; + } + + error_0: + return; + #undef segmap_group_sizze_99842 +} +__kernel void mainMagnitudezisegmap_99808(__global int 
*global_failure, + int64_t m_73008, __global + unsigned char *defunc_3_map_res_mem_124069, + __global unsigned char *mem_124121) +{ + #define segmap_group_sizze_99816 (mainMagnitudezisegmap_group_sizze_99810) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128563; + int32_t local_tid_128564; + int64_t group_sizze_128567; + int32_t wave_sizze_128566; + int32_t group_tid_128565; + + global_tid_128563 = get_global_id(0); + local_tid_128564 = get_local_id(0); + group_sizze_128567 = get_local_size(0); + wave_sizze_128566 = LOCKSTEP_WIDTH; + group_tid_128565 = get_group_id(0); + + int32_t phys_tid_99808; + + phys_tid_99808 = global_tid_128563; + + int64_t gtid_99807; + + gtid_99807 = sext_i32_i64(group_tid_128565) * segmap_group_sizze_99816 + + sext_i32_i64(local_tid_128564); + if (slt64(gtid_99807, m_73008)) { + int64_t x_99819 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_99807]; + double i64_res_99820 = sitofp_i64_f64(x_99819); + + ((__global double *) mem_124121)[gtid_99807] = i64_res_99820; + } + + error_0: + return; + #undef segmap_group_sizze_99816 +} +__kernel void mainMagnitudezisegmap_99986(__global int *global_failure, + int64_t N_73007, int64_t m_73008, + __global + unsigned char *images_mem_120108, + __global + unsigned char *hist_inds_mem_124138, + __global unsigned char *mem_124142) +{ + #define segmap_group_sizze_100013 (mainMagnitudezisegmap_group_sizze_99989) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128700; + int32_t local_tid_128701; + int64_t group_sizze_128704; + int32_t wave_sizze_128703; + int32_t group_tid_128702; + + global_tid_128700 = get_global_id(0); + local_tid_128701 = get_local_id(0); + group_sizze_128704 = get_local_size(0); + wave_sizze_128703 = LOCKSTEP_WIDTH; + group_tid_128702 = get_group_id(0); + + int32_t 
phys_tid_99986; + + phys_tid_99986 = global_tid_128700; + + int64_t gtid_99984; + + gtid_99984 = squot64(sext_i32_i64(group_tid_128702) * + segmap_group_sizze_100013 + + sext_i32_i64(local_tid_128701), N_73007); + + int64_t gtid_99985; + + gtid_99985 = sext_i32_i64(group_tid_128702) * segmap_group_sizze_100013 + + sext_i32_i64(local_tid_128701) - + squot64(sext_i32_i64(group_tid_128702) * segmap_group_sizze_100013 + + sext_i32_i64(local_tid_128701), N_73007) * N_73007; + if (slt64(gtid_99984, m_73008) && slt64(gtid_99985, N_73007)) { + int64_t x_100016 = ((__global + int64_t *) hist_inds_mem_124138)[gtid_99984]; + bool cond_100019 = slt64(gtid_99985, x_100016); + double defunc_1_f_res_100020; + + if (cond_100019) { + defunc_1_f_res_100020 = NAN; + } else { + double x_100018 = ((__global + double *) images_mem_120108)[gtid_99984 * + N_73007 + + gtid_99985]; + + defunc_1_f_res_100020 = x_100018; + } + ((__global double *) mem_124142)[gtid_99984 * N_73007 + gtid_99985] = + defunc_1_f_res_100020; + } + + error_0: + return; + #undef segmap_group_sizze_100013 +} +__kernel void mainMagnitudezisegmap_intragroup_100360(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124308_backing_aligned_0, + __local volatile + int64_t *mem_124298_backing_aligned_1, + int64_t k2p2zq_73023, + int64_t m_74646, + int64_t nm_74647, __global + unsigned char *defunc_3_map_res_mem_124294, + __global + unsigned char *mem_124318) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124308_backing_1 = (__local volatile + char *) mem_124308_backing_aligned_0; + __local volatile char *restrict mem_124298_backing_0 = (__local volatile + char *) mem_124298_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + 
barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128830; + int32_t local_tid_128831; + int64_t group_sizze_128834; + int32_t wave_sizze_128833; + int32_t group_tid_128832; + + global_tid_128830 = get_global_id(0); + local_tid_128831 = get_local_id(0); + group_sizze_128834 = get_local_size(0); + wave_sizze_128833 = LOCKSTEP_WIDTH; + group_tid_128832 = get_group_id(0); + + int32_t phys_tid_100360; + + phys_tid_100360 = group_tid_128832; + + int32_t ltid_pre_128835; + + ltid_pre_128835 = local_tid_128831; + + int64_t gtid_100298; + + gtid_100298 = sext_i32_i64(group_tid_128832); + + __local char *mem_124298; + + mem_124298 = (__local char *) mem_124298_backing_0; + + int64_t gtid_100301 = sext_i32_i64(ltid_pre_128835); + int32_t phys_tid_100302 = local_tid_128831; + int64_t defunc_0_f_res_100371 = sdiv64(gtid_100301, m_74646); + int64_t defunc_0_f_res_100372 = smod64(gtid_100301, m_74646); + bool cond_100373 = slt64(defunc_0_f_res_100372, k2p2zq_73023); + double defunc_0_f_res_100374; + + if (cond_100373) { + bool x_100375 = sle64((int64_t) 0, defunc_0_f_res_100371); + bool y_100376 = slt64(defunc_0_f_res_100371, k2p2zq_73023); + bool bounds_check_100377 = x_100375 && y_100376; + bool x_100378 = sle64((int64_t) 0, defunc_0_f_res_100372); + bool bounds_check_100379 = cond_100373 && x_100378; + bool index_ok_100380 = bounds_check_100377 && bounds_check_100379; + bool index_certs_100381; + + if (!index_ok_100380) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 615) == -1) { + global_failure_args[0] = defunc_0_f_res_100371; + global_failure_args[1] = defunc_0_f_res_100372; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_100382 = ((__global + double *) defunc_3_map_res_mem_124294)[gtid_100298 * + (k2p2zq_73023 * + k2p2zq_73023) + + defunc_0_f_res_100371 * + k2p2zq_73023 + + defunc_0_f_res_100372]; + + defunc_0_f_res_100374 = 
defunc_0_f_res_t_res_100382; + } else { + int64_t y_100383 = add64(k2p2zq_73023, defunc_0_f_res_100371); + bool cond_100384 = defunc_0_f_res_100372 == y_100383; + double defunc_0_f_res_f_res_100385; + + if (cond_100384) { + defunc_0_f_res_f_res_100385 = 1.0; + } else { + defunc_0_f_res_f_res_100385 = 0.0; + } + defunc_0_f_res_100374 = defunc_0_f_res_f_res_100385; + } + ((__local double *) mem_124298)[gtid_100301] = defunc_0_f_res_100374; + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124308; + + mem_124308 = (__local char *) mem_124308_backing_1; + for (int64_t i_100387 = 0; i_100387 < k2p2zq_73023; i_100387++) { + bool y_100389 = slt64(i_100387, nm_74647); + bool index_certs_100390; + + if (!y_100389) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 616) == -1) { + global_failure_args[0] = i_100387; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_1; + } + } + + double v1_100391 = ((__local double *) mem_124298)[i_100387]; + bool cond_100392 = v1_100391 == 0.0; + int64_t gtid_100319 = sext_i32_i64(ltid_pre_128835); + int32_t phys_tid_100320 = local_tid_128831; + int64_t defunc_0_f_res_100395 = sdiv64(gtid_100319, m_74646); + int64_t defunc_0_f_res_100396 = smod64(gtid_100319, m_74646); + double defunc_0_f_res_100397; + + if (cond_100392) { + int64_t x_100398 = mul64(m_74646, defunc_0_f_res_100395); + int64_t i_100399 = add64(defunc_0_f_res_100396, x_100398); + bool x_100400 = sle64((int64_t) 0, i_100399); + bool y_100401 = slt64(i_100399, nm_74647); + bool bounds_check_100402 = x_100400 && y_100401; + bool index_certs_100403; + + if (!bounds_check_100402) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 617) == + -1) { + global_failure_args[0] = i_100399; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_1; + } + } + + double defunc_0_f_res_t_res_100404 = ((__local + double *) mem_124298)[i_100399]; + 
+ defunc_0_f_res_100397 = defunc_0_f_res_t_res_100404; + } else { + bool x_100405 = sle64((int64_t) 0, defunc_0_f_res_100396); + bool y_100406 = slt64(defunc_0_f_res_100396, nm_74647); + bool bounds_check_100407 = x_100405 && y_100406; + bool index_certs_100408; + + if (!bounds_check_100407) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 618) == + -1) { + global_failure_args[0] = defunc_0_f_res_100396; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_100409 = ((__local + double *) mem_124298)[defunc_0_f_res_100396]; + double x_100410 = x_100409 / v1_100391; + int64_t y_100411 = sub64(k2p2zq_73023, (int64_t) 1); + bool cond_100412 = slt64(defunc_0_f_res_100395, y_100411); + double defunc_0_f_res_f_res_100413; + + if (cond_100412) { + int64_t x_100414 = add64((int64_t) 1, defunc_0_f_res_100395); + int64_t x_100415 = mul64(m_74646, x_100414); + int64_t i_100416 = add64(defunc_0_f_res_100396, x_100415); + bool x_100417 = sle64((int64_t) 0, i_100416); + bool y_100418 = slt64(i_100416, nm_74647); + bool bounds_check_100419 = x_100417 && y_100418; + bool index_certs_100420; + + if (!bounds_check_100419) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 619) == -1) { + global_failure_args[0] = i_100416; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_100421 = ((__local double *) mem_124298)[i_100416]; + int64_t i_100422 = add64(i_100387, x_100415); + bool x_100423 = sle64((int64_t) 0, i_100422); + bool y_100424 = slt64(i_100422, nm_74647); + bool bounds_check_100425 = x_100423 && y_100424; + bool index_certs_100426; + + if (!bounds_check_100425) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 620) == -1) { + global_failure_args[0] = i_100422; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_1; + } + } + + double x_100427 = ((__local double *) mem_124298)[i_100422]; + double y_100428 = x_100410 * 
x_100427; + double defunc_0_f_res_f_res_t_res_100429 = x_100421 - y_100428; + + defunc_0_f_res_f_res_100413 = defunc_0_f_res_f_res_t_res_100429; + } else { + defunc_0_f_res_f_res_100413 = x_100410; + } + defunc_0_f_res_100397 = defunc_0_f_res_f_res_100413; + } + ((__local double *) mem_124308)[gtid_100319] = defunc_0_f_res_100397; + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_100357 = sext_i32_i64(ltid_pre_128835); + int32_t phys_tid_100358 = local_tid_128831; + double write_value_100432 = ((__local + double *) mem_124308)[write_i_100357]; + + if (sle64((int64_t) 0, write_i_100357) && slt64(write_i_100357, + nm_74647)) { + ((__local double *) mem_124298)[write_i_100357] = + write_value_100432; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + for (int64_t i_128837 = 0; i_128837 < sdiv_up64(k2p2zq_73023 * + k2p2zq_73023 - + sext_i32_i64(local_tid_128831), + nm_74647); i_128837++) { + ((__global double *) mem_124318)[gtid_100298 * (k2p2zq_73023 * + k2p2zq_73023) + + squot64(i_128837 * nm_74647 + + sext_i32_i64(local_tid_128831), + k2p2zq_73023) * k2p2zq_73023 + + (i_128837 * nm_74647 + + sext_i32_i64(local_tid_128831) - + squot64(i_128837 * nm_74647 + + sext_i32_i64(local_tid_128831), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) mem_124298)[k2p2zq_73023 + + (squot64(i_128837 * + nm_74647 + + sext_i32_i64(local_tid_128831), + k2p2zq_73023) * + m_74646 + + (i_128837 * + nm_74647 + + sext_i32_i64(local_tid_128831) - + squot64(i_128837 * + nm_74647 + + sext_i32_i64(local_tid_128831), + k2p2zq_73023) * + k2p2zq_73023))]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + error_3: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_100498(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124338_backing_aligned_0, + __local volatile + int64_t *mem_124335_backing_aligned_1, + int64_t k2p2zq_73023, + 
int64_t m_74646, + int64_t nm_74647, + int64_t i_100818, + int64_t ctx_param_ext_124325, + int64_t ctx_param_ext_124326, + int64_t ctx_param_ext_124328, + __global + unsigned char *mem_param_124330, + __global + unsigned char *mem_124342) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124338_backing_1 = (__local volatile + char *) mem_124338_backing_aligned_0; + __local volatile char *restrict mem_124335_backing_0 = (__local volatile + char *) mem_124335_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128859; + int32_t local_tid_128860; + int64_t group_sizze_128863; + int32_t wave_sizze_128862; + int32_t group_tid_128861; + + global_tid_128859 = get_global_id(0); + local_tid_128860 = get_local_id(0); + group_sizze_128863 = get_local_size(0); + wave_sizze_128862 = LOCKSTEP_WIDTH; + group_tid_128861 = get_group_id(0); + + int32_t phys_tid_100498; + + phys_tid_100498 = group_tid_128861; + + int32_t ltid_pre_128864; + + ltid_pre_128864 = local_tid_128860; + + int64_t gtid_100454; + + gtid_100454 = sext_i32_i64(group_tid_128861); + + __local char *mem_124335; + + mem_124335 = (__local char *) mem_124335_backing_0; + ((__local double *) mem_124335)[sext_i32_i64(local_tid_128860)] = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + gtid_100454 * + ctx_param_ext_124326 + + sext_i32_i64(local_tid_128860) * + ctx_param_ext_124328]; + barrier(CLK_LOCAL_MEM_FENCE); + + double v1_100833 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100454 * + ctx_param_ext_124326 + + i_100818 * + ctx_param_ext_124328)]; + bool cond_100834 = v1_100833 == 0.0; + __local char *mem_124338; + + mem_124338 = (__local char *) mem_124338_backing_1; + + int64_t gtid_100457 = 
sext_i32_i64(ltid_pre_128864); + int32_t phys_tid_100458 = local_tid_128860; + int64_t defunc_0_f_res_100837 = sdiv64(gtid_100457, m_74646); + int64_t defunc_0_f_res_100838 = smod64(gtid_100457, m_74646); + double defunc_0_f_res_100839; + + if (cond_100834) { + int64_t x_100840 = mul64(m_74646, defunc_0_f_res_100837); + int64_t i_100841 = add64(defunc_0_f_res_100838, x_100840); + bool x_100842 = sle64((int64_t) 0, i_100841); + bool y_100843 = slt64(i_100841, nm_74647); + bool bounds_check_100844 = x_100842 && y_100843; + bool index_certs_100845; + + if (!bounds_check_100844) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 622) == -1) { + global_failure_args[0] = i_100841; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_100846 = ((__local + double *) mem_124335)[i_100841]; + + defunc_0_f_res_100839 = defunc_0_f_res_t_res_100846; + } else { + bool x_100847 = sle64((int64_t) 0, defunc_0_f_res_100838); + bool y_100848 = slt64(defunc_0_f_res_100838, nm_74647); + bool bounds_check_100849 = x_100847 && y_100848; + bool index_certs_100850; + + if (!bounds_check_100849) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 623) == -1) { + global_failure_args[0] = defunc_0_f_res_100838; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_100851 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100454 * + ctx_param_ext_124326 + + defunc_0_f_res_100838 * + ctx_param_ext_124328)]; + double x_100852 = x_100851 / v1_100833; + int64_t y_100853 = sub64(k2p2zq_73023, (int64_t) 1); + bool cond_100854 = slt64(defunc_0_f_res_100837, y_100853); + double defunc_0_f_res_f_res_100855; + + if (cond_100854) { + int64_t x_100856 = add64((int64_t) 1, defunc_0_f_res_100837); + int64_t x_100857 = mul64(m_74646, x_100856); + int64_t i_100858 = add64(defunc_0_f_res_100838, x_100857); + bool x_100859 = sle64((int64_t) 0, i_100858); 
+ bool y_100860 = slt64(i_100858, nm_74647); + bool bounds_check_100861 = x_100859 && y_100860; + bool index_certs_100862; + + if (!bounds_check_100861) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 624) == + -1) { + global_failure_args[0] = i_100858; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_100863 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100454 * + ctx_param_ext_124326 + + i_100858 * + ctx_param_ext_124328)]; + int64_t i_100864 = add64(i_100818, x_100857); + bool x_100865 = sle64((int64_t) 0, i_100864); + bool y_100866 = slt64(i_100864, nm_74647); + bool bounds_check_100867 = x_100865 && y_100866; + bool index_certs_100868; + + if (!bounds_check_100867) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 625) == + -1) { + global_failure_args[0] = i_100864; + global_failure_args[1] = nm_74647; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_100869 = ((__global + double *) mem_param_124330)[ctx_param_ext_124325 + + (gtid_100454 * + ctx_param_ext_124326 + + i_100864 * + ctx_param_ext_124328)]; + double y_100870 = x_100852 * x_100869; + double defunc_0_f_res_f_res_t_res_100871 = x_100863 - y_100870; + + defunc_0_f_res_f_res_100855 = defunc_0_f_res_f_res_t_res_100871; + } else { + defunc_0_f_res_f_res_100855 = x_100852; + } + defunc_0_f_res_100839 = defunc_0_f_res_f_res_100855; + } + ((__local double *) mem_124338)[gtid_100457] = defunc_0_f_res_100839; + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_100495 = sext_i32_i64(ltid_pre_128864); + int32_t phys_tid_100496 = local_tid_128860; + double write_value_100874 = ((__local double *) mem_124338)[write_i_100495]; + + if (sle64((int64_t) 0, write_i_100495) && slt64(write_i_100495, nm_74647)) { + ((__local double *) mem_124335)[write_i_100495] = write_value_100874; + } + barrier(CLK_LOCAL_MEM_FENCE); + 
((__global double *) mem_124342)[gtid_100454 * nm_74647 + + sext_i32_i64(local_tid_128860)] = ((__local + double *) mem_124335)[sext_i32_i64(local_tid_128860)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_101360(__global + int *global_failure, + __local volatile + int64_t *mem_124893_backing_aligned_0, + __local volatile + int64_t *mem_124891_backing_aligned_1, + __local volatile + int64_t *mem_124889_backing_aligned_2, + __local volatile + int64_t *mem_124887_backing_aligned_3, + int64_t N_73007, + int64_t i_74783, __global + unsigned char *mem_124142, + __global + unsigned char *defunc_3_map_res_mem_124883, + __global + unsigned char *mem_124896, + __global + unsigned char *mem_124899, + __global + unsigned char *mem_124902) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124893_backing_3 = (__local volatile + char *) mem_124893_backing_aligned_0; + __local volatile char *restrict mem_124891_backing_2 = (__local volatile + char *) mem_124891_backing_aligned_1; + __local volatile char *restrict mem_124889_backing_1 = (__local volatile + char *) mem_124889_backing_aligned_2; + __local volatile char *restrict mem_124887_backing_0 = (__local volatile + char *) mem_124887_backing_aligned_3; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129240; + int32_t local_tid_129241; + int64_t group_sizze_129244; + int32_t wave_sizze_129243; + int32_t group_tid_129242; + + global_tid_129240 = get_global_id(0); + local_tid_129241 = get_local_id(0); + group_sizze_129244 = get_local_size(0); + wave_sizze_129243 = LOCKSTEP_WIDTH; + group_tid_129242 = get_group_id(0); + + int32_t phys_tid_101360; + + phys_tid_101360 = group_tid_129242; + + int32_t ltid_pre_129245; + + ltid_pre_129245 = local_tid_129241; + + int64_t gtid_101351; + + gtid_101351 = sext_i32_i64(group_tid_129242); + + __local char *mem_124887; + + mem_124887 = (__local 
char *) mem_124887_backing_0; + + __local char *mem_124889; + + mem_124889 = (__local char *) mem_124889_backing_1; + + int64_t gtid_101354 = sext_i32_i64(ltid_pre_129245); + int32_t phys_tid_101355 = local_tid_129241; + double x_101378 = ((__global double *) mem_124142)[gtid_101351 * N_73007 + + gtid_101354]; + bool isnan_res_101380; + + isnan_res_101380 = futrts_isnan64(x_101378); + + bool cond_101381 = !isnan_res_101380; + double defunc_1_f_res_101382; + + if (cond_101381) { + double x_101379 = ((__global + double *) defunc_3_map_res_mem_124883)[gtid_101351 * + N_73007 + + gtid_101354]; + double defunc_1_f_res_t_res_101383 = x_101378 - x_101379; + + defunc_1_f_res_101382 = defunc_1_f_res_t_res_101383; + } else { + defunc_1_f_res_101382 = NAN; + } + + bool isnan_res_101384; + + isnan_res_101384 = futrts_isnan64(defunc_1_f_res_101382); + + bool defunc_0_p_res_101385 = !isnan_res_101384; + int64_t defunc_0_f_res_101386 = btoi_bool_i64(defunc_0_p_res_101385); + + ((__local int64_t *) mem_124887)[gtid_101354] = defunc_0_f_res_101386; + ((__local double *) mem_124889)[gtid_101354] = defunc_1_f_res_101382; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_129246; + + dims_flat_129246 = N_73007; + + int64_t x_101375; + int64_t x_101376; + int64_t x_129248; + int64_t x_129249; + bool ltid_in_bounds_129251; + + ltid_in_bounds_129251 = slt64(sext_i32_i64(local_tid_129241), N_73007); + + int32_t skip_threads_129252; + + // read input for in-block scan + { + if (ltid_in_bounds_129251) { + x_101376 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)]; + if ((local_tid_129241 - squot32(local_tid_129241, 32) * 32) == 0) { + x_101375 = x_101376; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129252 = 1; + while (slt32(skip_threads_129252, 32)) { + if (sle32(skip_threads_129252, local_tid_129241 - + squot32(local_tid_129241, 32) * 32) && + ltid_in_bounds_129251) { + // read operands + { + x_101375 = ((volatile __local 
+ int64_t *) mem_124887)[sext_i32_i64(local_tid_129241) - + sext_i32_i64(skip_threads_129252)]; + } + // perform operation + { + bool inactive_129253 = + slt64(srem64(sext_i32_i64(local_tid_129241), N_73007), + sext_i32_i64(local_tid_129241) - + sext_i32_i64(local_tid_129241 - + skip_threads_129252)); + + if (inactive_129253) { + x_101375 = x_101376; + } + if (!inactive_129253) { + int64_t defunc_1_op_res_101377 = add64(x_101375, + x_101376); + + x_101375 = defunc_1_op_res_101377; + } + } + } + if (sle32(wave_sizze_129243, skip_threads_129252)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129252, local_tid_129241 - + squot32(local_tid_129241, 32) * 32) && + ltid_in_bounds_129251) { + // write result + { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)] = + x_101375; + x_101376 = x_101375; + } + } + if (sle32(wave_sizze_129243, skip_threads_129252)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129252 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129241 - squot32(local_tid_129241, 32) * 32) == 31 && + ltid_in_bounds_129251) { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129241, + 32))] = x_101375; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129254; + + // read input for in-block scan + { + if (squot32(local_tid_129241, 32) == 0 && ltid_in_bounds_129251) { + x_129249 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)]; + if ((local_tid_129241 - squot32(local_tid_129241, 32) * 32) == + 0) { + x_129248 = x_129249; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129254 = 1; + while (slt32(skip_threads_129254, 32)) { + if (sle32(skip_threads_129254, local_tid_129241 - + squot32(local_tid_129241, 32) * 32) && + (squot32(local_tid_129241, 32) == 
0 && + ltid_in_bounds_129251)) { + // read operands + { + x_129248 = ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241) - + sext_i32_i64(skip_threads_129254)]; + } + // perform operation + { + bool inactive_129255 = + slt64(srem64(sext_i32_i64(local_tid_129241 * 32 + + 32 - 1), N_73007), + sext_i32_i64(local_tid_129241 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129241 - + skip_threads_129254) * + 32 + 32 - 1)); + + if (inactive_129255) { + x_129248 = x_129249; + } + if (!inactive_129255) { + int64_t defunc_1_op_res_129250 = add64(x_129248, + x_129249); + + x_129248 = defunc_1_op_res_129250; + } + } + } + if (sle32(wave_sizze_129243, skip_threads_129254)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129254, local_tid_129241 - + squot32(local_tid_129241, 32) * 32) && + (squot32(local_tid_129241, 32) == 0 && + ltid_in_bounds_129251)) { + // write result + { + ((volatile __local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)] = + x_129248; + x_129249 = x_129248; + } + } + if (sle32(wave_sizze_129243, skip_threads_129254)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129254 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129241, 32) == 0 || !ltid_in_bounds_129251)) { + // read operands + { + x_101376 = x_101375; + x_101375 = ((__local + int64_t *) mem_124887)[sext_i32_i64(squot32(local_tid_129241, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129256 = + slt64(srem64(sext_i32_i64(local_tid_129241), N_73007), + sext_i32_i64(local_tid_129241) - + sext_i32_i64(squot32(local_tid_129241, 32) * 32 - + 1)); + + if (inactive_129256) { + x_101375 = x_101376; + } + if (!inactive_129256) { + int64_t defunc_1_op_res_101377 = add64(x_101375, x_101376); + + x_101375 = defunc_1_op_res_101377; + } + } + // write final result + { + ((__local + int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)] = + x_101375; + } + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129241, 32) == 0) { + ((__local int64_t *) mem_124887)[sext_i32_i64(local_tid_129241)] = + x_101376; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t last_res_101387 = ((__local int64_t *) mem_124887)[i_74783]; + __local char *mem_124891; + + mem_124891 = (__local char *) mem_124891_backing_2; + ((__local double *) mem_124891)[sext_i32_i64(local_tid_129241)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124893; + + mem_124893 = (__local char *) mem_124893_backing_3; + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129241)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_101356 = sext_i32_i64(ltid_pre_129245); + int32_t phys_tid_101357 = local_tid_129241; + double x_101392 = ((__local double *) mem_124889)[write_i_101356]; + bool isnan_res_101395; + + isnan_res_101395 = futrts_isnan64(x_101392); + + bool defunc_0_p_res_101396 = !isnan_res_101395; + int64_t defunc_1_f_res_101397; + + if (defunc_0_p_res_101396) { + int64_t x_101393 = ((__local int64_t *) mem_124887)[write_i_101356]; + int64_t defunc_1_f_res_t_res_101398 = sub64(x_101393, (int64_t) 1); + + defunc_1_f_res_101397 = defunc_1_f_res_t_res_101398; + } else { + defunc_1_f_res_101397 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_101397) && + slt64(defunc_1_f_res_101397, N_73007)) { + ((__local int64_t *) mem_124893)[defunc_1_f_res_101397] = + write_i_101356; + } + if (sle64((int64_t) 0, defunc_1_f_res_101397) && + slt64(defunc_1_f_res_101397, N_73007)) { + ((__local double *) mem_124891)[defunc_1_f_res_101397] = x_101392; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_129241 == 0) { + ((__global int64_t *) mem_124896)[gtid_101351] = last_res_101387; + } + ((__global double *) mem_124899)[gtid_101351 * N_73007 + + sext_i32_i64(local_tid_129241)] = ((__local + double *) mem_124891)[sext_i32_i64(local_tid_129241)]; + barrier(CLK_LOCAL_MEM_FENCE); 
+ ((__global int64_t *) mem_124902)[gtid_101351 * N_73007 + + sext_i32_i64(local_tid_129241)] = + ((__local int64_t *) mem_124893)[sext_i32_i64(local_tid_129241)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_101572(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129343_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129339_backing_aligned_1, + int64_t N_73007, + int64_t n_73011, + double hfrac_73013, + int64_t k2p2_73021, + __global + unsigned char *mem_124142, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *mem_124939, + __global + unsigned char *mem_124941, + __global + unsigned char *mem_124943) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129343_backing_1 = + (__local volatile + char *) red_arr_mem_129343_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129339_backing_0 = + (__local volatile + char *) red_arr_mem_129339_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129333; + int32_t local_tid_129334; + int64_t group_sizze_129337; + int32_t wave_sizze_129336; + int32_t group_tid_129335; + + global_tid_129333 = get_global_id(0); + local_tid_129334 = get_local_id(0); + group_sizze_129337 = get_local_size(0); + wave_sizze_129336 = LOCKSTEP_WIDTH; + group_tid_129335 = get_group_id(0); + + int32_t phys_tid_101572; + + phys_tid_101572 = group_tid_129335; + + int32_t ltid_pre_129338; + + ltid_pre_129338 = local_tid_129334; + + int64_t gtid_101563; + + gtid_101563 = sext_i32_i64(group_tid_129335); + + int64_t defunc_0_f_res_101659; + int64_t gtid_101566 = 
sext_i32_i64(ltid_pre_129338); + int32_t phys_tid_101567 = local_tid_129334; + __local char *red_arr_mem_129339; + + red_arr_mem_129339 = (__local char *) red_arr_mem_129339_backing_0; + + double x_101663; + + x_101663 = ((__global double *) mem_124142)[gtid_101563 * N_73007 + + gtid_101566]; + + bool isnan_res_101664; + + isnan_res_101664 = futrts_isnan64(x_101663); + + bool cond_101665 = !isnan_res_101664; + int64_t defunc_0_f_res_101666 = btoi_bool_i64(cond_101665); + + ((__local int64_t *) red_arr_mem_129339)[gtid_101566] = + defunc_0_f_res_101666; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129341; + int32_t skip_waves_129342; + + skip_waves_129342 = 1; + + int64_t x_101660; + int64_t x_101661; + + offset_129341 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129334, sext_i64_i32(n_73011))) { + x_101660 = ((__local + int64_t *) red_arr_mem_129339)[sext_i32_i64(local_tid_129334 + + offset_129341)]; + } + } + offset_129341 = 1; + while (slt32(offset_129341, wave_sizze_129336)) { + if (slt32(local_tid_129334 + offset_129341, sext_i64_i32(n_73011)) && + ((local_tid_129334 - squot32(local_tid_129334, wave_sizze_129336) * + wave_sizze_129336) & (2 * offset_129341 - 1)) == 0) { + // read array element + { + x_101661 = ((volatile __local + int64_t *) red_arr_mem_129339)[sext_i32_i64(local_tid_129334 + + offset_129341)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_101662 = add64(x_101660, x_101661); + + x_101660 = defunc_1_op_res_101662; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129339)[sext_i32_i64(local_tid_129334)] = + x_101660; + } + } + offset_129341 *= 2; + } + while (slt32(skip_waves_129342, squot32(sext_i64_i32(n_73011) + + wave_sizze_129336 - 1, + wave_sizze_129336))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129341 = skip_waves_129342 * wave_sizze_129336; + if (slt32(local_tid_129334 + offset_129341, sext_i64_i32(n_73011)) && + ((local_tid_129334 - 
squot32(local_tid_129334, wave_sizze_129336) * + wave_sizze_129336) == 0 && (squot32(local_tid_129334, + wave_sizze_129336) & (2 * + skip_waves_129342 - + 1)) == + 0)) { + // read array element + { + x_101661 = ((__local + int64_t *) red_arr_mem_129339)[sext_i32_i64(local_tid_129334 + + offset_129341)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_101662 = add64(x_101660, x_101661); + + x_101660 = defunc_1_op_res_101662; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129339)[sext_i32_i64(local_tid_129334)] = + x_101660; + } + } + skip_waves_129342 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_101659 = ((__local + int64_t *) red_arr_mem_129339)[(int64_t) 0]; + + double defunc_0_f_res_101667; + int64_t gtid_101568 = sext_i32_i64(ltid_pre_129338); + int32_t phys_tid_101569 = local_tid_129334; + __local char *red_arr_mem_129343; + + red_arr_mem_129343 = (__local char *) red_arr_mem_129343_backing_1; + + bool cond_101672; + + cond_101672 = slt64(gtid_101568, defunc_0_f_res_101659); + + double defunc_0_f_res_101673; + + if (cond_101672) { + bool y_101675 = slt64(gtid_101568, N_73007); + bool index_certs_101677; + + if (!y_101675) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 631) == -1) { + global_failure_args[0] = gtid_101568; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_2; + } + } + + double defunc_0_f_res_t_res_101678 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101563 * + N_73007 + + gtid_101568]; + + defunc_0_f_res_101673 = defunc_0_f_res_t_res_101678; + } else { + defunc_0_f_res_101673 = 0.0; + } + + double defunc_0_f_res_101679 = defunc_0_f_res_101673 * + defunc_0_f_res_101673; + + ((__local double *) red_arr_mem_129343)[gtid_101568] = + defunc_0_f_res_101679; + + error_2: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129345; + int32_t skip_waves_129346; + + skip_waves_129346 
= 1; + + double x_101668; + double x_101669; + + offset_129345 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129334, sext_i64_i32(n_73011))) { + x_101668 = ((__local + double *) red_arr_mem_129343)[sext_i32_i64(local_tid_129334 + + offset_129345)]; + } + } + offset_129345 = 1; + while (slt32(offset_129345, wave_sizze_129336)) { + if (slt32(local_tid_129334 + offset_129345, sext_i64_i32(n_73011)) && + ((local_tid_129334 - squot32(local_tid_129334, wave_sizze_129336) * + wave_sizze_129336) & (2 * offset_129345 - 1)) == 0) { + // read array element + { + x_101669 = ((volatile __local + double *) red_arr_mem_129343)[sext_i32_i64(local_tid_129334 + + offset_129345)]; + } + // apply reduction operation + { + double defunc_1_op_res_101670 = x_101668 + x_101669; + + x_101668 = defunc_1_op_res_101670; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129343)[sext_i32_i64(local_tid_129334)] = + x_101668; + } + } + offset_129345 *= 2; + } + while (slt32(skip_waves_129346, squot32(sext_i64_i32(n_73011) + + wave_sizze_129336 - 1, + wave_sizze_129336))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129345 = skip_waves_129346 * wave_sizze_129336; + if (slt32(local_tid_129334 + offset_129345, sext_i64_i32(n_73011)) && + ((local_tid_129334 - squot32(local_tid_129334, wave_sizze_129336) * + wave_sizze_129336) == 0 && (squot32(local_tid_129334, + wave_sizze_129336) & (2 * + skip_waves_129346 - + 1)) == + 0)) { + // read array element + { + x_101669 = ((__local + double *) red_arr_mem_129343)[sext_i32_i64(local_tid_129334 + + offset_129345)]; + } + // apply reduction operation + { + double defunc_1_op_res_101670 = x_101668 + x_101669; + + x_101668 = defunc_1_op_res_101670; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129343)[sext_i32_i64(local_tid_129334)] = + x_101668; + } + } + skip_waves_129346 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_101667 = ((__local + double *) 
red_arr_mem_129343)[(int64_t) 0]; + + int64_t i64_arg_101680 = sub64(defunc_0_f_res_101659, k2p2_73021); + double i64_res_101681 = sitofp_i64_f64(i64_arg_101680); + double sqrt_arg_101682 = defunc_0_f_res_101667 / i64_res_101681; + double sqrt_res_101683; + + sqrt_res_101683 = futrts_sqrt64(sqrt_arg_101682); + + double i64_res_101684 = sitofp_i64_f64(defunc_0_f_res_101659); + double f64_arg_101685 = hfrac_73013 * i64_res_101684; + int64_t f64_res_101686 = fptosi_f64_i64(f64_arg_101685); + + if (local_tid_129334 == 0) { + ((__global int64_t *) mem_124939)[gtid_101563] = f64_res_101686; + } + if (local_tid_129334 == 0) { + ((__global int64_t *) mem_124941)[gtid_101563] = defunc_0_f_res_101659; + } + if (local_tid_129334 == 0) { + ((__global double *) mem_124943)[gtid_101563] = sqrt_res_101683; + } + + error_4: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_102227(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129622_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129620_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129618_backing_aligned_2, + __local volatile + int64_t *mem_125018_backing_aligned_3, + int64_t N_73007, + int64_t n_73011, + int64_t iota_arg_74896, + __global + unsigned char *defunc_4_map_res_mem_124919, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_4_map_res_mem_124921, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *defunc_3_map_res_mem_124960, + __global + unsigned char *defunc_0_f_res_mem_124970, + __global + unsigned char *mem_124973, + __global + unsigned char *mem_125021, + __global + unsigned char *mem_125023) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129622_backing_3 = + (__local 
volatile + char *) red_arr_mem_129622_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129620_backing_2 = + (__local volatile + char *) red_arr_mem_129620_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129618_backing_1 = + (__local volatile + char *) red_arr_mem_129618_backing_aligned_2; + __local volatile char *restrict mem_125018_backing_0 = (__local volatile + char *) mem_125018_backing_aligned_3; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129601; + int32_t local_tid_129602; + int64_t group_sizze_129605; + int32_t wave_sizze_129604; + int32_t group_tid_129603; + + global_tid_129601 = get_global_id(0); + local_tid_129602 = get_local_id(0); + group_sizze_129605 = get_local_size(0); + wave_sizze_129604 = LOCKSTEP_WIDTH; + group_tid_129603 = get_group_id(0); + + int32_t phys_tid_102227; + + phys_tid_102227 = group_tid_129603; + + int32_t ltid_pre_129606; + + ltid_pre_129606 = local_tid_129602; + + int64_t gtid_102218; + + gtid_102218 = sext_i32_i64(group_tid_129603); + + int64_t x_102235; + + x_102235 = ((__global int64_t *) defunc_4_map_res_mem_124919)[gtid_102218]; + + int64_t x_102236 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_102218]; + double x_102237 = ((__global + double *) defunc_3_map_res_mem_124960)[gtid_102218]; + int64_t x_102238 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_102218]; + double x_102239 = ((__global + double *) defunc_0_f_res_mem_124970)[gtid_102218]; + int64_t y_102244 = sub64(x_102235, x_102236); + double i64_res_102245 = sitofp_i64_f64(x_102236); + double sqrt_res_102246; + + sqrt_res_102246 = futrts_sqrt64(i64_res_102245); + + double y_102247 = x_102237 * sqrt_res_102246; + __local char *mem_125018; + + mem_125018 = (__local char *) mem_125018_backing_0; + + int64_t gtid_102221 = 
sext_i32_i64(ltid_pre_129606); + int32_t phys_tid_102222 = local_tid_129602; + bool cond_102260 = sle64(y_102244, gtid_102221); + double defunc_0_f_res_102261; + + if (cond_102260) { + defunc_0_f_res_102261 = 0.0; + } else { + bool cond_102262 = gtid_102221 == (int64_t) 0; + double defunc_0_f_res_f_res_102263; + + if (cond_102262) { + defunc_0_f_res_f_res_102263 = x_102239; + } else { + int64_t i_102264 = add64(gtid_102221, x_102236); + bool x_102265 = sle64((int64_t) 0, i_102264); + bool y_102266 = slt64(i_102264, N_73007); + bool bounds_check_102267 = x_102265 && y_102266; + bool index_certs_102268; + + if (!bounds_check_102267) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 649) == + -1) { + global_failure_args[0] = i_102264; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double x_102269 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102218 * + N_73007 + + i_102264]; + int64_t x_102270 = sub64(x_102236, x_102238); + int64_t i_102271 = add64(gtid_102221, x_102270); + bool x_102272 = sle64((int64_t) 0, i_102271); + bool y_102273 = slt64(i_102271, N_73007); + bool bounds_check_102274 = x_102272 && y_102273; + bool index_certs_102275; + + if (!bounds_check_102274) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 650) == + -1) { + global_failure_args[0] = i_102271; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double y_102276 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_102218 * + N_73007 + + i_102271]; + double defunc_0_f_res_f_res_f_res_102277 = x_102269 - y_102276; + + defunc_0_f_res_f_res_102263 = defunc_0_f_res_f_res_f_res_102277; + } + defunc_0_f_res_102261 = defunc_0_f_res_f_res_102263; + } + ((__local double *) mem_125018)[gtid_102221] = defunc_0_f_res_102261; + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_129607; + + dims_flat_129607 
= iota_arg_74896; + + double x_102256; + double x_102257; + double x_129609; + double x_129610; + bool ltid_in_bounds_129612; + + ltid_in_bounds_129612 = slt64(sext_i32_i64(local_tid_129602), + iota_arg_74896); + + int32_t skip_threads_129613; + + // read input for in-block scan + { + if (ltid_in_bounds_129612) { + x_102257 = ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602)]; + if ((local_tid_129602 - squot32(local_tid_129602, 32) * 32) == 0) { + x_102256 = x_102257; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129613 = 1; + while (slt32(skip_threads_129613, 32)) { + if (sle32(skip_threads_129613, local_tid_129602 - + squot32(local_tid_129602, 32) * 32) && + ltid_in_bounds_129612) { + // read operands + { + x_102256 = ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602) - + sext_i32_i64(skip_threads_129613)]; + } + // perform operation + { + bool inactive_129614 = + slt64(srem64(sext_i32_i64(local_tid_129602), + iota_arg_74896), + sext_i32_i64(local_tid_129602) - + sext_i32_i64(local_tid_129602 - + skip_threads_129613)); + + if (inactive_129614) { + x_102256 = x_102257; + } + if (!inactive_129614) { + double defunc_1_op_res_102258 = x_102256 + x_102257; + + x_102256 = defunc_1_op_res_102258; + } + } + } + if (sle32(wave_sizze_129604, skip_threads_129613)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129613, local_tid_129602 - + squot32(local_tid_129602, 32) * 32) && + ltid_in_bounds_129612) { + // write result + { + ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602)] = + x_102256; + x_102257 = x_102256; + } + } + if (sle32(wave_sizze_129604, skip_threads_129613)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129613 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129602 - squot32(local_tid_129602, 32) * 32) == 31 && + ltid_in_bounds_129612) { + ((volatile 
__local + double *) mem_125018)[sext_i32_i64(squot32(local_tid_129602, + 32))] = x_102256; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129615; + + // read input for in-block scan + { + if (squot32(local_tid_129602, 32) == 0 && ltid_in_bounds_129612) { + x_129610 = ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602)]; + if ((local_tid_129602 - squot32(local_tid_129602, 32) * 32) == + 0) { + x_129609 = x_129610; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129615 = 1; + while (slt32(skip_threads_129615, 32)) { + if (sle32(skip_threads_129615, local_tid_129602 - + squot32(local_tid_129602, 32) * 32) && + (squot32(local_tid_129602, 32) == 0 && + ltid_in_bounds_129612)) { + // read operands + { + x_129609 = ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602) - + sext_i32_i64(skip_threads_129615)]; + } + // perform operation + { + bool inactive_129616 = + slt64(srem64(sext_i32_i64(local_tid_129602 * 32 + + 32 - 1), iota_arg_74896), + sext_i32_i64(local_tid_129602 * 32 + 32 - + 1) - sext_i32_i64((local_tid_129602 - + skip_threads_129615) * + 32 + 32 - 1)); + + if (inactive_129616) { + x_129609 = x_129610; + } + if (!inactive_129616) { + double defunc_1_op_res_129611 = x_129609 + x_129610; + + x_129609 = defunc_1_op_res_129611; + } + } + } + if (sle32(wave_sizze_129604, skip_threads_129615)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129615, local_tid_129602 - + squot32(local_tid_129602, 32) * 32) && + (squot32(local_tid_129602, 32) == 0 && + ltid_in_bounds_129612)) { + // write result + { + ((volatile __local + double *) mem_125018)[sext_i32_i64(local_tid_129602)] = + x_129609; + x_129610 = x_129609; + } + } + if (sle32(wave_sizze_129604, skip_threads_129615)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129615 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); 
+ // carry-in for every block except the first + { + if (!(squot32(local_tid_129602, 32) == 0 || !ltid_in_bounds_129612)) { + // read operands + { + x_102257 = x_102256; + x_102256 = ((__local + double *) mem_125018)[sext_i32_i64(squot32(local_tid_129602, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129617 = + slt64(srem64(sext_i32_i64(local_tid_129602), + iota_arg_74896), + sext_i32_i64(local_tid_129602) - + sext_i32_i64(squot32(local_tid_129602, 32) * 32 - + 1)); + + if (inactive_129617) { + x_102256 = x_102257; + } + if (!inactive_129617) { + double defunc_1_op_res_102258 = x_102256 + x_102257; + + x_102256 = defunc_1_op_res_102258; + } + } + // write final result + { + ((__local + double *) mem_125018)[sext_i32_i64(local_tid_129602)] = + x_102256; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129602, 32) == 0) { + ((__local double *) mem_125018)[sext_i32_i64(local_tid_129602)] = + x_102257; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + bool acc0_102283; + int64_t acc0_102284; + double acc0_102285; + int64_t gtid_102223 = sext_i32_i64(ltid_pre_129606); + int32_t phys_tid_102224 = local_tid_129602; + __local char *red_arr_mem_129618; + + red_arr_mem_129618 = (__local char *) red_arr_mem_129618_backing_1; + + __local char *red_arr_mem_129620; + + red_arr_mem_129620 = (__local char *) red_arr_mem_129620_backing_2; + + __local char *red_arr_mem_129622; + + red_arr_mem_129622 = (__local char *) red_arr_mem_129622_backing_3; + + double x_102300; + + x_102300 = ((__local double *) mem_125018)[gtid_102223]; + + double x_102301 = ((__global double *) mem_124973)[gtid_102223]; + double defunc_0_f_res_102304 = x_102300 / y_102247; + bool cond_102305 = slt64(gtid_102223, y_102244); + bool isnan_res_102306; + + isnan_res_102306 = futrts_isnan64(defunc_0_f_res_102304); + + bool cond_t_res_102307 = !isnan_res_102306; + bool x_102308 = cond_102305 && cond_t_res_102307; + double 
abs_res_102309 = fabs(defunc_0_f_res_102304); + bool defunc_2_f_res_t_res_102310 = x_102301 < abs_res_102309; + bool x_102311 = x_102308 && defunc_2_f_res_t_res_102310; + double defunc_1_f_res_102312; + + if (cond_102305) { + defunc_1_f_res_102312 = defunc_0_f_res_102304; + } else { + defunc_1_f_res_102312 = 0.0; + } + ((__local bool *) red_arr_mem_129618)[gtid_102223] = x_102311; + ((__local int64_t *) red_arr_mem_129620)[gtid_102223] = gtid_102223; + ((__local double *) red_arr_mem_129622)[gtid_102223] = + defunc_1_f_res_102312; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129624; + int32_t skip_waves_129625; + + skip_waves_129625 = 1; + + bool x_102286; + int64_t x_102287; + double x_102288; + bool x_102289; + int64_t x_102290; + double x_102291; + + offset_129624 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129602, sext_i64_i32(iota_arg_74896))) { + x_102286 = ((__local + bool *) red_arr_mem_129618)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102287 = ((__local + int64_t *) red_arr_mem_129620)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102288 = ((__local + double *) red_arr_mem_129622)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + } + } + offset_129624 = 1; + while (slt32(offset_129624, wave_sizze_129604)) { + if (slt32(local_tid_129602 + offset_129624, + sext_i64_i32(iota_arg_74896)) && ((local_tid_129602 - + squot32(local_tid_129602, + wave_sizze_129604) * + wave_sizze_129604) & (2 * + offset_129624 - + 1)) == + 0) { + // read array element + { + x_102289 = ((volatile __local + bool *) red_arr_mem_129618)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102290 = ((volatile __local + int64_t *) red_arr_mem_129620)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102291 = ((volatile __local + double *) red_arr_mem_129622)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + } + // apply reduction operation + { + bool defunc_1_op_res_102292; + int64_t defunc_1_op_res_102293; + 
+ if (x_102286) { + defunc_1_op_res_102292 = x_102286; + defunc_1_op_res_102293 = x_102287; + } else { + bool x_102294 = x_102289 && x_102289; + bool x_102295 = !x_102289; + bool y_102296 = x_102286 && x_102295; + bool defunc_1_op_res_f_res_102297 = x_102294 || y_102296; + int64_t defunc_1_op_res_f_res_102298; + + if (x_102289) { + defunc_1_op_res_f_res_102298 = x_102290; + } else { + defunc_1_op_res_f_res_102298 = x_102287; + } + defunc_1_op_res_102292 = defunc_1_op_res_f_res_102297; + defunc_1_op_res_102293 = defunc_1_op_res_f_res_102298; + } + + double defunc_1_op_res_102299 = x_102288 + x_102291; + + x_102286 = defunc_1_op_res_102292; + x_102287 = defunc_1_op_res_102293; + x_102288 = defunc_1_op_res_102299; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129618)[sext_i32_i64(local_tid_129602)] = + x_102286; + ((volatile __local + int64_t *) red_arr_mem_129620)[sext_i32_i64(local_tid_129602)] = + x_102287; + ((volatile __local + double *) red_arr_mem_129622)[sext_i32_i64(local_tid_129602)] = + x_102288; + } + } + offset_129624 *= 2; + } + while (slt32(skip_waves_129625, squot32(sext_i64_i32(iota_arg_74896) + + wave_sizze_129604 - 1, + wave_sizze_129604))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129624 = skip_waves_129625 * wave_sizze_129604; + if (slt32(local_tid_129602 + offset_129624, + sext_i64_i32(iota_arg_74896)) && ((local_tid_129602 - + squot32(local_tid_129602, + wave_sizze_129604) * + wave_sizze_129604) == 0 && + (squot32(local_tid_129602, + wave_sizze_129604) & + (2 * skip_waves_129625 - + 1)) == 0)) { + // read array element + { + x_102289 = ((__local + bool *) red_arr_mem_129618)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102290 = ((__local + int64_t *) red_arr_mem_129620)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + x_102291 = ((__local + double *) red_arr_mem_129622)[sext_i32_i64(local_tid_129602 + + offset_129624)]; + } + // apply reduction operation + { + bool defunc_1_op_res_102292; + 
int64_t defunc_1_op_res_102293; + + if (x_102286) { + defunc_1_op_res_102292 = x_102286; + defunc_1_op_res_102293 = x_102287; + } else { + bool x_102294 = x_102289 && x_102289; + bool x_102295 = !x_102289; + bool y_102296 = x_102286 && x_102295; + bool defunc_1_op_res_f_res_102297 = x_102294 || y_102296; + int64_t defunc_1_op_res_f_res_102298; + + if (x_102289) { + defunc_1_op_res_f_res_102298 = x_102290; + } else { + defunc_1_op_res_f_res_102298 = x_102287; + } + defunc_1_op_res_102292 = defunc_1_op_res_f_res_102297; + defunc_1_op_res_102293 = defunc_1_op_res_f_res_102298; + } + + double defunc_1_op_res_102299 = x_102288 + x_102291; + + x_102286 = defunc_1_op_res_102292; + x_102287 = defunc_1_op_res_102293; + x_102288 = defunc_1_op_res_102299; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129618)[sext_i32_i64(local_tid_129602)] = + x_102286; + ((__local + int64_t *) red_arr_mem_129620)[sext_i32_i64(local_tid_129602)] = + x_102287; + ((__local + double *) red_arr_mem_129622)[sext_i32_i64(local_tid_129602)] = + x_102288; + } + } + skip_waves_129625 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + acc0_102283 = ((__local bool *) red_arr_mem_129618)[(int64_t) 0]; + acc0_102284 = ((__local int64_t *) red_arr_mem_129620)[(int64_t) 0]; + acc0_102285 = ((__local double *) red_arr_mem_129622)[(int64_t) 0]; + + bool x_102315 = acc0_102283 && acc0_102283; + int64_t defunc_1_op_res_f_res_102319; + + if (acc0_102283) { + defunc_1_op_res_f_res_102319 = acc0_102284; + } else { + defunc_1_op_res_f_res_102319 = (int64_t) -1; + } + + bool cond_102325 = y_102244 == (int64_t) 0; + double defunc_0_f_res_102326; + + if (cond_102325) { + defunc_0_f_res_102326 = 0.0; + } else { + double i64_res_102327 = sitofp_i64_f64(y_102244); + double defunc_0_f_res_f_res_102328 = acc0_102285 / i64_res_102327; + + defunc_0_f_res_102326 = defunc_0_f_res_f_res_102328; + } + + bool cond_102329 = !x_102315; + int64_t fst_breakzq_102330; + + if (cond_102329) { + fst_breakzq_102330 = 
(int64_t) -1; + } else { + bool cond_102331 = slt64(defunc_1_op_res_f_res_102319, y_102244); + int64_t adjustValInds_res_102332; + + if (cond_102331) { + int64_t i_102333 = add64(x_102236, defunc_1_op_res_f_res_102319); + bool x_102334 = sle64((int64_t) 0, i_102333); + bool y_102335 = slt64(i_102333, N_73007); + bool bounds_check_102336 = x_102334 && y_102335; + bool index_certs_102337; + + if (!bounds_check_102336) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 651) == + -1) { + global_failure_args[0] = i_102333; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_3; + } + } + + int64_t x_102338 = ((__global + int64_t *) defunc_4_map_res_mem_124921)[gtid_102218 * + N_73007 + + i_102333]; + int64_t adjustValInds_res_t_res_102339 = sub64(x_102338, n_73011); + + adjustValInds_res_102332 = adjustValInds_res_t_res_102339; + } else { + adjustValInds_res_102332 = (int64_t) -1; + } + fst_breakzq_102330 = adjustValInds_res_102332; + } + + bool cond_102340 = sle64(x_102236, (int64_t) 5); + bool cond_f_res_102341 = sle64(y_102244, (int64_t) 5); + bool x_102342 = !cond_102340; + bool y_102343 = cond_f_res_102341 && x_102342; + bool cond_102344 = cond_102340 || y_102343; + int64_t fst_breakzq_102345; + + if (cond_102344) { + fst_breakzq_102345 = (int64_t) -2; + } else { + fst_breakzq_102345 = fst_breakzq_102330; + } + if (local_tid_129602 == 0) { + ((__global int64_t *) mem_125021)[gtid_102218] = fst_breakzq_102345; + } + if (local_tid_129602 == 0) { + ((__global double *) mem_125023)[gtid_102218] = defunc_0_f_res_102326; + } + + error_3: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_115661(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125194_backing_aligned_0, + __local volatile + int64_t *mem_121428_backing_aligned_1, + __local volatile + int64_t *mem_121409_backing_aligned_2, + __local volatile + int64_t 
*mem_121400_backing_aligned_3, + __local volatile + int64_t *mem_121377_backing_aligned_4, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_y_115659, + int64_t ctx_val_121390, + int64_t num_threads_125922, + __global + unsigned char *mem_121359, + __global + unsigned char *mem_121363, + __global + unsigned char *mem_121366, + __global + unsigned char *mem_121368, + __global + unsigned char *mem_121446, + __global + unsigned char *mem_125177) +{ + #define tile_sizze_115656 (mainMagnitudezitile_sizze_115655) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_125194_backing_8 = (__local volatile + char *) mem_125194_backing_aligned_0; + __local volatile char *restrict mem_121428_backing_7 = (__local volatile + char *) mem_121428_backing_aligned_1; + __local volatile char *restrict mem_121409_backing_2 = (__local volatile + char *) mem_121409_backing_aligned_2; + __local volatile char *restrict mem_121400_backing_1 = (__local volatile + char *) mem_121400_backing_aligned_3; + __local volatile char *restrict mem_121377_backing_0 = (__local volatile + char *) mem_121377_backing_aligned_4; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_126876; + int32_t local_tid_126877; + int64_t group_sizze_126880; + int32_t wave_sizze_126879; + int32_t group_tid_126878; + + global_tid_126876 = get_global_id(0); + local_tid_126877 = get_local_id(0); + group_sizze_126880 = get_local_size(0); + wave_sizze_126879 = LOCKSTEP_WIDTH; + group_tid_126878 = get_group_id(0); + + int32_t gid_flat_115661; + + gid_flat_115661 = group_tid_126878; + + int32_t ltid_pre_126881; + + ltid_pre_126881 = squot32(local_tid_126877, + sext_i64_i32(tile_sizze_115656)); + + int32_t ltid_pre_126882; + + ltid_pre_126882 = local_tid_126877 - 
squot32(local_tid_126877, + sext_i64_i32(tile_sizze_115656)) * + sext_i64_i32(tile_sizze_115656); + + int64_t gid_x_115653; + + gid_x_115653 = squot64(sext_i32_i64(group_tid_126878), num_groups_y_115659); + + int64_t gid_y_115654; + + gid_y_115654 = sext_i32_i64(group_tid_126878) - + squot64(sext_i32_i64(group_tid_126878), num_groups_y_115659) * + num_groups_y_115659; + + int64_t binop_x_115688; + + binop_x_115688 = gid_x_115653 * tile_sizze_115656; + + int64_t binop_x_115690 = gid_y_115654 * tile_sizze_115656; + __local char *mem_121377; + + mem_121377 = (__local char *) mem_121377_backing_0; + + int64_t ltid_y_115680 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115678 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115679 = local_tid_126877; + + if (slt64(ltid_y_115680, tile_sizze_115656) && slt64(ltid_x_115678, + tile_sizze_115656)) { + int64_t gtid_115689 = ltid_y_115680 + binop_x_115688; + int64_t gtid_115691 = ltid_x_115678 + binop_x_115690; + bool binop_x_115692 = slt64(gtid_115689, m_73008); + bool binop_y_115693 = slt64(gtid_115691, k2p2zq_73023); + bool cond_115694 = binop_x_115692 && binop_y_115693; + + if (cond_115694) { + for (int64_t i_126883 = 0; i_126883 < k2p2zq_73023; i_126883++) { + ((__global double *) mem_125177)[gid_flat_115661 + i_126883 * + num_threads_125922] = + ((__global double *) mem_121368)[i_126883]; + } + } + for (int64_t i_126884 = 0; i_126884 < k2p2zq_73023; i_126884++) { + ((__local double *) mem_121377)[ltid_y_115680 * (k2p2zq_73023 * + tile_sizze_115656) + + ltid_x_115678 * k2p2zq_73023 + + i_126884] = ((__global + double *) mem_125177)[gid_flat_115661 + + i_126884 * + num_threads_125922]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_121395[1]; + __local char *mem_121400; + + mem_121400 = (__local char *) mem_121400_backing_1; + + __local char *mem_121409; + + mem_121409 = (__local char *) mem_121409_backing_2; + + double mem_121413[1]; + double mem_125187[1]; + __local char *tiled_inside_loop_mem_121442; + 
__local char *mem_param_121388; + + mem_param_121388 = mem_121377; + for (int64_t i_93867 = 0; i_93867 < k2p2zq_73023; i_93867++) { + int64_t x_93869 = sub64(k2p2zq_73023, i_93867); + int64_t i_93870 = sub64(x_93869, (int64_t) 1); + bool x_93871 = sle64((int64_t) 0, i_93870); + bool y_93872 = slt64(i_93870, k2p2zq_73023); + bool bounds_check_93873 = x_93871 && y_93872; + int64_t j_m_i_93874 = sub64(k2p2zq_73023, x_93869); + bool empty_slice_93875 = j_m_i_93874 == (int64_t) 0; + int64_t m_93876 = sub64(j_m_i_93874, (int64_t) 1); + int64_t i_p_m_t_s_93877 = add64(x_93869, m_93876); + bool zzero_leq_i_p_m_t_s_93878 = sle64((int64_t) 0, i_p_m_t_s_93877); + bool i_p_m_t_s_leq_w_93879 = slt64(i_p_m_t_s_93877, k2p2zq_73023); + bool zzero_lte_i_93880 = sle64((int64_t) 0, x_93869); + bool i_lte_j_93881 = sle64(x_93869, k2p2zq_73023); + bool y_93882 = i_p_m_t_s_leq_w_93879 && zzero_lte_i_93880; + bool y_93883 = zzero_leq_i_p_m_t_s_93878 && y_93882; + bool y_93884 = i_lte_j_93881 && y_93883; + bool forwards_ok_93885 = zzero_lte_i_93880 && y_93884; + bool ok_or_empty_93886 = empty_slice_93875 || forwards_ok_93885; + bool index_ok_93887 = bounds_check_93873 && ok_or_empty_93886; + bool index_certs_93888; + + if (!index_ok_93887) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 514) == -1) { + global_failure_args[0] = i_93870; + global_failure_args[1] = x_93869; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + global_failure_args[4] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_1; + } + } + + bool index_certs_93889; + + if (!ok_or_empty_93886) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 515) == -1) { + global_failure_args[0] = x_93869; + global_failure_args[1] = k2p2zq_73023; + global_failure_args[2] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_1; + } + } + + int64_t num_whole_tiles_115714 = squot64(j_m_i_93874, + tile_sizze_115656); + int64_t ltid_y_115717 = sext_i32_i64(ltid_pre_126881); + 
int64_t ltid_x_115715 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115716 = local_tid_126877; + + if (slt64(ltid_y_115717, tile_sizze_115656) && slt64(ltid_x_115715, + tile_sizze_115656)) { + mem_121395[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_121405[1]; + double mem_param_121396[1]; + + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_121396[i_3] = mem_121395[i_3]; + for (int64_t tile_id_115726 = 0; tile_id_115726 < + num_whole_tiles_115714; tile_id_115726++) { + int64_t binop_x_115802 = tile_sizze_115656 * tile_id_115726; + int64_t ltid_y_115729 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115727 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115728 = local_tid_126877; + int64_t j_115803 = ltid_x_115727 + binop_x_115802; + int64_t gtid_115805 = binop_x_115688 + ltid_y_115729; + bool binop_x_115811 = slt64(j_115803, j_m_i_93874); + bool binop_y_115812 = slt64(gtid_115805, m_73008); + bool cond_115813 = binop_x_115811 && binop_y_115812; + double pre_115814; + + if (cond_115813) { + int64_t slice_119565 = x_93869 + j_115803; + double x_115815 = ((__global + double *) mem_121359)[slice_119565 * + (k2p2zq_73023 * + m_73008) + + gtid_115805 * + k2p2zq_73023 + + i_93870]; + + pre_115814 = x_115815; + } else { + pre_115814 = 0.0; + } + ((__local double *) mem_121400)[ltid_y_115729 * tile_sizze_115656 + + ltid_x_115727] = pre_115814; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119566 = x_93869 + binop_x_115802; + double mem_121404[1]; + int64_t ltid_y_115762 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115760 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115761 = local_tid_126877; + int64_t gtid_115819 = binop_x_115688 + ltid_y_115762; + int64_t gtid_115821 = binop_x_115690 + ltid_x_115760; + double acc_115825 = mem_param_121396[(int64_t) 0]; + bool binop_x_115829 = slt64(gtid_115819, m_73008); + bool binop_y_115830 = 
slt64(gtid_115821, k2p2zq_73023); + bool cond_115831 = binop_x_115829 && binop_y_115830; + double acc_115832; + + if (cond_115831) { + double x_115833; + double redout_119719 = acc_115825; + + for (int64_t i_119720 = 0; i_119720 < tile_sizze_115656; + i_119720++) { + int64_t slice_120008 = slice_119566 + i_119720; + double x_115838 = ((__local + double *) mem_121400)[ltid_y_115762 * + tile_sizze_115656 + + i_119720]; + bool isnan_res_115839; + + isnan_res_115839 = futrts_isnan64(x_115838); + + double defunc_1_f_res_115840; + + if (isnan_res_115839) { + defunc_1_f_res_115840 = 0.0; + } else { + double x_115837 = ((__local + double *) mem_param_121388)[ltid_y_115762 * + ctx_val_121390 + + ltid_x_115760 * + k2p2zq_73023 + + slice_120008]; + double defunc_1_f_res_f_res_115841 = x_115837 * + x_115838; + + defunc_1_f_res_115840 = defunc_1_f_res_f_res_115841; + } + + double defunc_1_op_res_115836 = defunc_1_f_res_115840 + + redout_119719; + double redout_tmp_126889 = defunc_1_op_res_115836; + + redout_119719 = redout_tmp_126889; + } + x_115833 = redout_119719; + acc_115832 = x_115833; + } else { + acc_115832 = acc_115825; + } + mem_121404[(int64_t) 0] = acc_115832; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_126887[1]; + + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_126887[i_4] = mem_121404[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_121396[i_5] = mem_param_tmp_126887[i_5]; + } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_121405[i_6] = mem_param_121396[i_6]; + + int64_t residual_input_115851 = srem64(j_m_i_93874, tile_sizze_115656); + bool cond_115852 = residual_input_115851 == (int64_t) 0; + + if (cond_115852) { + mem_125187[(int64_t) 0] = accs_mem_121405[(int64_t) 0]; + } else { + int64_t binop_x_115929 = tile_sizze_115656 * num_whole_tiles_115714; + int64_t ltid_y_115855 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115853 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115854 = local_tid_126877; + int64_t j_115930 = 
ltid_x_115853 + binop_x_115929; + int64_t gtid_115932 = binop_x_115688 + ltid_y_115855; + bool binop_x_115938 = slt64(j_115930, j_m_i_93874); + bool binop_y_115939 = slt64(gtid_115932, m_73008); + bool cond_115940 = binop_x_115938 && binop_y_115939; + double pre_115941; + + if (cond_115940) { + int64_t slice_119567 = x_93869 + j_115930; + double x_115942 = ((__global + double *) mem_121359)[slice_119567 * + (k2p2zq_73023 * + m_73008) + + gtid_115932 * + k2p2zq_73023 + + i_93870]; + + pre_115941 = x_115942; + } else { + pre_115941 = 0.0; + } + ((__local double *) mem_121409)[ltid_y_115855 * tile_sizze_115656 + + ltid_x_115853] = pre_115941; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119568 = x_93869 + binop_x_115929; + int64_t ltid_y_115889 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115887 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115888 = local_tid_126877; + int64_t gtid_115947 = binop_x_115688 + ltid_y_115889; + int64_t gtid_115949 = binop_x_115690 + ltid_x_115887; + double acc_115953 = accs_mem_121405[(int64_t) 0]; + bool binop_x_115957 = slt64(gtid_115947, m_73008); + bool binop_y_115958 = slt64(gtid_115949, k2p2zq_73023); + bool cond_115959 = binop_x_115957 && binop_y_115958; + double acc_115960; + + if (cond_115959) { + double x_115961; + double redout_119721 = acc_115953; + + for (int64_t i_119722 = 0; i_119722 < residual_input_115851; + i_119722++) { + int64_t slice_120009 = slice_119568 + i_119722; + double x_115966 = ((__local + double *) mem_121409)[ltid_y_115889 * + tile_sizze_115656 + + i_119722]; + bool isnan_res_115967; + + isnan_res_115967 = futrts_isnan64(x_115966); + + double defunc_1_f_res_115968; + + if (isnan_res_115967) { + defunc_1_f_res_115968 = 0.0; + } else { + double x_115965 = ((__local + double *) mem_param_121388)[ltid_y_115889 * + ctx_val_121390 + + ltid_x_115887 * + k2p2zq_73023 + + slice_120009]; + double defunc_1_f_res_f_res_115969 = x_115965 * + x_115966; + + defunc_1_f_res_115968 = 
defunc_1_f_res_f_res_115969; + } + + double defunc_1_op_res_115964 = defunc_1_f_res_115968 + + redout_119721; + double redout_tmp_126890 = defunc_1_op_res_115964; + + redout_119721 = redout_tmp_126890; + } + x_115961 = redout_119721; + acc_115960 = x_115961; + } else { + acc_115960 = acc_115953; + } + mem_121413[(int64_t) 0] = acc_115960; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125187[(int64_t) 0] = mem_121413[(int64_t) 0]; + } + + __local char *mem_121428; + + mem_121428 = (__local char *) mem_121428_backing_7; + + int64_t ltid_y_115973 = sext_i32_i64(ltid_pre_126881); + int64_t ltid_x_115971 = sext_i32_i64(ltid_pre_126882); + int32_t ltid_flat_115972 = local_tid_126877; + + if (slt64(ltid_y_115973, tile_sizze_115656) && slt64(ltid_x_115971, + tile_sizze_115656)) { + int64_t gtid_115982 = binop_x_115688 + ltid_y_115973; + int64_t gtid_115984 = binop_x_115690 + ltid_x_115971; + bool binop_x_115986 = slt64(gtid_115982, m_73008); + bool binop_y_115987 = slt64(gtid_115984, k2p2zq_73023); + bool cond_115988 = binop_x_115986 && binop_y_115987; + __local char *mem_125194; + + mem_125194 = (__local char *) mem_125194_backing_8; + if (cond_115988) { + double defunc_2_reduce_res_115985 = mem_125187[(int64_t) 0]; + bool index_ok_115993 = bounds_check_93873 && bounds_check_93873; + bool index_certs_115994; + + if (!index_ok_115993) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 516) == -1) { + global_failure_args[0] = i_93870; + global_failure_args[1] = i_93870; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_6; + } + } + + double zs_arg_115995 = ((__global + double *) mem_121363)[i_93870 * + (k2p2zq_73023 * + m_73008) + + gtid_115982 * + k2p2zq_73023 + + i_93870]; + bool index_certs_115996; + + if (!bounds_check_93873) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 517) == -1) { + global_failure_args[0] = i_93870; + global_failure_args[1] = k2p2zq_73023; + ; + } + 
local_failure = true; + goto error_6; + } + } + + double zm_arg_115997 = ((__global + double *) mem_121366)[i_93870 * + k2p2zq_73023 + + gtid_115984]; + double zm_res_115998 = zm_arg_115997 - + defunc_2_reduce_res_115985; + double zs_res_115999 = zm_res_115998 / zs_arg_115995; + + ((__local double *) mem_param_121388)[ltid_y_115973 * + ctx_val_121390 + + ltid_x_115971 * + k2p2zq_73023 + i_93870] = + zs_res_115999; + for (int64_t i_126891 = 0; i_126891 < k2p2zq_73023; + i_126891++) { + ((__local double *) mem_125194)[i_126891] = ((__local + double *) mem_param_121388)[ltid_y_115973 * + ctx_val_121390 + + ltid_x_115971 * + k2p2zq_73023 + + i_126891]; + } + } + for (int64_t i_126892 = 0; i_126892 < k2p2zq_73023; i_126892++) { + ((__local double *) mem_121428)[ltid_y_115973 * (k2p2zq_73023 * + tile_sizze_115656) + + ltid_x_115971 * k2p2zq_73023 + + i_126892] = ((__local + double *) mem_125194)[i_126892]; + } + } + + error_6: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_param_tmp_126885; + + mem_param_tmp_126885 = mem_121428; + mem_param_121388 = mem_param_tmp_126885; + } + tiled_inside_loop_mem_121442 = mem_param_121388; + + int64_t thread_out_index_126893 = gid_x_115653 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126881); + int64_t thread_out_index_126894 = gid_y_115654 * tile_sizze_115656 + + sext_i32_i64(ltid_pre_126882); + + if (slt64(thread_out_index_126893, m_73008) && + slt64(thread_out_index_126894, k2p2zq_73023)) { + for (int64_t i_126895 = 0; i_126895 < k2p2zq_73023; i_126895++) { + ((__global double *) mem_121446)[thread_out_index_126893 * + (k2p2zq_73023 * k2p2zq_73023) + + thread_out_index_126894 * + k2p2zq_73023 + i_126895] = + ((__local + double *) tiled_inside_loop_mem_121442)[sext_i32_i64(ltid_pre_126881) * + ctx_val_121390 + + sext_i32_i64(ltid_pre_126882) * + k2p2zq_73023 + + i_126895]; + } + } + + error_7: + return; + #undef tile_sizze_115656 +} +__kernel void 
mainMagnitudezisegmap_intragroup_116023(__global + int *global_failure, + __local volatile + int64_t *mem_121547_backing_aligned_0, + __local volatile + int64_t *mem_121531_backing_aligned_1, + __local volatile + int64_t *mem_121522_backing_aligned_2, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t x_93925, + int64_t i_93926, + int64_t j_m_i_93930, + int64_t num_groups_y_116021, + int64_t num_whole_tiles_116039, + int64_t residual_input_116172, + unsigned char cond_116173, + int64_t num_threads_125927, + __global + unsigned char *mem_120252, + __global + unsigned char *mem_121351, + __global + unsigned char *mem_121458, + __global + unsigned char *mem_121508, + __global + unsigned char *mem_121512, + __global + unsigned char *mem_121551, + __global + unsigned char *mem_125219) +{ + #define tile_sizze_116018 (mainMagnitudezitile_sizze_116017) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_121547_backing_6 = (__local volatile + char *) mem_121547_backing_aligned_0; + __local volatile char *restrict mem_121531_backing_5 = (__local volatile + char *) mem_121531_backing_aligned_1; + __local volatile char *restrict mem_121522_backing_0 = (__local volatile + char *) mem_121522_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126913; + int32_t local_tid_126914; + int64_t group_sizze_126917; + int32_t wave_sizze_126916; + int32_t group_tid_126915; + + global_tid_126913 = get_global_id(0); + local_tid_126914 = get_local_id(0); + group_sizze_126917 = get_local_size(0); + wave_sizze_126916 = LOCKSTEP_WIDTH; + group_tid_126915 = get_group_id(0); + + int32_t gid_flat_116023; + + gid_flat_116023 = group_tid_126915; + + int32_t ltid_pre_126918; + + ltid_pre_126918 = squot32(local_tid_126914, + sext_i64_i32(tile_sizze_116018)); + + int32_t ltid_pre_126919; + + ltid_pre_126919 = local_tid_126914 - squot32(local_tid_126914, + sext_i64_i32(tile_sizze_116018)) * + 
sext_i64_i32(tile_sizze_116018); + + int64_t gid_x_116015; + + gid_x_116015 = squot64(sext_i32_i64(group_tid_126915), num_groups_y_116021); + + int64_t gid_y_116016; + + gid_y_116016 = sext_i32_i64(group_tid_126915) - + squot64(sext_i32_i64(group_tid_126915), num_groups_y_116021) * + num_groups_y_116021; + + double mem_121517[1]; + int64_t ltid_y_116042 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116040 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116041 = local_tid_126914; + + if (slt64(ltid_y_116042, tile_sizze_116018) && slt64(ltid_x_116040, + tile_sizze_116018)) { + mem_121517[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_116127 = gid_x_116015 * tile_sizze_116018; + int64_t binop_x_116142 = gid_y_116016 * tile_sizze_116018; + __local char *mem_121522; + + mem_121522 = (__local char *) mem_121522_backing_0; + + double accs_mem_121527[1]; + double mem_param_121518[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_121518[i_1] = mem_121517[i_1]; + for (int64_t tile_id_116051 = 0; tile_id_116051 < num_whole_tiles_116039; + tile_id_116051++) { + int64_t binop_x_116125 = tile_sizze_116018 * tile_id_116051; + int64_t ltid_y_116054 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116052 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116053 = local_tid_126914; + int64_t j_116126 = ltid_x_116052 + binop_x_116125; + int64_t gtid_116128 = ltid_y_116054 + binop_x_116127; + bool binop_x_116133 = slt64(j_116126, j_m_i_93930); + bool binop_y_116134 = slt64(gtid_116128, m_73008); + bool cond_116135 = binop_x_116133 && binop_y_116134; + double pre_116136; + + if (cond_116135) { + int64_t slice_119569 = x_93925 + j_116126; + double x_116137 = ((__global double *) mem_121458)[slice_119569 * + (k2p2zq_73023 * + m_73008) + + gtid_116128 * + k2p2zq_73023 + + i_93926]; + + pre_116136 = x_116137; + } else { + pre_116136 = 0.0; + } + ((__local double *) mem_121522)[ltid_y_116054 * tile_sizze_116018 + + ltid_x_116052] = 
pre_116136; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119570 = x_93925 + binop_x_116125; + double mem_121526[1]; + int64_t ltid_y_116086 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116084 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116085 = local_tid_126914; + int64_t gtid_116141 = ltid_y_116086 + binop_x_116127; + int64_t gtid_116143 = ltid_x_116084 + binop_x_116142; + double acc_116146 = mem_param_121518[(int64_t) 0]; + bool binop_x_116150 = slt64(gtid_116141, m_73008); + bool binop_y_116151 = slt64(gtid_116143, k2p2zq_73023); + bool cond_116152 = binop_x_116150 && binop_y_116151; + double acc_116153; + + if (cond_116152) { + double x_116154; + double redout_119730 = acc_116146; + + for (int64_t i_119731 = 0; i_119731 < tile_sizze_116018; + i_119731++) { + int64_t slice_120012 = slice_119570 + i_119731; + double x_116159 = ((__local + double *) mem_121522)[ltid_y_116086 * + tile_sizze_116018 + + i_119731]; + bool isnan_res_116160; + + isnan_res_116160 = futrts_isnan64(x_116159); + + double defunc_1_f_res_116161; + + if (isnan_res_116160) { + defunc_1_f_res_116161 = 0.0; + } else { + double x_116158 = ((__global + double *) mem_121512)[slice_120012 * + (k2p2zq_73023 * + m_73008) + + gtid_116141 * + k2p2zq_73023 + + gtid_116143]; + double defunc_1_f_res_f_res_116162 = x_116158 * x_116159; + + defunc_1_f_res_116161 = defunc_1_f_res_f_res_116162; + } + + double defunc_1_op_res_116157 = defunc_1_f_res_116161 + + redout_119730; + double redout_tmp_126922 = defunc_1_op_res_116157; + + redout_119730 = redout_tmp_126922; + } + x_116154 = redout_119730; + acc_116153 = x_116154; + } else { + acc_116153 = acc_116146; + } + mem_121526[(int64_t) 0] = acc_116153; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_126920[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_126920[i_2] = mem_121526[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_121518[i_3] = mem_param_tmp_126920[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + 
accs_mem_121527[i_4] = mem_param_121518[i_4]; + + __local char *mem_121531; + + mem_121531 = (__local char *) mem_121531_backing_5; + + double mem_121535[1]; + double mem_125212[1]; + + if (cond_116173) { + mem_125212[(int64_t) 0] = accs_mem_121527[(int64_t) 0]; + } else { + int64_t binop_x_116248 = tile_sizze_116018 * num_whole_tiles_116039; + int64_t ltid_y_116176 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116174 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116175 = local_tid_126914; + int64_t j_116249 = ltid_x_116174 + binop_x_116248; + int64_t gtid_116251 = binop_x_116127 + ltid_y_116176; + bool binop_x_116256 = slt64(j_116249, j_m_i_93930); + bool binop_y_116257 = slt64(gtid_116251, m_73008); + bool cond_116258 = binop_x_116256 && binop_y_116257; + double pre_116259; + + if (cond_116258) { + int64_t slice_119571 = x_93925 + j_116249; + double x_116260 = ((__global double *) mem_121458)[slice_119571 * + (k2p2zq_73023 * + m_73008) + + gtid_116251 * + k2p2zq_73023 + + i_93926]; + + pre_116259 = x_116260; + } else { + pre_116259 = 0.0; + } + ((__local double *) mem_121531)[ltid_y_116176 * tile_sizze_116018 + + ltid_x_116174] = pre_116259; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119572 = x_93925 + binop_x_116248; + int64_t ltid_y_116209 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116207 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116208 = local_tid_126914; + int64_t gtid_116265 = binop_x_116127 + ltid_y_116209; + int64_t gtid_116267 = binop_x_116142 + ltid_x_116207; + double acc_116270 = accs_mem_121527[(int64_t) 0]; + bool binop_x_116274 = slt64(gtid_116265, m_73008); + bool binop_y_116275 = slt64(gtid_116267, k2p2zq_73023); + bool cond_116276 = binop_x_116274 && binop_y_116275; + double acc_116277; + + if (cond_116276) { + double x_116278; + double redout_119732 = acc_116270; + + for (int64_t i_119733 = 0; i_119733 < residual_input_116172; + i_119733++) { + int64_t slice_120013 = slice_119572 + i_119733; + double x_116283 = 
((__local + double *) mem_121531)[ltid_y_116209 * + tile_sizze_116018 + + i_119733]; + bool isnan_res_116284; + + isnan_res_116284 = futrts_isnan64(x_116283); + + double defunc_1_f_res_116285; + + if (isnan_res_116284) { + defunc_1_f_res_116285 = 0.0; + } else { + double x_116282 = ((__global + double *) mem_121512)[slice_120013 * + (k2p2zq_73023 * + m_73008) + + gtid_116265 * + k2p2zq_73023 + + gtid_116267]; + double defunc_1_f_res_f_res_116286 = x_116282 * x_116283; + + defunc_1_f_res_116285 = defunc_1_f_res_f_res_116286; + } + + double defunc_1_op_res_116281 = defunc_1_f_res_116285 + + redout_119732; + double redout_tmp_126923 = defunc_1_op_res_116281; + + redout_119732 = redout_tmp_126923; + } + x_116278 = redout_119732; + acc_116277 = x_116278; + } else { + acc_116277 = acc_116270; + } + mem_121535[(int64_t) 0] = acc_116277; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125212[(int64_t) 0] = mem_121535[(int64_t) 0]; + } + + __local char *mem_121547; + + mem_121547 = (__local char *) mem_121547_backing_6; + + int64_t ltid_y_116290 = sext_i32_i64(ltid_pre_126918); + int64_t ltid_x_116288 = sext_i32_i64(ltid_pre_126919); + int32_t ltid_flat_116289 = local_tid_126914; + + if (slt64(ltid_y_116290, tile_sizze_116018) && slt64(ltid_x_116288, + tile_sizze_116018)) { + int64_t gtid_116299 = binop_x_116127 + ltid_y_116290; + int64_t gtid_116301 = binop_x_116142 + ltid_x_116288; + bool binop_x_116303 = slt64(gtid_116299, m_73008); + bool binop_y_116304 = slt64(gtid_116301, k2p2zq_73023); + bool cond_116305 = binop_x_116303 && binop_y_116304; + + if (cond_116305) { + double defunc_2_reduce_res_116302 = mem_125212[(int64_t) 0]; + double defunc_3_map_res_r_transformed_row_116309 = ((__global + double *) mem_121351)[gtid_116299 * + (k2p2zq_73023 * + k2p2zq_73023) + + i_93926 * + k2p2zq_73023 + + i_93926]; + double defunc_2_map_res_transformed_row_116311 = ((__global + double *) mem_120252)[gtid_116301 * + k2p2zq_73023 + + i_93926]; + double zm_res_116312 = 
defunc_2_map_res_transformed_row_116311 - + defunc_2_reduce_res_116302; + double zs_res_116313 = zm_res_116312 / + defunc_3_map_res_r_transformed_row_116309; + + ((__global double *) mem_121508)[gtid_116299 * k2p2zq_73023 + + gtid_116301 + i_93926 * + (k2p2zq_73023 * m_73008)] = + zs_res_116313; + for (int64_t i_126924 = 0; i_126924 < k2p2zq_73023; i_126924++) { + ((__global double *) mem_125219)[gid_flat_116023 + i_126924 * + num_threads_125927] = + ((__global double *) mem_121508)[gtid_116299 * + k2p2zq_73023 + + gtid_116301 + i_126924 * + (k2p2zq_73023 * m_73008)]; + } + } + for (int64_t i_126925 = 0; i_126925 < k2p2zq_73023; i_126925++) { + ((__local double *) mem_121547)[ltid_y_116290 * (k2p2zq_73023 * + tile_sizze_116018) + + ltid_x_116288 * k2p2zq_73023 + + i_126925] = ((__global + double *) mem_125219)[gid_flat_116023 + + i_126925 * + num_threads_125927]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t thread_out_index_126926 = gid_x_116015 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126918); + int64_t thread_out_index_126927 = gid_y_116016 * tile_sizze_116018 + + sext_i32_i64(ltid_pre_126919); + + if (slt64(thread_out_index_126926, m_73008) && + slt64(thread_out_index_126927, k2p2zq_73023)) { + for (int64_t i_126928 = 0; i_126928 < k2p2zq_73023; i_126928++) { + ((__global double *) mem_121551)[thread_out_index_126926 * + (k2p2zq_73023 * k2p2zq_73023) + + thread_out_index_126927 * + k2p2zq_73023 + i_126928] = + ((__local double *) mem_121547)[sext_i32_i64(ltid_pre_126918) * + (k2p2zq_73023 * + tile_sizze_116018) + + sext_i32_i64(ltid_pre_126919) * + k2p2zq_73023 + i_126928]; + } + } + + error_6: + return; + #undef tile_sizze_116018 +} +__kernel void mainMagnitudezisegmap_intragroup_116342(__global + int *global_failure, + __local volatile + int64_t *mem_121654_backing_aligned_0, + __local volatile + int64_t *mem_121652_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t gridDim_x_116335, + int64_t gridDim_y_116336, + int64_t 
full_tiles_116367, + int64_t kk_116570, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_r_mem_121609, + __global + unsigned char *mem_121636, + __global + unsigned char *mem_121827) +{ + #define Ty_116322 (mainMagnitudeziTy_116319) + #define Ry_116323 (mainMagnitudeziRy_116321) + #define Tx_116324 (mainMagnitudeziTx_116318) + #define Rx_116325 (mainMagnitudeziRx_116320) + #define Tk_116326 (mainMagnitudeziTk_116317) + #define tk_div_tx_116327 (sdiv_up64(mainMagnitudeziTk_116317, mainMagnitudeziTx_116318)) + #define tk_div_ty_116328 (sdiv_up64(mainMagnitudeziTk_116317, mainMagnitudeziTy_116319)) + #define TxRx_116329 (mainMagnitudeziTx_116318 * mainMagnitudeziRx_116320) + #define TyRy_116330 (mainMagnitudeziTy_116319 * mainMagnitudeziRy_116321) + #define a_loc_szz_116332 (mainMagnitudeziTk_116317 * (mainMagnitudeziTy_116319 * mainMagnitudeziRy_116321)) + #define b_loc_szz_116334 (mainMagnitudeziRx_116320 * (mainMagnitudeziTx_116318 * mainMagnitudeziTk_116317)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_121654_backing_1 = (__local volatile + char *) mem_121654_backing_aligned_0; + __local volatile char *restrict mem_121652_backing_0 = (__local volatile + char *) mem_121652_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127013; + int32_t local_tid_127014; + int64_t group_sizze_127017; + int32_t wave_sizze_127016; + int32_t group_tid_127015; + + global_tid_127013 = get_global_id(0); + local_tid_127014 = get_local_id(0); + group_sizze_127017 = get_local_size(0); + wave_sizze_127016 = LOCKSTEP_WIDTH; + group_tid_127015 = get_group_id(0); + + int32_t gid_flat_116342; + + gid_flat_116342 = group_tid_127015; + + int32_t ltid_pre_127018; + + ltid_pre_127018 = squot32(local_tid_127014, sext_i64_i32(Tx_116324)); + + int32_t ltid_pre_127019; + + ltid_pre_127019 = local_tid_127014 - squot32(local_tid_127014, + sext_i64_i32(Tx_116324)) * + 
sext_i64_i32(Tx_116324); + + int64_t gtid_92226; + + gtid_92226 = squot64(sext_i32_i64(group_tid_127015), gridDim_y_116336 * + gridDim_x_116335); + + int64_t gid_y_116341; + + gid_y_116341 = squot64(sext_i32_i64(group_tid_127015) - + squot64(sext_i32_i64(group_tid_127015), + gridDim_y_116336 * gridDim_x_116335) * + (gridDim_y_116336 * gridDim_x_116335), + gridDim_x_116335); + + int64_t gid_x_116340; + + gid_x_116340 = sext_i32_i64(group_tid_127015) - + squot64(sext_i32_i64(group_tid_127015), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * gridDim_x_116335) - + squot64(sext_i32_i64(group_tid_127015) - + squot64(sext_i32_i64(group_tid_127015), gridDim_y_116336 * + gridDim_x_116335) * (gridDim_y_116336 * + gridDim_x_116335), + gridDim_x_116335) * gridDim_x_116335; + + int64_t iii_116343; + + iii_116343 = TyRy_116330 * gid_y_116341; + + int64_t jjj_116344 = TxRx_116329 * gid_x_116340; + double mem_121650[Ry_116323 * Rx_116325]; + int64_t ltid_y_116347 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_x_116345 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116346 = local_tid_127014; + double mem_121641[Ry_116323 * Rx_116325]; + + for (int64_t i_116358 = 0; i_116358 < Ry_116323; i_116358++) { + for (int64_t i_116361 = 0; i_116361 < Rx_116325; i_116361++) { + mem_121641[i_116358 * Rx_116325 + i_116361] = 0.0; + } + } + for (int64_t i_127022 = 0; i_127022 < Ry_116323; i_127022++) { + for (int64_t i_127023 = 0; i_127023 < Rx_116325; i_127023++) { + mem_121650[i_127022 * Rx_116325 + i_127023] = mem_121641[i_127022 * + Rx_116325 + + i_127023]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_121652; + + mem_121652 = (__local char *) mem_121652_backing_0; + + __local char *mem_121654; + + mem_121654 = (__local char *) mem_121654_backing_1; + + double mem_121725[Ry_116323]; + double mem_121729[Rx_116325]; + double loop_mem_121741[Ry_116323 * Rx_116325]; + double mem_param_121655[Ry_116323 * Rx_116325]; + + for (int32_t i_2 = 0; i_2 < Ry_116323 * 
Rx_116325; i_2++) + mem_param_121655[i_2] = mem_121650[i_2]; + for (int64_t i_116368 = 0; i_116368 < full_tiles_116367; i_116368++) { + int64_t kk_116372 = Tk_116326 * i_116368; + + for (int64_t i_116373 = 0; i_116373 < Ry_116323; i_116373++) { + int64_t binop_y_116396 = Ty_116322 * i_116373; + + for (int64_t i_116375 = 0; i_116375 < tk_div_tx_116327; + i_116375++) { + int64_t binop_y_116394 = Tx_116324 * i_116375; + int64_t ltid_x_116377 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_y_116378 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116379 = local_tid_127014; + int64_t k_116395 = ltid_y_116378 + binop_y_116394; + int64_t i_116397 = ltid_x_116377 + binop_y_116396; + int64_t gtid_116398 = iii_116343 + i_116397; + int64_t A_col_idx_116399 = kk_116372 + k_116395; + bool cond_116400 = slt64(gtid_116398, k2p2zq_73023); + double A_elem_116401; + + if (cond_116400) { + double A_elem_116403 = ((__global + double *) mem_121636)[gtid_92226 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_116398 * + k2p2zq_73023 + + A_col_idx_116399]; + + A_elem_116401 = A_elem_116403; + } else { + A_elem_116401 = 0.0; + } + + bool cond_116405 = slt64(k_116395, Tk_116326); + int64_t a_loc_ind_116406; + + if (cond_116405) { + int64_t binop_y_116407 = Tk_116326 * i_116397; + int64_t loc_fi_116408 = k_116395 + binop_y_116407; + + a_loc_ind_116406 = loc_fi_116408; + } else { + a_loc_ind_116406 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_116406) && + slt64(a_loc_ind_116406, a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116406] = + A_elem_116401; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_116413 = 0; i_116413 < tk_div_ty_116328; i_116413++) { + int64_t binop_y_116434 = Ty_116322 * i_116413; + + for (int64_t i_116415 = 0; i_116415 < Rx_116325; i_116415++) { + int64_t binop_y_116436 = Tx_116324 * i_116415; + int64_t ltid_x_116417 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_y_116418 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116419 
= local_tid_127014; + int64_t k_116435 = ltid_x_116417 + binop_y_116434; + int64_t j_116437 = ltid_y_116418 + binop_y_116436; + int64_t gtid_116438 = jjj_116344 + j_116437; + int64_t B_row_idx_116439 = kk_116372 + k_116435; + bool cond_116440 = slt64(gtid_116438, k2p2zq_73023); + double B_elem_116441; + + if (cond_116440) { + double B_elem_116443 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_92226 * + binop_x_120251 + + B_row_idx_116439 * + k2p2zq_73023 + + gtid_116438]; + + B_elem_116441 = B_elem_116443; + } else { + B_elem_116441 = 0.0; + } + + bool cond_116445 = slt64(k_116435, Tk_116326); + int64_t b_loc_ind_116446; + + if (cond_116445) { + int64_t binop_y_116447 = TxRx_116329 * k_116435; + int64_t loc_fi_116448 = j_116437 + binop_y_116447; + + b_loc_ind_116446 = loc_fi_116448; + } else { + b_loc_ind_116446 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_116446) && + slt64(b_loc_ind_116446, b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116446] = + B_elem_116441; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_121740[Ry_116323 * Rx_116325]; + double mem_param_121712[Ry_116323 * Rx_116325]; + + for (int32_t i_3 = 0; i_3 < Ry_116323 * Rx_116325; i_3++) + mem_param_121712[i_3] = mem_param_121655[i_3]; + for (int64_t i_116453 = 0; i_116453 < Tk_116326; i_116453++) { + int64_t binop_y_116492 = TxRx_116329 * i_116453; + int64_t ltid_y_116457 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_x_116455 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116456 = local_tid_127014; + double mem_121715[Ry_116323]; + double mem_121717[Rx_116325]; + int64_t binop_x_116483 = Ry_116323 * ltid_y_116457; + + for (int64_t i_116481 = 0; i_116481 < Ry_116323; i_116481++) { + int64_t binop_x_116484 = i_116481 + binop_x_116483; + int64_t binop_y_116485 = Tk_116326 * binop_x_116484; + int64_t a_loc_ind_116486 = i_116453 + binop_y_116485; + + for (int64_t i_127035 = 0; i_127035 < (int64_t) 1; i_127035++) { + mem_121715[i_116481 + 
i_127035] = ((__local + double *) mem_121652)[a_loc_ind_116486 + + i_127035]; + } + } + + int64_t binop_y_116494 = Rx_116325 * ltid_x_116455; + + for (int64_t i_116490 = 0; i_116490 < Rx_116325; i_116490++) { + int64_t binop_x_116493 = i_116490 + binop_y_116492; + int64_t b_loc_ind_116495 = binop_x_116493 + binop_y_116494; + + for (int64_t i_127037 = 0; i_127037 < (int64_t) 1; i_127037++) { + mem_121717[i_116490 + i_127037] = ((__local + double *) mem_121654)[b_loc_ind_116495 + + i_127037]; + } + } + for (int64_t i_127038 = 0; i_127038 < Ry_116323; i_127038++) { + mem_121725[i_127038] = mem_121715[i_127038]; + } + for (int64_t i_127039 = 0; i_127039 < Rx_116325; i_127039++) { + mem_121729[i_127039] = mem_121717[i_127039]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_121739[Ry_116323 * Rx_116325]; + int64_t ltid_y_116502 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_x_116500 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116501 = local_tid_127014; + int64_t binop_y_116543 = Ry_116323 * ltid_y_116502; + int64_t binop_y_116547 = Rx_116325 * ltid_x_116500; + + for (int64_t i_116537 = 0; i_116537 < Ry_116323; i_116537++) { + int64_t binop_x_116542 = iii_116343 + i_116537; + int64_t cmpop_x_116544 = binop_x_116542 + binop_y_116543; + bool binop_x_116545 = slt64(cmpop_x_116544, k2p2zq_73023); + + for (int64_t i_116540 = 0; i_116540 < Rx_116325; i_116540++) { + int64_t binop_x_116546 = jjj_116344 + i_116540; + int64_t cmpop_x_116548 = binop_x_116546 + binop_y_116547; + bool binop_y_116549 = slt64(cmpop_x_116548, k2p2zq_73023); + bool cond_116550 = binop_x_116545 && binop_y_116549; + + if (cond_116550) { + double a_116552 = mem_121725[i_116537]; + double b_116553 = mem_121729[i_116540]; + double c_116554 = mem_param_121712[i_116537 * + Rx_116325 + + i_116540]; + double defunc_1_f_res_116557 = a_116552 * b_116553; + double defunc_1_op_res_116561 = c_116554 + + defunc_1_f_res_116557; + + mem_param_121712[i_116537 * Rx_116325 + i_116540] = + 
defunc_1_op_res_116561; + } + } + } + for (int64_t i_127042 = 0; i_127042 < Ry_116323; i_127042++) { + for (int64_t i_127043 = 0; i_127043 < Rx_116325; i_127043++) { + mem_121739[i_127042 * Rx_116325 + i_127043] = + mem_param_121712[i_127042 * Rx_116325 + i_127043]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127032[Ry_116323 * Rx_116325]; + + for (int32_t i_4 = 0; i_4 < Ry_116323 * Rx_116325; i_4++) + mem_param_tmp_127032[i_4] = mem_121739[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_116323 * Rx_116325; i_5++) + mem_param_121712[i_5] = mem_param_tmp_127032[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_116323 * Rx_116325; i_6++) + loop_mem_121740[i_6] = mem_param_121712[i_6]; + + double mem_param_tmp_127024[Ry_116323 * Rx_116325]; + + for (int32_t i_7 = 0; i_7 < Ry_116323 * Rx_116325; i_7++) + mem_param_tmp_127024[i_7] = loop_mem_121740[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_116323 * Rx_116325; i_8++) + mem_param_121655[i_8] = mem_param_tmp_127024[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_116323 * Rx_116325; i_9++) + loop_mem_121741[i_9] = mem_param_121655[i_9]; + for (int64_t i_116571 = 0; i_116571 < Ry_116323; i_116571++) { + int64_t binop_y_116596 = Ty_116322 * i_116571; + + for (int64_t i_116573 = 0; i_116573 < tk_div_tx_116327; i_116573++) { + int64_t binop_y_116594 = Tx_116324 * i_116573; + int64_t ltid_x_116575 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_y_116576 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116577 = local_tid_127014; + int64_t k_116595 = ltid_y_116576 + binop_y_116594; + int64_t i_116597 = ltid_x_116575 + binop_y_116596; + int64_t gtid_116598 = iii_116343 + i_116597; + int64_t A_col_idx_116599 = kk_116570 + k_116595; + bool binop_x_116600 = slt64(gtid_116598, k2p2zq_73023); + bool binop_y_116601 = slt64(A_col_idx_116599, k2p2zq_73023); + bool cond_116602 = binop_x_116600 && binop_y_116601; + double A_elem_116603; + + if (cond_116602) { + double A_elem_116605 = ((__global + double *) mem_121636)[gtid_92226 * + 
(k2p2zq_73023 * + k2p2zq_73023) + + gtid_116598 * + k2p2zq_73023 + + A_col_idx_116599]; + + A_elem_116603 = A_elem_116605; + } else { + A_elem_116603 = 0.0; + } + + bool cond_116607 = slt64(k_116595, Tk_116326); + int64_t a_loc_ind_116608; + + if (cond_116607) { + int64_t binop_y_116609 = Tk_116326 * i_116597; + int64_t loc_fi_116610 = k_116595 + binop_y_116609; + + a_loc_ind_116608 = loc_fi_116610; + } else { + a_loc_ind_116608 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_116608) && slt64(a_loc_ind_116608, + a_loc_szz_116332)) { + ((__local double *) mem_121652)[a_loc_ind_116608] = + A_elem_116603; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_116615 = 0; i_116615 < tk_div_ty_116328; i_116615++) { + int64_t binop_y_116638 = Ty_116322 * i_116615; + + for (int64_t i_116617 = 0; i_116617 < Rx_116325; i_116617++) { + int64_t binop_y_116640 = Tx_116324 * i_116617; + int64_t ltid_x_116619 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_y_116620 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116621 = local_tid_127014; + int64_t k_116639 = ltid_x_116619 + binop_y_116638; + int64_t j_116641 = ltid_y_116620 + binop_y_116640; + int64_t gtid_116642 = jjj_116344 + j_116641; + int64_t B_row_idx_116643 = kk_116570 + k_116639; + bool binop_x_116644 = slt64(gtid_116642, k2p2zq_73023); + bool binop_y_116645 = slt64(B_row_idx_116643, k2p2zq_73023); + bool cond_116646 = binop_x_116644 && binop_y_116645; + double B_elem_116647; + + if (cond_116646) { + double B_elem_116649 = ((__global + double *) defunc_3_map_res_r_mem_121609)[gtid_92226 * + binop_x_120251 + + B_row_idx_116643 * + k2p2zq_73023 + + gtid_116642]; + + B_elem_116647 = B_elem_116649; + } else { + B_elem_116647 = 0.0; + } + + bool cond_116651 = slt64(k_116639, Tk_116326); + int64_t b_loc_ind_116652; + + if (cond_116651) { + int64_t binop_y_116653 = TxRx_116329 * k_116639; + int64_t loc_fi_116654 = j_116641 + binop_y_116653; + + b_loc_ind_116652 = loc_fi_116654; + } else { + b_loc_ind_116652 = 
(int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_116652) && slt64(b_loc_ind_116652, + b_loc_szz_116334)) { + ((__local double *) mem_121654)[b_loc_ind_116652] = + B_elem_116647; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_121807[Ry_116323]; + double mem_121811[Rx_116325]; + double mem_121821[Ry_116323 * Rx_116325]; + double loop_mem_121823[Ry_116323 * Rx_116325]; + double mem_param_121794[Ry_116323 * Rx_116325]; + + for (int32_t i_10 = 0; i_10 < Ry_116323 * Rx_116325; i_10++) + mem_param_121794[i_10] = loop_mem_121741[i_10]; + for (int64_t i_116659 = 0; i_116659 < Tk_116326; i_116659++) { + int64_t cmpop_x_116661 = kk_116570 + i_116659; + bool cond_116662 = slt64(cmpop_x_116661, k2p2zq_73023); + double mem_125235[Ry_116323 * Rx_116325]; + + if (cond_116662) { + int64_t binop_y_116700 = TxRx_116329 * i_116659; + int64_t bytes_121796 = (int64_t) 8 * Ry_116323; + int64_t bytes_121798 = (int64_t) 8 * Rx_116325; + int64_t ltid_y_116665 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_x_116663 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116664 = local_tid_127014; + double mem_121797[Ry_116323]; + double mem_121799[Rx_116325]; + int64_t binop_x_116691 = Ry_116323 * ltid_y_116665; + + for (int64_t i_116689 = 0; i_116689 < Ry_116323; i_116689++) { + int64_t binop_x_116692 = i_116689 + binop_x_116691; + int64_t binop_y_116693 = Tk_116326 * binop_x_116692; + int64_t a_loc_ind_116694 = i_116659 + binop_y_116693; + + for (int64_t i_127051 = 0; i_127051 < (int64_t) 1; i_127051++) { + mem_121797[i_116689 + i_127051] = ((__local + double *) mem_121652)[a_loc_ind_116694 + + i_127051]; + } + } + + int64_t binop_y_116702 = Rx_116325 * ltid_x_116663; + + for (int64_t i_116698 = 0; i_116698 < Rx_116325; i_116698++) { + int64_t binop_x_116701 = i_116698 + binop_y_116700; + int64_t b_loc_ind_116703 = binop_x_116701 + binop_y_116702; + + for (int64_t i_127053 = 0; i_127053 < (int64_t) 1; i_127053++) { + mem_121799[i_116698 + i_127053] = ((__local + double *) 
mem_121654)[b_loc_ind_116703 + + i_127053]; + } + } + for (int64_t i_127054 = 0; i_127054 < Ry_116323; i_127054++) { + mem_121807[i_127054] = mem_121797[i_127054]; + } + for (int64_t i_127055 = 0; i_127055 < Rx_116325; i_127055++) { + mem_121811[i_127055] = mem_121799[i_127055]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_116710 = sext_i32_i64(ltid_pre_127018); + int64_t ltid_x_116708 = sext_i32_i64(ltid_pre_127019); + int32_t ltid_flat_116709 = local_tid_127014; + int64_t binop_y_116751 = Ry_116323 * ltid_y_116710; + int64_t binop_y_116755 = Rx_116325 * ltid_x_116708; + + for (int64_t i_116745 = 0; i_116745 < Ry_116323; i_116745++) { + int64_t binop_x_116750 = iii_116343 + i_116745; + int64_t cmpop_x_116752 = binop_x_116750 + binop_y_116751; + bool binop_x_116753 = slt64(cmpop_x_116752, k2p2zq_73023); + + for (int64_t i_116748 = 0; i_116748 < Rx_116325; i_116748++) { + int64_t binop_x_116754 = jjj_116344 + i_116748; + int64_t cmpop_x_116756 = binop_x_116754 + binop_y_116755; + bool binop_y_116757 = slt64(cmpop_x_116756, k2p2zq_73023); + bool cond_116758 = binop_x_116753 && binop_y_116757; + + if (cond_116758) { + double a_116760 = mem_121807[i_116745]; + double b_116761 = mem_121811[i_116748]; + double c_116762 = mem_param_121794[i_116745 * + Rx_116325 + + i_116748]; + double defunc_1_f_res_116765 = a_116760 * b_116761; + double defunc_1_op_res_116769 = c_116762 + + defunc_1_f_res_116765; + + mem_param_121794[i_116745 * Rx_116325 + i_116748] = + defunc_1_op_res_116769; + } + } + } + for (int64_t i_127058 = 0; i_127058 < Ry_116323; i_127058++) { + for (int64_t i_127059 = 0; i_127059 < Rx_116325; i_127059++) { + mem_121821[i_127058 * Rx_116325 + i_127059] = + mem_param_121794[i_127058 * Rx_116325 + i_127059]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127060 = 0; i_127060 < Ry_116323; i_127060++) { + for (int64_t i_127061 = 0; i_127061 < Rx_116325; i_127061++) { + mem_125235[i_127060 * Rx_116325 + i_127061] = + mem_121821[i_127060 * 
Rx_116325 + i_127061]; + } + } + } else { + for (int64_t i_127062 = 0; i_127062 < Ry_116323; i_127062++) { + for (int64_t i_127063 = 0; i_127063 < Rx_116325; i_127063++) { + mem_125235[i_127062 * Rx_116325 + i_127063] = + mem_param_121794[i_127062 * Rx_116325 + i_127063]; + } + } + } + + double mem_param_tmp_127048[Ry_116323 * Rx_116325]; + + for (int32_t i_11 = 0; i_11 < Ry_116323 * Rx_116325; i_11++) + mem_param_tmp_127048[i_11] = mem_125235[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_116323 * Rx_116325; i_12++) + mem_param_121794[i_12] = mem_param_tmp_127048[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_116323 * Rx_116325; i_13++) + loop_mem_121823[i_13] = mem_param_121794[i_13]; + + int64_t reg_tile_i_127064 = squot64(sext_i32_i64(local_tid_127014), + Ty_116322 * Tx_116324); + int64_t reg_tile_i_127065 = squot64(sext_i32_i64(local_tid_127014) - + squot64(sext_i32_i64(local_tid_127014), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), Tx_116324); + int64_t reg_tile_i_127066 = sext_i32_i64(local_tid_127014) - + squot64(sext_i32_i64(local_tid_127014), Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324) - squot64(sext_i32_i64(local_tid_127014) - + squot64(sext_i32_i64(local_tid_127014), + Ty_116322 * Tx_116324) * + (Ty_116322 * Tx_116324), + Tx_116324) * Tx_116324; + int64_t tile_dim_start_127067 = gtid_92226 + reg_tile_i_127064; + int64_t tile_dim_start_127068 = Ry_116323 * (Ty_116322 * gid_y_116341 + + reg_tile_i_127065); + int64_t tile_dim_start_127069 = Rx_116325 * (Tx_116324 * gid_x_116340 + + reg_tile_i_127066); + + for (int64_t nest_i_127070 = 0; nest_i_127070 < (int64_t) 1; + nest_i_127070++) { + for (int64_t nest_i_127071 = 0; nest_i_127071 < Ry_116323; + nest_i_127071++) { + for (int64_t nest_i_127072 = 0; nest_i_127072 < Rx_116325; + nest_i_127072++) { + if ((slt64(tile_dim_start_127067 + nest_i_127070, m_73008) && + slt64(tile_dim_start_127068 + nest_i_127071, + k2p2zq_73023)) && slt64(tile_dim_start_127069 + + nest_i_127072, + k2p2zq_73023)) { + 
((__global double *) mem_121827)[(tile_dim_start_127067 + + nest_i_127070) * + (k2p2zq_73023 * + k2p2zq_73023) + + (tile_dim_start_127068 + + nest_i_127071) * + k2p2zq_73023 + + (tile_dim_start_127069 + + nest_i_127072)] = + loop_mem_121823[squot64(nest_i_127071 * Rx_116325 + + nest_i_127072 - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072 - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325 + + (nest_i_127071 * Rx_116325 + + nest_i_127072 - squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * Rx_116325) - + squot64(nest_i_127071 * Rx_116325 + + nest_i_127072 - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325), Ry_116323 * + Rx_116325) * (Ry_116323 * + Rx_116325) - + squot64(nest_i_127071 * Rx_116325 + + nest_i_127072 - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * Ry_116323 * + Rx_116325) * + (Tx_116324 * Ry_116323 * + Rx_116325) - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072 - + squot64(nest_i_127071 * + Rx_116325 + + nest_i_127072, + Tx_116324 * + Ry_116323 * + Rx_116325) * + (Tx_116324 * + Ry_116323 * + Rx_116325), + Ry_116323 * + Rx_116325) * + (Ry_116323 * Rx_116325), + Rx_116325) * Rx_116325)]; + } + } + } + } + + error_9: + return; + #undef Ty_116322 + #undef Ry_116323 + #undef Tx_116324 + #undef Rx_116325 + #undef Tk_116326 + #undef tk_div_tx_116327 + #undef tk_div_ty_116328 + #undef TxRx_116329 + #undef TyRy_116330 + #undef a_loc_szz_116332 + #undef b_loc_szz_116334 +} +__kernel void mainMagnitudezisegmap_intragroup_116784(__global + int 
*global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_125292_backing_aligned_0, + __local volatile + int64_t *mem_123215_backing_aligned_1, + __local volatile + int64_t *mem_123196_backing_aligned_2, + __local volatile + int64_t *mem_123187_backing_aligned_3, + __local volatile + int64_t *mem_123164_backing_aligned_4, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_y_116782, + int64_t ctx_val_123177, + int64_t num_threads_125992, + __global + unsigned char *mem_121944, + __global + unsigned char *mem_121946, + __global + unsigned char *mem_123151, + __global + unsigned char *mem_123155, + __global + unsigned char *mem_123233, + __global + unsigned char *mem_125275) +{ + #define tile_sizze_116779 (mainMagnitudezitile_sizze_116778) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_125292_backing_8 = (__local volatile + char *) mem_125292_backing_aligned_0; + __local volatile char *restrict mem_123215_backing_7 = (__local volatile + char *) mem_123215_backing_aligned_1; + __local volatile char *restrict mem_123196_backing_2 = (__local volatile + char *) mem_123196_backing_aligned_2; + __local volatile char *restrict mem_123187_backing_1 = (__local volatile + char *) mem_123187_backing_aligned_3; + __local volatile char *restrict mem_123164_backing_0 = (__local volatile + char *) mem_123164_backing_aligned_4; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_127660; + int32_t local_tid_127661; + int64_t group_sizze_127664; + int32_t wave_sizze_127663; + int32_t group_tid_127662; + + global_tid_127660 = get_global_id(0); + local_tid_127661 = get_local_id(0); + group_sizze_127664 = get_local_size(0); + wave_sizze_127663 = LOCKSTEP_WIDTH; + 
group_tid_127662 = get_group_id(0); + + int32_t gid_flat_116784; + + gid_flat_116784 = group_tid_127662; + + int32_t ltid_pre_127665; + + ltid_pre_127665 = squot32(local_tid_127661, + sext_i64_i32(tile_sizze_116779)); + + int32_t ltid_pre_127666; + + ltid_pre_127666 = local_tid_127661 - squot32(local_tid_127661, + sext_i64_i32(tile_sizze_116779)) * + sext_i64_i32(tile_sizze_116779); + + int64_t gid_x_116776; + + gid_x_116776 = squot64(sext_i32_i64(group_tid_127662), num_groups_y_116782); + + int64_t gid_y_116777; + + gid_y_116777 = sext_i32_i64(group_tid_127662) - + squot64(sext_i32_i64(group_tid_127662), num_groups_y_116782) * + num_groups_y_116782; + + int64_t binop_x_116811; + + binop_x_116811 = gid_x_116776 * tile_sizze_116779; + + int64_t binop_x_116813 = gid_y_116777 * tile_sizze_116779; + __local char *mem_123164; + + mem_123164 = (__local char *) mem_123164_backing_0; + + int64_t ltid_y_116803 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_116801 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_116802 = local_tid_127661; + + if (slt64(ltid_y_116803, tile_sizze_116779) && slt64(ltid_x_116801, + tile_sizze_116779)) { + int64_t gtid_116812 = ltid_y_116803 + binop_x_116811; + int64_t gtid_116814 = ltid_x_116801 + binop_x_116813; + bool binop_x_116815 = slt64(gtid_116812, m_73008); + bool binop_y_116816 = slt64(gtid_116814, k2p2zq_73023); + bool cond_116817 = binop_x_116815 && binop_y_116816; + + if (cond_116817) { + for (int64_t i_127667 = 0; i_127667 < k2p2zq_73023; i_127667++) { + ((__global double *) mem_125275)[gid_flat_116784 + i_127667 * + num_threads_125992] = + ((__global double *) mem_121946)[i_127667]; + } + } + for (int64_t i_127668 = 0; i_127668 < k2p2zq_73023; i_127668++) { + ((__local double *) mem_123164)[ltid_y_116803 * (k2p2zq_73023 * + tile_sizze_116779) + + ltid_x_116801 * k2p2zq_73023 + + i_127668] = ((__global + double *) mem_125275)[gid_flat_116784 + + i_127668 * + num_threads_125992]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + 
double mem_123182[1]; + __local char *mem_123187; + + mem_123187 = (__local char *) mem_123187_backing_1; + + __local char *mem_123196; + + mem_123196 = (__local char *) mem_123196_backing_2; + + double mem_123200[1]; + double mem_125285[1]; + __local char *tiled_inside_loop_mem_123229; + __local char *mem_param_123175; + + mem_param_123175 = mem_123164; + for (int64_t i_97767 = 0; i_97767 < k2p2zq_73023; i_97767++) { + int64_t x_97769 = sub64(k2p2zq_73023, i_97767); + int64_t i_97770 = sub64(x_97769, (int64_t) 1); + bool x_97771 = sle64((int64_t) 0, i_97770); + bool y_97772 = slt64(i_97770, k2p2zq_73023); + bool bounds_check_97773 = x_97771 && y_97772; + int64_t j_m_i_97774 = sub64(k2p2zq_73023, x_97769); + bool empty_slice_97775 = j_m_i_97774 == (int64_t) 0; + int64_t m_97776 = sub64(j_m_i_97774, (int64_t) 1); + int64_t i_p_m_t_s_97777 = add64(x_97769, m_97776); + bool zzero_leq_i_p_m_t_s_97778 = sle64((int64_t) 0, i_p_m_t_s_97777); + bool i_p_m_t_s_leq_w_97779 = slt64(i_p_m_t_s_97777, k2p2zq_73023); + bool zzero_lte_i_97780 = sle64((int64_t) 0, x_97769); + bool i_lte_j_97781 = sle64(x_97769, k2p2zq_73023); + bool y_97782 = i_p_m_t_s_leq_w_97779 && zzero_lte_i_97780; + bool y_97783 = zzero_leq_i_p_m_t_s_97778 && y_97782; + bool y_97784 = i_lte_j_97781 && y_97783; + bool forwards_ok_97785 = zzero_lte_i_97780 && y_97784; + bool ok_or_empty_97786 = empty_slice_97775 || forwards_ok_97785; + bool index_ok_97787 = bounds_check_97773 && ok_or_empty_97786; + bool index_certs_97788; + + if (!index_ok_97787) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 602) == -1) { + global_failure_args[0] = i_97770; + global_failure_args[1] = x_97769; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + global_failure_args[4] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_1; + } + } + + bool index_certs_97789; + + if (!ok_or_empty_97786) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 603) == -1) { + 
global_failure_args[0] = x_97769; + global_failure_args[1] = k2p2zq_73023; + global_failure_args[2] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_1; + } + } + + int64_t num_whole_tiles_116837 = squot64(j_m_i_97774, + tile_sizze_116779); + int64_t ltid_y_116840 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_116838 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_116839 = local_tid_127661; + + if (slt64(ltid_y_116840, tile_sizze_116779) && slt64(ltid_x_116838, + tile_sizze_116779)) { + mem_123182[(int64_t) 0] = 0.0; + } + + error_1: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + double accs_mem_123192[1]; + double mem_param_123183[1]; + + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_123183[i_3] = mem_123182[i_3]; + for (int64_t tile_id_116849 = 0; tile_id_116849 < + num_whole_tiles_116837; tile_id_116849++) { + int64_t binop_x_116925 = tile_sizze_116779 * tile_id_116849; + int64_t ltid_y_116852 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_116850 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_116851 = local_tid_127661; + int64_t j_116926 = ltid_x_116850 + binop_x_116925; + int64_t gtid_116928 = binop_x_116811 + ltid_y_116852; + bool binop_x_116934 = slt64(j_116926, j_m_i_97774); + bool binop_y_116935 = slt64(gtid_116928, m_73008); + bool cond_116936 = binop_x_116934 && binop_y_116935; + double pre_116937; + + if (cond_116936) { + int64_t slice_119577 = x_97769 + j_116926; + double x_116938 = ((__global + double *) mem_123151)[slice_119577 * + (k2p2zq_73023 * + m_73008) + + gtid_116928 * + k2p2zq_73023 + + i_97770]; + + pre_116937 = x_116938; + } else { + pre_116937 = 0.0; + } + ((__local double *) mem_123187)[ltid_y_116852 * tile_sizze_116779 + + ltid_x_116850] = pre_116937; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119578 = x_97769 + binop_x_116925; + double mem_123191[1]; + int64_t ltid_y_116885 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_116883 = 
sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_116884 = local_tid_127661; + int64_t gtid_116942 = binop_x_116811 + ltid_y_116885; + int64_t gtid_116944 = binop_x_116813 + ltid_x_116883; + double acc_116948 = mem_param_123183[(int64_t) 0]; + bool binop_x_116952 = slt64(gtid_116942, m_73008); + bool binop_y_116953 = slt64(gtid_116944, k2p2zq_73023); + bool cond_116954 = binop_x_116952 && binop_y_116953; + double acc_116955; + + if (cond_116954) { + double x_116956; + double redout_119837 = acc_116948; + + for (int64_t i_119838 = 0; i_119838 < tile_sizze_116779; + i_119838++) { + int64_t slice_120037 = slice_119578 + i_119838; + double x_116961 = ((__local + double *) mem_123187)[ltid_y_116885 * + tile_sizze_116779 + + i_119838]; + bool isnan_res_116962; + + isnan_res_116962 = futrts_isnan64(x_116961); + + double defunc_1_f_res_116963; + + if (isnan_res_116962) { + defunc_1_f_res_116963 = 0.0; + } else { + double x_116960 = ((__local + double *) mem_param_123175)[ltid_y_116885 * + ctx_val_123177 + + ltid_x_116883 * + k2p2zq_73023 + + slice_120037]; + double defunc_1_f_res_f_res_116964 = x_116960 * + x_116961; + + defunc_1_f_res_116963 = defunc_1_f_res_f_res_116964; + } + + double defunc_1_op_res_116959 = defunc_1_f_res_116963 + + redout_119837; + double redout_tmp_127673 = defunc_1_op_res_116959; + + redout_119837 = redout_tmp_127673; + } + x_116956 = redout_119837; + acc_116955 = x_116956; + } else { + acc_116955 = acc_116948; + } + mem_123191[(int64_t) 0] = acc_116955; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127671[1]; + + for (int32_t i_4 = 0; i_4 < 1; i_4++) + mem_param_tmp_127671[i_4] = mem_123191[i_4]; + for (int32_t i_5 = 0; i_5 < 1; i_5++) + mem_param_123183[i_5] = mem_param_tmp_127671[i_5]; + } + for (int32_t i_6 = 0; i_6 < 1; i_6++) + accs_mem_123192[i_6] = mem_param_123183[i_6]; + + int64_t residual_input_116974 = srem64(j_m_i_97774, tile_sizze_116779); + bool cond_116975 = residual_input_116974 == (int64_t) 0; + + if (cond_116975) { + 
mem_125285[(int64_t) 0] = accs_mem_123192[(int64_t) 0]; + } else { + int64_t binop_x_117052 = tile_sizze_116779 * num_whole_tiles_116837; + int64_t ltid_y_116978 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_116976 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_116977 = local_tid_127661; + int64_t j_117053 = ltid_x_116976 + binop_x_117052; + int64_t gtid_117055 = binop_x_116811 + ltid_y_116978; + bool binop_x_117061 = slt64(j_117053, j_m_i_97774); + bool binop_y_117062 = slt64(gtid_117055, m_73008); + bool cond_117063 = binop_x_117061 && binop_y_117062; + double pre_117064; + + if (cond_117063) { + int64_t slice_119579 = x_97769 + j_117053; + double x_117065 = ((__global + double *) mem_123151)[slice_119579 * + (k2p2zq_73023 * + m_73008) + + gtid_117055 * + k2p2zq_73023 + + i_97770]; + + pre_117064 = x_117065; + } else { + pre_117064 = 0.0; + } + ((__local double *) mem_123196)[ltid_y_116978 * tile_sizze_116779 + + ltid_x_116976] = pre_117064; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119580 = x_97769 + binop_x_117052; + int64_t ltid_y_117012 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_117010 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_117011 = local_tid_127661; + int64_t gtid_117070 = binop_x_116811 + ltid_y_117012; + int64_t gtid_117072 = binop_x_116813 + ltid_x_117010; + double acc_117076 = accs_mem_123192[(int64_t) 0]; + bool binop_x_117080 = slt64(gtid_117070, m_73008); + bool binop_y_117081 = slt64(gtid_117072, k2p2zq_73023); + bool cond_117082 = binop_x_117080 && binop_y_117081; + double acc_117083; + + if (cond_117082) { + double x_117084; + double redout_119839 = acc_117076; + + for (int64_t i_119840 = 0; i_119840 < residual_input_116974; + i_119840++) { + int64_t slice_120038 = slice_119580 + i_119840; + double x_117089 = ((__local + double *) mem_123196)[ltid_y_117012 * + tile_sizze_116779 + + i_119840]; + bool isnan_res_117090; + + isnan_res_117090 = futrts_isnan64(x_117089); + + double defunc_1_f_res_117091; + + if 
(isnan_res_117090) { + defunc_1_f_res_117091 = 0.0; + } else { + double x_117088 = ((__local + double *) mem_param_123175)[ltid_y_117012 * + ctx_val_123177 + + ltid_x_117010 * + k2p2zq_73023 + + slice_120038]; + double defunc_1_f_res_f_res_117092 = x_117088 * + x_117089; + + defunc_1_f_res_117091 = defunc_1_f_res_f_res_117092; + } + + double defunc_1_op_res_117087 = defunc_1_f_res_117091 + + redout_119839; + double redout_tmp_127674 = defunc_1_op_res_117087; + + redout_119839 = redout_tmp_127674; + } + x_117084 = redout_119839; + acc_117083 = x_117084; + } else { + acc_117083 = acc_117076; + } + mem_123200[(int64_t) 0] = acc_117083; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125285[(int64_t) 0] = mem_123200[(int64_t) 0]; + } + + __local char *mem_123215; + + mem_123215 = (__local char *) mem_123215_backing_7; + + int64_t ltid_y_117096 = sext_i32_i64(ltid_pre_127665); + int64_t ltid_x_117094 = sext_i32_i64(ltid_pre_127666); + int32_t ltid_flat_117095 = local_tid_127661; + + if (slt64(ltid_y_117096, tile_sizze_116779) && slt64(ltid_x_117094, + tile_sizze_116779)) { + int64_t gtid_117105 = binop_x_116811 + ltid_y_117096; + int64_t gtid_117107 = binop_x_116813 + ltid_x_117094; + bool binop_x_117109 = slt64(gtid_117105, m_73008); + bool binop_y_117110 = slt64(gtid_117107, k2p2zq_73023); + bool cond_117111 = binop_x_117109 && binop_y_117110; + __local char *mem_125292; + + mem_125292 = (__local char *) mem_125292_backing_8; + if (cond_117111) { + double defunc_2_reduce_res_117108 = mem_125285[(int64_t) 0]; + bool index_ok_117116 = bounds_check_97773 && bounds_check_97773; + bool index_certs_117117; + + if (!index_ok_117116) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 604) == -1) { + global_failure_args[0] = i_97770; + global_failure_args[1] = i_97770; + global_failure_args[2] = k2p2zq_73023; + global_failure_args[3] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_6; + } + } + + double zs_arg_117118 = ((__global + double *) mem_123155)[i_97770 * + 
(k2p2zq_73023 * + m_73008) + + gtid_117105 * + k2p2zq_73023 + + i_97770]; + bool index_certs_117119; + + if (!bounds_check_97773) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 605) == -1) { + global_failure_args[0] = i_97770; + global_failure_args[1] = k2p2zq_73023; + ; + } + local_failure = true; + goto error_6; + } + } + + double zm_arg_117120 = ((__global + double *) mem_121944)[i_97770 * + k2p2zq_73023 + + gtid_117107]; + double zm_res_117121 = zm_arg_117120 - + defunc_2_reduce_res_117108; + double zs_res_117122 = zm_res_117121 / zs_arg_117118; + + ((__local double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_73023 + i_97770] = + zs_res_117122; + for (int64_t i_127675 = 0; i_127675 < k2p2zq_73023; + i_127675++) { + ((__local double *) mem_125292)[i_127675] = ((__local + double *) mem_param_123175)[ltid_y_117096 * + ctx_val_123177 + + ltid_x_117094 * + k2p2zq_73023 + + i_127675]; + } + } + for (int64_t i_127676 = 0; i_127676 < k2p2zq_73023; i_127676++) { + ((__local double *) mem_123215)[ltid_y_117096 * (k2p2zq_73023 * + tile_sizze_116779) + + ltid_x_117094 * k2p2zq_73023 + + i_127676] = ((__local + double *) mem_125292)[i_127676]; + } + } + + error_6: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_param_tmp_127669; + + mem_param_tmp_127669 = mem_123215; + mem_param_123175 = mem_param_tmp_127669; + } + tiled_inside_loop_mem_123229 = mem_param_123175; + + int64_t thread_out_index_127677 = gid_x_116776 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127665); + int64_t thread_out_index_127678 = gid_y_116777 * tile_sizze_116779 + + sext_i32_i64(ltid_pre_127666); + + if (slt64(thread_out_index_127677, m_73008) && + slt64(thread_out_index_127678, k2p2zq_73023)) { + for (int64_t i_127679 = 0; i_127679 < k2p2zq_73023; i_127679++) { + ((__global double *) mem_123233)[thread_out_index_127677 * + (k2p2zq_73023 * k2p2zq_73023) + + thread_out_index_127678 * 
+                                             k2p2zq_73023 + i_127679] =
+                ((__local
+                  double *) tiled_inside_loop_mem_123229)[sext_i32_i64(ltid_pre_127665) *
+                                                          ctx_val_123177 +
+                                                          sext_i32_i64(ltid_pre_127666) *
+                                                          k2p2zq_73023 +
+                                                          i_127679];
+        }
+    }
+
+  error_7:
+    return;
+    #undef tile_sizze_116779
+}
+__kernel void mainMagnitudezisegmap_intragroup_117146(__global /* Machine-generated tiled kernel: each (row, col) thread accumulates a NaN-skipping sum of products over j_m_i_97830 terms, forms (mem_121938 value - sum) / mem_123143 value, stores it in mem_123291, and emits k2p2zq_73023 doubles to mem_123334. */
+                                                      int *global_failure, /* read once on entry; the kernel returns immediately when *global_failure >= 0 */
+                                                      __local volatile
+                                                      int64_t *mem_123330_backing_aligned_0, /* local memory used as mem_123330: staging area holding k2p2zq_73023 doubles per thread */
+                                                      __local volatile
+                                                      int64_t *mem_123314_backing_aligned_1, /* local memory used as mem_123314: tile for the residual pass */
+                                                      __local volatile
+                                                      int64_t *mem_123305_backing_aligned_2, /* local memory used as mem_123305: tile for the whole-tile passes */
+                                                      int64_t m_73008, /* bound on the row index (gid_x * tile size + ltid_y) */
+                                                      int64_t k2p2zq_73023, /* bound on the column index (gid_y * tile size + ltid_x); also the innermost stride of the global arrays */
+                                                      int64_t x_97825, /* base offset added to the reduction index j when addressing mem_123241 and mem_123295 */
+                                                      int64_t i_97826, /* fixed index: innermost index into mem_123241, (i, i) index into mem_123143, leading index of the mem_123291 store */
+                                                      int64_t j_m_i_97830, /* number of terms in the reduction; staged positions with j >= j_m_i_97830 are filled with 0.0 */
+                                                      int64_t num_groups_y_117144, /* divisor that splits the flat group id into gid_x (quotient) and gid_y (remainder) */
+                                                      int64_t num_whole_tiles_117162, /* trip count of the whole-tile loop */
+                                                      int64_t residual_input_117295, /* number of terms accumulated by the residual pass */
+                                                      unsigned char cond_117296, /* nonzero: skip the residual pass; presumably residual_input_117295 == 0 - TODO confirm at the launch site */
+                                                      int64_t num_threads_125997, /* stride between successive elements in the mem_125317 scratch buffer */
+                                                      __global
+                                                      unsigned char *mem_121938, /* read as double at [col * k2p2zq_73023 + i_97826] */
+                                                      __global
+                                                      unsigned char *mem_123143, /* read as double at [row * k2p2zq_73023^2 + i_97826 * k2p2zq_73023 + i_97826]; used as the divisor */
+                                                      __global
+                                                      unsigned char *mem_123241, /* read as double at [(x_97825 + j) * (k2p2zq_73023 * m_73008) + row * k2p2zq_73023 + i_97826]; staged into the local tiles */
+                                                      __global
+                                                      unsigned char *mem_123291, /* written at [i_97826 * (k2p2zq_73023 * m_73008) + row * k2p2zq_73023 + col], then re-read for every leading index 0 .. k2p2zq_73023 - 1 */
+                                                      __global
+                                                      unsigned char *mem_123295, /* read as double at [(x_97825 + j) * (k2p2zq_73023 * m_73008) + row * k2p2zq_73023 + col]; second factor of each product */
+                                                      __global
+                                                      unsigned char *mem_123334, /* output: k2p2zq_73023 doubles per thread starting at [row * k2p2zq_73023^2 + col * k2p2zq_73023] */
+                                                      __global
+                                                      unsigned char *mem_125317) /* global scratch addressed as [group id + i * num_threads_125997] */
+{ /* Layout used below: ltid_y = local id / tile size, ltid_x = the remainder; row = gid_x * tile size + ltid_y, col = gid_y * tile size + ltid_x. */
+    #define tile_sizze_117141 (mainMagnitudezitile_sizze_117140)
+    /* The int64_t-typed local backing buffers are viewed as char pointers below. block_dim0..2 are declared but not referenced again in this kernel. */
+    const int block_dim0 = 0;
+    const int block_dim1 = 1;
+    const int block_dim2 = 2;
+    __local volatile char *restrict mem_123330_backing_6 = (__local volatile
+                                                            char *) mem_123330_backing_aligned_0;
+    __local volatile char *restrict mem_123314_backing_5 = (__local volatile
+                                                            char *) mem_123314_backing_aligned_1;
+    __local volatile char *restrict mem_123305_backing_0 = (__local volatile
+                                                            char *) mem_123305_backing_aligned_2;
+    /* Skip all work when a failure code is already present; sibling kernels in this file store one via atomic_cmpxchg_i32_global(global_failure, -1, code). */
+    if (*global_failure >= 0)
+        return;
+    /* Work-item identifiers. global_tid, group_sizze and wave_sizze are assigned but not used further in this kernel. */
+    int32_t global_tid_127697;
+    int32_t local_tid_127698;
+    int64_t group_sizze_127701;
+    int32_t wave_sizze_127700;
+    int32_t group_tid_127699;
+
+    global_tid_127697 = get_global_id(0);
+    local_tid_127698 = get_local_id(0);
+    group_sizze_127701 = get_local_size(0);
+    wave_sizze_127700 = LOCKSTEP_WIDTH;
+    group_tid_127699 = get_group_id(0);
+
+    int32_t gid_flat_117146;
+
+    gid_flat_117146 = group_tid_127699; /* flat group id; later used to index the mem_125317 scratch buffer */
+    /* ltid_pre_127702: local id divided by the tile size via squot32 (helper defined elsewhere in the file); later named ltid_y. */
+    int32_t ltid_pre_127702;
+
+    ltid_pre_127702 = squot32(local_tid_127698,
+                              sext_i64_i32(tile_sizze_117141));
+    /* ltid_pre_127703: local id minus quotient * tile size, i.e. the matching remainder; later named ltid_x. */
+    int32_t ltid_pre_127703;
+
+    ltid_pre_127703 = local_tid_127698 - squot32(local_tid_127698,
+                                                 sext_i64_i32(tile_sizze_117141)) *
+        sext_i64_i32(tile_sizze_117141);
+    /* Split the flat group id the same way, with num_groups_y_117144 as the divisor. */
+    int64_t gid_x_117138;
+
+    gid_x_117138 = squot64(sext_i32_i64(group_tid_127699), num_groups_y_117144);
+
+    int64_t gid_y_117139;
+
+    gid_y_117139 = sext_i32_i64(group_tid_127699) -
+        squot64(sext_i32_i64(group_tid_127699), num_groups_y_117144) *
+        num_groups_y_117144;
+    /* Per-thread accumulator (single-element array), zeroed for threads whose (ltid_y, ltid_x) lie inside the tile. */
+    double mem_123300[1];
+    int64_t ltid_y_117165 = sext_i32_i64(ltid_pre_127702);
+    int64_t ltid_x_117163 = sext_i32_i64(ltid_pre_127703);
+    int32_t ltid_flat_117164 = local_tid_127698;
+
+    if (slt64(ltid_y_117165, tile_sizze_117141) && slt64(ltid_x_117163,
+                                                         tile_sizze_117141)) {
+        mem_123300[(int64_t) 0] = 0.0;
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    /* Row offset (binop_x_117250) and column offset (binop_x_117265) of this work-group. */
+    int64_t binop_x_117250 = gid_x_117138 * tile_sizze_117141;
+    int64_t binop_x_117265 = gid_y_117139 * tile_sizze_117141;
+    __local char *mem_123305;
+
+    mem_123305 = (__local char *) mem_123305_backing_0;
+
+    double accs_mem_123310[1];
+    double mem_param_123301[1];
+    /* Pass 1 (whole tiles): each iteration stages one tile of mem_123241 in local memory, then every valid thread adds tile_sizze_117141 products to its accumulator. */
+    for (int32_t i_1 = 0; i_1 < 1; i_1++)
+        mem_param_123301[i_1] = mem_123300[i_1];
+    for (int64_t tile_id_117174 = 0; tile_id_117174 < num_whole_tiles_117162;
+         tile_id_117174++) {
+        int64_t binop_x_117248 = tile_sizze_117141 * tile_id_117174; /* first reduction index covered by this tile */
+        int64_t ltid_y_117177 = sext_i32_i64(ltid_pre_127702);
+        int64_t ltid_x_117175 = sext_i32_i64(ltid_pre_127703);
+        int32_t ltid_flat_117176 = local_tid_127698;
+        int64_t j_117249 = ltid_x_117175 + binop_x_117248;
+        int64_t gtid_117251 = ltid_y_117177 + binop_x_117250;
+        bool binop_x_117256 = slt64(j_117249, j_m_i_97830);
+        bool binop_y_117257 = slt64(gtid_117251, m_73008);
+        bool cond_117258 = binop_x_117256 && binop_y_117257;
+        double pre_117259;
+        /* One element per thread; positions with j >= j_m_i_97830 or row >= m_73008 are staged as 0.0. */
+        if (cond_117258) {
+            int64_t slice_119581 = x_97825 + j_117249;
+            double x_117260 = ((__global double *) mem_123241)[slice_119581 *
+                                                                (k2p2zq_73023 *
+                                                                 m_73008) +
+                                                                gtid_117251 *
+                                                                k2p2zq_73023 +
+                                                                i_97826];
+
+            pre_117259 = x_117260;
+        } else {
+            pre_117259 = 0.0;
+        }
+        ((__local double *) mem_123305)[ltid_y_117177 * tile_sizze_117141 +
+                                        ltid_x_117175] = pre_117259;
+        barrier(CLK_LOCAL_MEM_FENCE); /* the tile is fully written before any thread reads it */
+
+        int64_t slice_119582 = x_97825 + binop_x_117248;
+        double mem_123309[1];
+        int64_t ltid_y_117209 = sext_i32_i64(ltid_pre_127702);
+        int64_t ltid_x_117207 = sext_i32_i64(ltid_pre_127703);
+        int32_t ltid_flat_117208 = local_tid_127698;
+        int64_t gtid_117264 = ltid_y_117209 + binop_x_117250;
+        int64_t gtid_117266 = ltid_x_117207 + binop_x_117265;
+        double acc_117269 = mem_param_123301[(int64_t) 0];
+        bool binop_x_117273 = slt64(gtid_117264, m_73008);
+        bool binop_y_117274 = slt64(gtid_117266, k2p2zq_73023);
+        bool cond_117275 = binop_x_117273 && binop_y_117274;
+        double acc_117276;
+        /* Only threads mapping to a valid (row, col) accumulate; the others carry their accumulator through unchanged. */
+        if (cond_117275) {
+            double x_117277;
+            double redout_119848 = acc_117269;
+
+            for (int64_t i_119849 = 0; i_119849 < tile_sizze_117141;
+                 i_119849++) {
+                int64_t slice_120041 = slice_119582 + i_119849;
+                double x_117282 = ((__local
+                                    double *) mem_123305)[ltid_y_117209 *
+                                                          tile_sizze_117141 +
+                                                          i_119849];
+                bool isnan_res_117283;
+
+                isnan_res_117283 = futrts_isnan64(x_117282);
+
+                double defunc_1_f_res_117284;
+                /* A staged value flagged by futrts_isnan64 contributes 0.0; otherwise it is multiplied by the matching mem_123295 element. */
+                if (isnan_res_117283) {
+                    defunc_1_f_res_117284 = 0.0;
+                } else {
+                    double x_117281 = ((__global
+                                        double *) mem_123295)[slice_120041 *
+                                                              (k2p2zq_73023 *
+                                                               m_73008) +
+                                                              gtid_117264 *
+                                                              k2p2zq_73023 +
+                                                              gtid_117266];
+                    double defunc_1_f_res_f_res_117285 = x_117281 * x_117282;
+
+                    defunc_1_f_res_117284 = defunc_1_f_res_f_res_117285;
+                }
+
+                double defunc_1_op_res_117280 = defunc_1_f_res_117284 +
+                       redout_119848;
+                double redout_tmp_127706 = defunc_1_op_res_117280;
+
+                redout_119848 = redout_tmp_127706;
+            }
+            x_117277 = redout_119848;
+            acc_117276 = x_117277;
+        } else {
+            acc_117276 = acc_117269;
+        }
+        mem_123309[(int64_t) 0] = acc_117276;
+        barrier(CLK_LOCAL_MEM_FENCE); /* all reads of the tile finish before the next iteration overwrites it */
+
+        double mem_param_tmp_127704[1];
+        /* Copy the new accumulator back into the loop-carried slot. */
+        for (int32_t i_2 = 0; i_2 < 1; i_2++)
+            mem_param_tmp_127704[i_2] = mem_123309[i_2];
+        for (int32_t i_3 = 0; i_3 < 1; i_3++)
+            mem_param_123301[i_3] = mem_param_tmp_127704[i_3];
+    }
+    for (int32_t i_4 = 0; i_4 < 1; i_4++)
+        accs_mem_123310[i_4] = mem_param_123301[i_4];
+    /* Pass 2 (residual): the same stage-and-accumulate step, run once with residual_input_117295 terms and a separate local tile. */
+    __local char *mem_123314;
+
+    mem_123314 = (__local char *) mem_123314_backing_5;
+
+    double mem_123318[1];
+    double mem_125310[1];
+    /* cond_117296 is a kernel argument, so every work-item takes the same branch and the barriers in the else branch are reached by all of them. */
+    if (cond_117296) {
+        mem_125310[(int64_t) 0] = accs_mem_123310[(int64_t) 0];
+    } else {
+        int64_t binop_x_117371 = tile_sizze_117141 * num_whole_tiles_117162; /* first reduction index after the whole tiles */
+        int64_t ltid_y_117299 = sext_i32_i64(ltid_pre_127702);
+        int64_t ltid_x_117297 = sext_i32_i64(ltid_pre_127703);
+        int32_t ltid_flat_117298 = local_tid_127698;
+        int64_t j_117372 = ltid_x_117297 + binop_x_117371;
+        int64_t gtid_117374 = binop_x_117250 + ltid_y_117299;
+        bool binop_x_117379 = slt64(j_117372, j_m_i_97830);
+        bool binop_y_117380 = slt64(gtid_117374, m_73008);
+        bool cond_117381 = binop_x_117379 && binop_y_117380;
+        double pre_117382;
+        /* Same guarded load as in pass 1; out-of-range positions are staged as 0.0. */
+        if (cond_117381) {
+            int64_t slice_119583 = x_97825 + j_117372;
+            double x_117383 = ((__global double *) mem_123241)[slice_119583 *
+                                                                (k2p2zq_73023 *
+                                                                 m_73008) +
+                                                                gtid_117374 *
+                                                                k2p2zq_73023 +
+                                                                i_97826];
+
+            pre_117382 = x_117383;
+        } else {
+            pre_117382 = 0.0;
+        }
+        ((__local double *) mem_123314)[ltid_y_117299 * tile_sizze_117141 +
+                                        ltid_x_117297] = pre_117382;
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        int64_t slice_119584 = x_97825 + binop_x_117371;
+        int64_t ltid_y_117332 = sext_i32_i64(ltid_pre_127702);
+        int64_t ltid_x_117330 = sext_i32_i64(ltid_pre_127703);
+        int32_t ltid_flat_117331 = local_tid_127698;
+        int64_t gtid_117388 = binop_x_117250 + ltid_y_117332;
+        int64_t gtid_117390 = binop_x_117265 + ltid_x_117330;
+        double acc_117393 = accs_mem_123310[(int64_t) 0];
+        bool binop_x_117397 = slt64(gtid_117388, m_73008);
+        bool binop_y_117398 = slt64(gtid_117390, k2p2zq_73023);
+        bool cond_117399 = binop_x_117397 && binop_y_117398;
+        double acc_117400;
+        /* Valid (row, col) threads add the remaining residual_input_117295 products to the whole-tile sum. */
+        if (cond_117399) {
+            double x_117401;
+            double redout_119850 = acc_117393;
+
+            for (int64_t i_119851 = 0; i_119851 < residual_input_117295;
+                 i_119851++) {
+                int64_t slice_120042 = slice_119584 + i_119851;
+                double x_117406 = ((__local
+                                    double *) mem_123314)[ltid_y_117332 *
+                                                          tile_sizze_117141 +
+                                                          i_119851];
+                bool isnan_res_117407;
+
+                isnan_res_117407 = futrts_isnan64(x_117406);
+
+                double defunc_1_f_res_117408;
+                /* Same rule as pass 1: flagged values contribute 0.0. */
+                if (isnan_res_117407) {
+                    defunc_1_f_res_117408 = 0.0;
+                } else {
+                    double x_117405 = ((__global
+                                        double *) mem_123295)[slice_120042 *
+                                                              (k2p2zq_73023 *
+                                                               m_73008) +
+                                                              gtid_117388 *
+                                                              k2p2zq_73023 +
+                                                              gtid_117390];
+                    double defunc_1_f_res_f_res_117409 = x_117405 * x_117406;
+
+                    defunc_1_f_res_117408 = defunc_1_f_res_f_res_117409;
+                }
+
+                double defunc_1_op_res_117404 = defunc_1_f_res_117408 +
+                       redout_119850;
+                double redout_tmp_127707 = defunc_1_op_res_117404;
+
+                redout_119850 = redout_tmp_127707;
+            }
+            x_117401 = redout_119850;
+            acc_117400 = x_117401;
+        } else {
+            acc_117400 = acc_117393;
+        }
+        mem_123318[(int64_t) 0] = acc_117400;
+        barrier(CLK_LOCAL_MEM_FENCE);
+        mem_125310[(int64_t) 0] = mem_123318[(int64_t) 0];
+    }
+    /* Pass 3: turn the finished sum (mem_125310[0]) into the per-thread result and stage the output rows in local memory. */
+    __local char *mem_123330;
+
+    mem_123330 = (__local char *) mem_123330_backing_6;
+
+    int64_t ltid_y_117413 = sext_i32_i64(ltid_pre_127702);
+    int64_t ltid_x_117411 = sext_i32_i64(ltid_pre_127703);
+    int32_t ltid_flat_117412 = local_tid_127698;
+
+    if (slt64(ltid_y_117413, tile_sizze_117141) && slt64(ltid_x_117411,
+                                                         tile_sizze_117141)) {
+        int64_t gtid_117422 = binop_x_117250 + ltid_y_117413;
+        int64_t gtid_117424 = binop_x_117265 + ltid_x_117411;
+        bool binop_x_117426 = slt64(gtid_117422, m_73008);
+        bool binop_y_117427 = slt64(gtid_117424, k2p2zq_73023);
+        bool cond_117428 = binop_x_117426 && binop_y_117427;
+        /* Valid (row, col): compute (mem_121938 value - sum) / mem_123143 value. */
+        if (cond_117428) {
+            double defunc_2_reduce_res_117425 = mem_125310[(int64_t) 0];
+            double defunc_3_map_res_r_transformed_row_117432 = ((__global
+                                                                 double *) mem_123143)[gtid_117422 *
+                                                                                       (k2p2zq_73023 *
+                                                                                        k2p2zq_73023) +
+                                                                                       i_97826 *
+                                                                                       k2p2zq_73023 +
+                                                                                       i_97826];
+            double defunc_2_map_res_transformed_row_117434 = ((__global
+                                                               double *) mem_121938)[gtid_117424 *
+                                                                                     k2p2zq_73023 +
+                                                                                     i_97826];
+            double zm_res_117435 = defunc_2_map_res_transformed_row_117434 -
+                   defunc_2_reduce_res_117425;
+            double zs_res_117436 = zm_res_117435 /
+                   defunc_3_map_res_r_transformed_row_117432;
+            /* Store the quotient into mem_123291, then copy k2p2zq_73023 strided elements of mem_123291 into the scratch buffer. */
+            ((__global double *) mem_123291)[gtid_117422 * k2p2zq_73023 +
+                                             gtid_117424 + i_97826 *
+                                             (k2p2zq_73023 * m_73008)] =
+                zs_res_117436;
+            for (int64_t i_127708 = 0; i_127708 < k2p2zq_73023; i_127708++) { /* NOTE(review): the scratch index uses gid_flat_117146 (the group id), so all threads of a work-group address the same slots with no barrier in between - confirm this is intended. */
+                ((__global double *) mem_125317)[gid_flat_117146 + i_127708 *
+                                                 num_threads_125997] =
+                    ((__global double *) mem_123291)[gtid_117422 *
+                                                     k2p2zq_73023 +
+                                                     gtid_117424 + i_127708 *
+                                                     (k2p2zq_73023 * m_73008)];
+            }
+        }
+        for (int64_t i_127709 = 0; i_127709 < k2p2zq_73023; i_127709++) { /* runs for every thread inside the tile, including those whose (row, col) is out of range */
+            ((__local double *) mem_123330)[ltid_y_117413 * (k2p2zq_73023 *
+                                                             tile_sizze_117141) +
+                                            ltid_x_117411 * k2p2zq_73023 +
+                                            i_127709] = ((__global
+                                                          double *) mem_125317)[gid_flat_117146 +
+                                                                                i_127709 *
+                                                                                num_threads_125997];
+        }
+    }
+    barrier(CLK_LOCAL_MEM_FENCE);
+    /* Write-back: each thread with a valid (row, col) copies its k2p2zq_73023 staged doubles from local memory to mem_123334. */
+    int64_t thread_out_index_127710 = gid_x_117138 * tile_sizze_117141 +
+            sext_i32_i64(ltid_pre_127702);
+    int64_t thread_out_index_127711 = gid_y_117139 * tile_sizze_117141 +
+            sext_i32_i64(ltid_pre_127703);
+
+    if (slt64(thread_out_index_127710, m_73008) &&
+        slt64(thread_out_index_127711, k2p2zq_73023)) {
+        for (int64_t i_127712 = 0; i_127712 < k2p2zq_73023; i_127712++) {
+            ((__global double *) mem_123334)[thread_out_index_127710 *
+                                             (k2p2zq_73023 * k2p2zq_73023) +
+                                             thread_out_index_127711 *
+                                             k2p2zq_73023 + i_127712] =
+                ((__local double *) mem_123330)[sext_i32_i64(ltid_pre_127702) *
+                                                (k2p2zq_73023 *
+                                                 tile_sizze_117141) +
+                                                sext_i32_i64(ltid_pre_127703) *
+                                                k2p2zq_73023 + i_127712];
+        }
+    }
+    /* No goto in this kernel targets the error_6 label below. */
+  error_6:
+    return;
+    #undef tile_sizze_117141
+}
+__kernel void mainMagnitudezisegmap_intragroup_117465(__global
+
int *global_failure, + __local volatile + int64_t *mem_123437_backing_aligned_0, + __local volatile + int64_t *mem_123435_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t gridDim_x_117458, + int64_t gridDim_y_117459, + int64_t full_tiles_117490, + int64_t kk_117693, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_r_mem_123392, + __global + unsigned char *mem_123419, + __global + unsigned char *mem_123610) +{ + #define Ty_117445 (mainMagnitudeziTy_117442) + #define Ry_117446 (mainMagnitudeziRy_117444) + #define Tx_117447 (mainMagnitudeziTx_117441) + #define Rx_117448 (mainMagnitudeziRx_117443) + #define Tk_117449 (mainMagnitudeziTk_117440) + #define tk_div_tx_117450 (sdiv_up_safe64(mainMagnitudeziTk_117440, mainMagnitudeziTx_117441)) + #define tk_div_ty_117451 (sdiv_up_safe64(mainMagnitudeziTk_117440, mainMagnitudeziTy_117442)) + #define TxRx_117452 (mainMagnitudeziTx_117441 * mainMagnitudeziRx_117443) + #define TyRy_117453 (mainMagnitudeziTy_117442 * mainMagnitudeziRy_117444) + #define a_loc_szz_117455 (mainMagnitudeziTk_117440 * (mainMagnitudeziTy_117442 * mainMagnitudeziRy_117444)) + #define b_loc_szz_117457 (mainMagnitudeziRx_117443 * (mainMagnitudeziTx_117441 * mainMagnitudeziTk_117440)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_123437_backing_1 = (__local volatile + char *) mem_123437_backing_aligned_0; + __local volatile char *restrict mem_123435_backing_0 = (__local volatile + char *) mem_123435_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127797; + int32_t local_tid_127798; + int64_t group_sizze_127801; + int32_t wave_sizze_127800; + int32_t group_tid_127799; + + global_tid_127797 = get_global_id(0); + local_tid_127798 = get_local_id(0); + group_sizze_127801 = get_local_size(0); + wave_sizze_127800 = LOCKSTEP_WIDTH; + group_tid_127799 = get_group_id(0); + + int32_t gid_flat_117465; + + 
gid_flat_117465 = group_tid_127799; + + int32_t ltid_pre_127802; + + ltid_pre_127802 = squot32(local_tid_127798, sext_i64_i32(Tx_117447)); + + int32_t ltid_pre_127803; + + ltid_pre_127803 = local_tid_127798 - squot32(local_tid_127798, + sext_i64_i32(Tx_117447)) * + sext_i64_i32(Tx_117447); + + int64_t gtid_95976; + + gtid_95976 = squot64(sext_i32_i64(group_tid_127799), gridDim_y_117459 * + gridDim_x_117458); + + int64_t gid_y_117464; + + gid_y_117464 = squot64(sext_i32_i64(group_tid_127799) - + squot64(sext_i32_i64(group_tid_127799), + gridDim_y_117459 * gridDim_x_117458) * + (gridDim_y_117459 * gridDim_x_117458), + gridDim_x_117458); + + int64_t gid_x_117463; + + gid_x_117463 = sext_i32_i64(group_tid_127799) - + squot64(sext_i32_i64(group_tid_127799), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * gridDim_x_117458) - + squot64(sext_i32_i64(group_tid_127799) - + squot64(sext_i32_i64(group_tid_127799), gridDim_y_117459 * + gridDim_x_117458) * (gridDim_y_117459 * + gridDim_x_117458), + gridDim_x_117458) * gridDim_x_117458; + + int64_t iii_117466; + + iii_117466 = TyRy_117453 * gid_y_117464; + + int64_t jjj_117467 = TxRx_117452 * gid_x_117463; + double mem_123433[Ry_117446 * Rx_117448]; + int64_t ltid_y_117470 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_x_117468 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117469 = local_tid_127798; + double mem_123424[Ry_117446 * Rx_117448]; + + for (int64_t i_117481 = 0; i_117481 < Ry_117446; i_117481++) { + for (int64_t i_117484 = 0; i_117484 < Rx_117448; i_117484++) { + mem_123424[i_117481 * Rx_117448 + i_117484] = 0.0; + } + } + for (int64_t i_127806 = 0; i_127806 < Ry_117446; i_127806++) { + for (int64_t i_127807 = 0; i_127807 < Rx_117448; i_127807++) { + mem_123433[i_127806 * Rx_117448 + i_127807] = mem_123424[i_127806 * + Rx_117448 + + i_127807]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_123435; + + mem_123435 = (__local char *) mem_123435_backing_0; + + __local char *mem_123437; + 
+ mem_123437 = (__local char *) mem_123437_backing_1; + + double mem_123508[Ry_117446]; + double mem_123512[Rx_117448]; + double loop_mem_123524[Ry_117446 * Rx_117448]; + double mem_param_123438[Ry_117446 * Rx_117448]; + + for (int32_t i_2 = 0; i_2 < Ry_117446 * Rx_117448; i_2++) + mem_param_123438[i_2] = mem_123433[i_2]; + for (int64_t i_117491 = 0; i_117491 < full_tiles_117490; i_117491++) { + int64_t kk_117495 = Tk_117449 * i_117491; + + for (int64_t i_117496 = 0; i_117496 < Ry_117446; i_117496++) { + int64_t binop_y_117519 = Ty_117445 * i_117496; + + for (int64_t i_117498 = 0; i_117498 < tk_div_tx_117450; + i_117498++) { + int64_t binop_y_117517 = Tx_117447 * i_117498; + int64_t ltid_x_117500 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_y_117501 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117502 = local_tid_127798; + int64_t k_117518 = ltid_y_117501 + binop_y_117517; + int64_t i_117520 = ltid_x_117500 + binop_y_117519; + int64_t gtid_117521 = iii_117466 + i_117520; + int64_t A_col_idx_117522 = kk_117495 + k_117518; + bool cond_117523 = slt64(gtid_117521, k2p2zq_73023); + double A_elem_117524; + + if (cond_117523) { + double A_elem_117526 = ((__global + double *) mem_123419)[gtid_95976 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_117521 * + k2p2zq_73023 + + A_col_idx_117522]; + + A_elem_117524 = A_elem_117526; + } else { + A_elem_117524 = 0.0; + } + + bool cond_117528 = slt64(k_117518, Tk_117449); + int64_t a_loc_ind_117529; + + if (cond_117528) { + int64_t binop_y_117530 = Tk_117449 * i_117520; + int64_t loc_fi_117531 = k_117518 + binop_y_117530; + + a_loc_ind_117529 = loc_fi_117531; + } else { + a_loc_ind_117529 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117529) && + slt64(a_loc_ind_117529, a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117529] = + A_elem_117524; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_117536 = 0; i_117536 < tk_div_ty_117451; i_117536++) { + int64_t binop_y_117557 = Ty_117445 * 
i_117536; + + for (int64_t i_117538 = 0; i_117538 < Rx_117448; i_117538++) { + int64_t binop_y_117559 = Tx_117447 * i_117538; + int64_t ltid_x_117540 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_y_117541 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117542 = local_tid_127798; + int64_t k_117558 = ltid_x_117540 + binop_y_117557; + int64_t j_117560 = ltid_y_117541 + binop_y_117559; + int64_t gtid_117561 = jjj_117467 + j_117560; + int64_t B_row_idx_117562 = kk_117495 + k_117558; + bool cond_117563 = slt64(gtid_117561, k2p2zq_73023); + double B_elem_117564; + + if (cond_117563) { + double B_elem_117566 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_95976 * + binop_x_120251 + + B_row_idx_117562 * + k2p2zq_73023 + + gtid_117561]; + + B_elem_117564 = B_elem_117566; + } else { + B_elem_117564 = 0.0; + } + + bool cond_117568 = slt64(k_117558, Tk_117449); + int64_t b_loc_ind_117569; + + if (cond_117568) { + int64_t binop_y_117570 = TxRx_117452 * k_117558; + int64_t loc_fi_117571 = j_117560 + binop_y_117570; + + b_loc_ind_117569 = loc_fi_117571; + } else { + b_loc_ind_117569 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117569) && + slt64(b_loc_ind_117569, b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117569] = + B_elem_117564; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_123523[Ry_117446 * Rx_117448]; + double mem_param_123495[Ry_117446 * Rx_117448]; + + for (int32_t i_3 = 0; i_3 < Ry_117446 * Rx_117448; i_3++) + mem_param_123495[i_3] = mem_param_123438[i_3]; + for (int64_t i_117576 = 0; i_117576 < Tk_117449; i_117576++) { + int64_t binop_y_117615 = TxRx_117452 * i_117576; + int64_t ltid_y_117580 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_x_117578 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117579 = local_tid_127798; + double mem_123498[Ry_117446]; + double mem_123500[Rx_117448]; + int64_t binop_x_117606 = Ry_117446 * ltid_y_117580; + + for (int64_t i_117604 = 0; i_117604 < Ry_117446; 
i_117604++) { + int64_t binop_x_117607 = i_117604 + binop_x_117606; + int64_t binop_y_117608 = Tk_117449 * binop_x_117607; + int64_t a_loc_ind_117609 = i_117576 + binop_y_117608; + + for (int64_t i_127819 = 0; i_127819 < (int64_t) 1; i_127819++) { + mem_123498[i_117604 + i_127819] = ((__local + double *) mem_123435)[a_loc_ind_117609 + + i_127819]; + } + } + + int64_t binop_y_117617 = Rx_117448 * ltid_x_117578; + + for (int64_t i_117613 = 0; i_117613 < Rx_117448; i_117613++) { + int64_t binop_x_117616 = i_117613 + binop_y_117615; + int64_t b_loc_ind_117618 = binop_x_117616 + binop_y_117617; + + for (int64_t i_127821 = 0; i_127821 < (int64_t) 1; i_127821++) { + mem_123500[i_117613 + i_127821] = ((__local + double *) mem_123437)[b_loc_ind_117618 + + i_127821]; + } + } + for (int64_t i_127822 = 0; i_127822 < Ry_117446; i_127822++) { + mem_123508[i_127822] = mem_123498[i_127822]; + } + for (int64_t i_127823 = 0; i_127823 < Rx_117448; i_127823++) { + mem_123512[i_127823] = mem_123500[i_127823]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_123522[Ry_117446 * Rx_117448]; + int64_t ltid_y_117625 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_x_117623 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117624 = local_tid_127798; + int64_t binop_y_117666 = Ry_117446 * ltid_y_117625; + int64_t binop_y_117670 = Rx_117448 * ltid_x_117623; + + for (int64_t i_117660 = 0; i_117660 < Ry_117446; i_117660++) { + int64_t binop_x_117665 = iii_117466 + i_117660; + int64_t cmpop_x_117667 = binop_x_117665 + binop_y_117666; + bool binop_x_117668 = slt64(cmpop_x_117667, k2p2zq_73023); + + for (int64_t i_117663 = 0; i_117663 < Rx_117448; i_117663++) { + int64_t binop_x_117669 = jjj_117467 + i_117663; + int64_t cmpop_x_117671 = binop_x_117669 + binop_y_117670; + bool binop_y_117672 = slt64(cmpop_x_117671, k2p2zq_73023); + bool cond_117673 = binop_x_117668 && binop_y_117672; + + if (cond_117673) { + double a_117675 = mem_123508[i_117660]; + double b_117676 = mem_123512[i_117663]; + 
double c_117677 = mem_param_123495[i_117660 * + Rx_117448 + + i_117663]; + double defunc_1_f_res_117680 = a_117675 * b_117676; + double defunc_1_op_res_117684 = c_117677 + + defunc_1_f_res_117680; + + mem_param_123495[i_117660 * Rx_117448 + i_117663] = + defunc_1_op_res_117684; + } + } + } + for (int64_t i_127826 = 0; i_127826 < Ry_117446; i_127826++) { + for (int64_t i_127827 = 0; i_127827 < Rx_117448; i_127827++) { + mem_123522[i_127826 * Rx_117448 + i_127827] = + mem_param_123495[i_127826 * Rx_117448 + i_127827]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_127816[Ry_117446 * Rx_117448]; + + for (int32_t i_4 = 0; i_4 < Ry_117446 * Rx_117448; i_4++) + mem_param_tmp_127816[i_4] = mem_123522[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_117446 * Rx_117448; i_5++) + mem_param_123495[i_5] = mem_param_tmp_127816[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_117446 * Rx_117448; i_6++) + loop_mem_123523[i_6] = mem_param_123495[i_6]; + + double mem_param_tmp_127808[Ry_117446 * Rx_117448]; + + for (int32_t i_7 = 0; i_7 < Ry_117446 * Rx_117448; i_7++) + mem_param_tmp_127808[i_7] = loop_mem_123523[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_117446 * Rx_117448; i_8++) + mem_param_123438[i_8] = mem_param_tmp_127808[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_117446 * Rx_117448; i_9++) + loop_mem_123524[i_9] = mem_param_123438[i_9]; + for (int64_t i_117694 = 0; i_117694 < Ry_117446; i_117694++) { + int64_t binop_y_117719 = Ty_117445 * i_117694; + + for (int64_t i_117696 = 0; i_117696 < tk_div_tx_117450; i_117696++) { + int64_t binop_y_117717 = Tx_117447 * i_117696; + int64_t ltid_x_117698 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_y_117699 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117700 = local_tid_127798; + int64_t k_117718 = ltid_y_117699 + binop_y_117717; + int64_t i_117720 = ltid_x_117698 + binop_y_117719; + int64_t gtid_117721 = iii_117466 + i_117720; + int64_t A_col_idx_117722 = kk_117693 + k_117718; + bool binop_x_117723 = slt64(gtid_117721, 
k2p2zq_73023); + bool binop_y_117724 = slt64(A_col_idx_117722, k2p2zq_73023); + bool cond_117725 = binop_x_117723 && binop_y_117724; + double A_elem_117726; + + if (cond_117725) { + double A_elem_117728 = ((__global + double *) mem_123419)[gtid_95976 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_117721 * + k2p2zq_73023 + + A_col_idx_117722]; + + A_elem_117726 = A_elem_117728; + } else { + A_elem_117726 = 0.0; + } + + bool cond_117730 = slt64(k_117718, Tk_117449); + int64_t a_loc_ind_117731; + + if (cond_117730) { + int64_t binop_y_117732 = Tk_117449 * i_117720; + int64_t loc_fi_117733 = k_117718 + binop_y_117732; + + a_loc_ind_117731 = loc_fi_117733; + } else { + a_loc_ind_117731 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_117731) && slt64(a_loc_ind_117731, + a_loc_szz_117455)) { + ((__local double *) mem_123435)[a_loc_ind_117731] = + A_elem_117726; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_117738 = 0; i_117738 < tk_div_ty_117451; i_117738++) { + int64_t binop_y_117761 = Ty_117445 * i_117738; + + for (int64_t i_117740 = 0; i_117740 < Rx_117448; i_117740++) { + int64_t binop_y_117763 = Tx_117447 * i_117740; + int64_t ltid_x_117742 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_y_117743 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117744 = local_tid_127798; + int64_t k_117762 = ltid_x_117742 + binop_y_117761; + int64_t j_117764 = ltid_y_117743 + binop_y_117763; + int64_t gtid_117765 = jjj_117467 + j_117764; + int64_t B_row_idx_117766 = kk_117693 + k_117762; + bool binop_x_117767 = slt64(gtid_117765, k2p2zq_73023); + bool binop_y_117768 = slt64(B_row_idx_117766, k2p2zq_73023); + bool cond_117769 = binop_x_117767 && binop_y_117768; + double B_elem_117770; + + if (cond_117769) { + double B_elem_117772 = ((__global + double *) defunc_3_map_res_r_mem_123392)[gtid_95976 * + binop_x_120251 + + B_row_idx_117766 * + k2p2zq_73023 + + gtid_117765]; + + B_elem_117770 = B_elem_117772; + } else { + B_elem_117770 = 0.0; + } + + bool cond_117774 
= slt64(k_117762, Tk_117449); + int64_t b_loc_ind_117775; + + if (cond_117774) { + int64_t binop_y_117776 = TxRx_117452 * k_117762; + int64_t loc_fi_117777 = j_117764 + binop_y_117776; + + b_loc_ind_117775 = loc_fi_117777; + } else { + b_loc_ind_117775 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_117775) && slt64(b_loc_ind_117775, + b_loc_szz_117457)) { + ((__local double *) mem_123437)[b_loc_ind_117775] = + B_elem_117770; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_123590[Ry_117446]; + double mem_123594[Rx_117448]; + double mem_123604[Ry_117446 * Rx_117448]; + double loop_mem_123606[Ry_117446 * Rx_117448]; + double mem_param_123577[Ry_117446 * Rx_117448]; + + for (int32_t i_10 = 0; i_10 < Ry_117446 * Rx_117448; i_10++) + mem_param_123577[i_10] = loop_mem_123524[i_10]; + for (int64_t i_117782 = 0; i_117782 < Tk_117449; i_117782++) { + int64_t cmpop_x_117784 = kk_117693 + i_117782; + bool cond_117785 = slt64(cmpop_x_117784, k2p2zq_73023); + double mem_125333[Ry_117446 * Rx_117448]; + + if (cond_117785) { + int64_t binop_y_117823 = TxRx_117452 * i_117782; + int64_t bytes_123579 = (int64_t) 8 * Ry_117446; + int64_t bytes_123581 = (int64_t) 8 * Rx_117448; + int64_t ltid_y_117788 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_x_117786 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117787 = local_tid_127798; + double mem_123580[Ry_117446]; + double mem_123582[Rx_117448]; + int64_t binop_x_117814 = Ry_117446 * ltid_y_117788; + + for (int64_t i_117812 = 0; i_117812 < Ry_117446; i_117812++) { + int64_t binop_x_117815 = i_117812 + binop_x_117814; + int64_t binop_y_117816 = Tk_117449 * binop_x_117815; + int64_t a_loc_ind_117817 = i_117782 + binop_y_117816; + + for (int64_t i_127835 = 0; i_127835 < (int64_t) 1; i_127835++) { + mem_123580[i_117812 + i_127835] = ((__local + double *) mem_123435)[a_loc_ind_117817 + + i_127835]; + } + } + + int64_t binop_y_117825 = Rx_117448 * ltid_x_117786; + + for (int64_t i_117821 = 0; i_117821 < Rx_117448; 
i_117821++) { + int64_t binop_x_117824 = i_117821 + binop_y_117823; + int64_t b_loc_ind_117826 = binop_x_117824 + binop_y_117825; + + for (int64_t i_127837 = 0; i_127837 < (int64_t) 1; i_127837++) { + mem_123582[i_117821 + i_127837] = ((__local + double *) mem_123437)[b_loc_ind_117826 + + i_127837]; + } + } + for (int64_t i_127838 = 0; i_127838 < Ry_117446; i_127838++) { + mem_123590[i_127838] = mem_123580[i_127838]; + } + for (int64_t i_127839 = 0; i_127839 < Rx_117448; i_127839++) { + mem_123594[i_127839] = mem_123582[i_127839]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_117833 = sext_i32_i64(ltid_pre_127802); + int64_t ltid_x_117831 = sext_i32_i64(ltid_pre_127803); + int32_t ltid_flat_117832 = local_tid_127798; + int64_t binop_y_117874 = Ry_117446 * ltid_y_117833; + int64_t binop_y_117878 = Rx_117448 * ltid_x_117831; + + for (int64_t i_117868 = 0; i_117868 < Ry_117446; i_117868++) { + int64_t binop_x_117873 = iii_117466 + i_117868; + int64_t cmpop_x_117875 = binop_x_117873 + binop_y_117874; + bool binop_x_117876 = slt64(cmpop_x_117875, k2p2zq_73023); + + for (int64_t i_117871 = 0; i_117871 < Rx_117448; i_117871++) { + int64_t binop_x_117877 = jjj_117467 + i_117871; + int64_t cmpop_x_117879 = binop_x_117877 + binop_y_117878; + bool binop_y_117880 = slt64(cmpop_x_117879, k2p2zq_73023); + bool cond_117881 = binop_x_117876 && binop_y_117880; + + if (cond_117881) { + double a_117883 = mem_123590[i_117868]; + double b_117884 = mem_123594[i_117871]; + double c_117885 = mem_param_123577[i_117868 * + Rx_117448 + + i_117871]; + double defunc_1_f_res_117888 = a_117883 * b_117884; + double defunc_1_op_res_117892 = c_117885 + + defunc_1_f_res_117888; + + mem_param_123577[i_117868 * Rx_117448 + i_117871] = + defunc_1_op_res_117892; + } + } + } + for (int64_t i_127842 = 0; i_127842 < Ry_117446; i_127842++) { + for (int64_t i_127843 = 0; i_127843 < Rx_117448; i_127843++) { + mem_123604[i_127842 * Rx_117448 + i_127843] = + mem_param_123577[i_127842 * Rx_117448 + 
i_127843]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_127844 = 0; i_127844 < Ry_117446; i_127844++) { + for (int64_t i_127845 = 0; i_127845 < Rx_117448; i_127845++) { + mem_125333[i_127844 * Rx_117448 + i_127845] = + mem_123604[i_127844 * Rx_117448 + i_127845]; + } + } + } else { + for (int64_t i_127846 = 0; i_127846 < Ry_117446; i_127846++) { + for (int64_t i_127847 = 0; i_127847 < Rx_117448; i_127847++) { + mem_125333[i_127846 * Rx_117448 + i_127847] = + mem_param_123577[i_127846 * Rx_117448 + i_127847]; + } + } + } + + double mem_param_tmp_127832[Ry_117446 * Rx_117448]; + + for (int32_t i_11 = 0; i_11 < Ry_117446 * Rx_117448; i_11++) + mem_param_tmp_127832[i_11] = mem_125333[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_117446 * Rx_117448; i_12++) + mem_param_123577[i_12] = mem_param_tmp_127832[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_117446 * Rx_117448; i_13++) + loop_mem_123606[i_13] = mem_param_123577[i_13]; + + int64_t reg_tile_i_127848 = squot64(sext_i32_i64(local_tid_127798), + Ty_117445 * Tx_117447); + int64_t reg_tile_i_127849 = squot64(sext_i32_i64(local_tid_127798) - + squot64(sext_i32_i64(local_tid_127798), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), Tx_117447); + int64_t reg_tile_i_127850 = sext_i32_i64(local_tid_127798) - + squot64(sext_i32_i64(local_tid_127798), Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447) - squot64(sext_i32_i64(local_tid_127798) - + squot64(sext_i32_i64(local_tid_127798), + Ty_117445 * Tx_117447) * + (Ty_117445 * Tx_117447), + Tx_117447) * Tx_117447; + int64_t tile_dim_start_127851 = gtid_95976 + reg_tile_i_127848; + int64_t tile_dim_start_127852 = Ry_117446 * (Ty_117445 * gid_y_117464 + + reg_tile_i_127849); + int64_t tile_dim_start_127853 = Rx_117448 * (Tx_117447 * gid_x_117463 + + reg_tile_i_127850); + + for (int64_t nest_i_127854 = 0; nest_i_127854 < (int64_t) 1; + nest_i_127854++) { + for (int64_t nest_i_127855 = 0; nest_i_127855 < Ry_117446; + nest_i_127855++) { + for (int64_t nest_i_127856 = 
0; nest_i_127856 < Rx_117448; + nest_i_127856++) { + if ((slt64(tile_dim_start_127851 + nest_i_127854, m_73008) && + slt64(tile_dim_start_127852 + nest_i_127855, + k2p2zq_73023)) && slt64(tile_dim_start_127853 + + nest_i_127856, + k2p2zq_73023)) { + ((__global double *) mem_123610)[(tile_dim_start_127851 + + nest_i_127854) * + (k2p2zq_73023 * + k2p2zq_73023) + + (tile_dim_start_127852 + + nest_i_127855) * + k2p2zq_73023 + + (tile_dim_start_127853 + + nest_i_127856)] = + loop_mem_123606[squot64(nest_i_127855 * Rx_117448 + + nest_i_127856 - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856 - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448 + + (nest_i_127855 * Rx_117448 + + nest_i_127856 - squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * Rx_117448) - + squot64(nest_i_127855 * Rx_117448 + + nest_i_127856 - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448), Ry_117446 * + Rx_117448) * (Ry_117446 * + Rx_117448) - + squot64(nest_i_127855 * Rx_117448 + + nest_i_127856 - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * Ry_117446 * + Rx_117448) * + (Tx_117447 * Ry_117446 * + Rx_117448) - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856 - + squot64(nest_i_127855 * + Rx_117448 + + nest_i_127856, + Tx_117447 * + Ry_117446 * + Rx_117448) * + (Tx_117447 * + Ry_117446 * + Rx_117448), + Ry_117446 * + Rx_117448) * + (Ry_117446 * Rx_117448), + Rx_117448) * Rx_117448)]; + } + } + } + } + + error_9: + return; + #undef Ty_117445 + #undef Ry_117446 + #undef Tx_117447 + #undef Rx_117448 + 
#undef Tk_117449 + #undef tk_div_tx_117450 + #undef tk_div_ty_117451 + #undef TxRx_117452 + #undef TyRy_117453 + #undef a_loc_szz_117455 + #undef b_loc_szz_117457 +} +__kernel void mainMagnitudezisegmap_intragroup_117900(__global + int *global_failure, + __local volatile + int64_t *mem_124104_backing_aligned_0, + __local volatile + int64_t *mem_124097_backing_aligned_1, + int64_t m_73008, + double level_73014, + int64_t num_recresids_padded_73681, + int64_t num_whole_tiles_117920, + int64_t residual_input_118032, + unsigned char cond_118033, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global + unsigned char *mem_124081, + __global + unsigned char *mem_124084, + __global + unsigned char *mem_124113) +{ + #define segmap_group_sizze_99365 (mainMagnitudezisegmap_group_sizze_99230) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124104_backing_5 = (__local volatile + char *) mem_124104_backing_aligned_0; + __local volatile char *restrict mem_124097_backing_0 = (__local volatile + char *) mem_124097_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128538; + int32_t local_tid_128539; + int64_t group_sizze_128542; + int32_t wave_sizze_128541; + int32_t group_tid_128540; + + global_tid_128538 = get_global_id(0); + local_tid_128539 = get_local_id(0); + group_sizze_128542 = get_local_size(0); + wave_sizze_128541 = LOCKSTEP_WIDTH; + group_tid_128540 = get_group_id(0); + + int32_t gid_flat_117900; + + gid_flat_117900 = group_tid_128540; + + int32_t ltid_pre_128543; + + ltid_pre_128543 = local_tid_128539; + + int64_t gid_117899; + + gid_117899 = sext_i32_i64(group_tid_128540); + + int64_t binop_x_117909; + + binop_x_117909 = segmap_group_sizze_99365 * gid_117899; + + int64_t mem_124088[1]; + double mem_124090[1]; + int64_t ltid_117901 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_117902 = local_tid_128539; + int64_t gtid_117910 = ltid_117901 + 
binop_x_117909; + bool cond_117911 = slt64(gtid_117910, m_73008); + int64_t pre_117912; + double pre_117913; + + if (cond_117911) { + int64_t x_117914 = ((__global + int64_t *) defunc_3_map_res_mem_124069)[gtid_117910]; + double i64_res_117915 = sitofp_i64_f64(x_117914); + + pre_117912 = x_117914; + pre_117913 = i64_res_117915; + } else { + pre_117912 = (int64_t) 0; + pre_117913 = 0.0; + } + mem_124088[(int64_t) 0] = pre_117912; + mem_124090[(int64_t) 0] = pre_117913; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124093[1]; + int64_t ltid_117921 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_117922 = local_tid_128539; + + mem_124093[(int64_t) 0] = -INFINITY; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124097; + + mem_124097 = (__local char *) mem_124097_backing_0; + + double accs_mem_124101[1]; + double mem_param_124094[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_124094[i_1] = mem_124093[i_1]; + for (int64_t tile_id_117928 = 0; tile_id_117928 < num_whole_tiles_117920; + tile_id_117928++) { + int64_t binop_x_117984 = segmap_group_sizze_99365 * tile_id_117928; + int64_t ltid_117929 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_117930 = local_tid_128539; + int64_t j_117985 = ltid_117929 + binop_x_117984; + bool cond_117991 = slt64(j_117985, num_recresids_padded_73681); + int64_t pre_117992; + + if (cond_117991) { + pre_117992 = j_117985; + } else { + pre_117992 = (int64_t) 0; + } + ((__local int64_t *) mem_124097)[ltid_117929] = pre_117992; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_119591 = (int64_t) 1 + binop_x_117984; + double mem_124100[1]; + int64_t ltid_117950 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_117951 = local_tid_128539; + int64_t gtid_117997 = binop_x_117909 + ltid_117950; + double acc_118001 = mem_param_124094[(int64_t) 0]; + bool cond_118004 = slt64(gtid_117997, m_73008); + double acc_118005; + + if (cond_118004) { + double i64_res_117999 = mem_124090[(int64_t) 0]; + double x_118006; + double 
redout_119906 = acc_118001; + + for (int64_t i_119907 = 0; i_119907 < segmap_group_sizze_99365; + i_119907++) { + int64_t slice_120045 = slice_119591 + i_119907; + double x_118010 = ((__global + double *) mem_124081)[slice_120045 * + m_73008 + + gtid_117997]; + int64_t x_118011 = ((__local int64_t *) mem_124097)[i_119907]; + int64_t x_118012 = mul64((int64_t) 2, x_118011); + int64_t i64_arg_118013 = add64((int64_t) 2, x_118012); + double i64_res_118014 = sitofp_i64_f64(i64_arg_118013); + double y_118015 = i64_res_118014 / i64_res_117999; + double lifted_div_res_118016 = 1.0 + y_118015; + double abs_arg_118017 = x_118010 / lifted_div_res_118016; + double abs_res_118018 = fabs(abs_arg_118017); + double defunc_1_op_res_118009 = fmax64(abs_res_118018, + redout_119906); + double redout_tmp_128546 = defunc_1_op_res_118009; + + redout_119906 = redout_tmp_128546; + } + x_118006 = redout_119906; + acc_118005 = x_118006; + } else { + acc_118005 = acc_118001; + } + mem_124100[(int64_t) 0] = acc_118005; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128544[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_128544[i_2] = mem_124100[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_124094[i_3] = mem_param_tmp_128544[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_124101[i_4] = mem_param_124094[i_4]; + + __local char *mem_124104; + + mem_124104 = (__local char *) mem_124104_backing_5; + + double mem_124107[1]; + double mem_125358[1]; + + if (cond_118033) { + mem_125358[(int64_t) 0] = accs_mem_124101[(int64_t) 0]; + } else { + int64_t binop_x_118043 = segmap_group_sizze_99365 * + num_whole_tiles_117920; + int64_t ltid_118034 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_118035 = local_tid_128539; + int64_t j_118044 = ltid_118034 + binop_x_118043; + bool cond_118050 = slt64(j_118044, num_recresids_padded_73681); + int64_t pre_118051; + + if (cond_118050) { + pre_118051 = j_118044; + } else { + pre_118051 = (int64_t) 0; + } + 
((__local int64_t *) mem_124104)[ltid_118034] = pre_118051; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t slice_offset_118071 = num_whole_tiles_117920 * + residual_input_118032; + int64_t slice_119594 = (int64_t) 1 + slice_offset_118071; + int64_t ltid_118056 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_118057 = local_tid_128539; + int64_t gtid_118066 = binop_x_117909 + ltid_118056; + double acc_118070 = accs_mem_124101[(int64_t) 0]; + bool cond_118073 = slt64(gtid_118066, m_73008); + double acc_118074; + + if (cond_118073) { + double i64_res_118068 = mem_124090[(int64_t) 0]; + double x_118075; + double redout_119908 = acc_118070; + + for (int64_t i_119909 = 0; i_119909 < residual_input_118032; + i_119909++) { + int64_t slice_120046 = slice_119594 + i_119909; + double x_118079 = ((__global + double *) mem_124081)[slice_120046 * + m_73008 + + gtid_118066]; + int64_t x_118080 = ((__local int64_t *) mem_124104)[i_119909]; + int64_t x_118081 = mul64((int64_t) 2, x_118080); + int64_t i64_arg_118082 = add64((int64_t) 2, x_118081); + double i64_res_118083 = sitofp_i64_f64(i64_arg_118082); + double y_118084 = i64_res_118083 / i64_res_118068; + double lifted_div_res_118085 = 1.0 + y_118084; + double abs_arg_118086 = x_118079 / lifted_div_res_118085; + double abs_res_118087 = fabs(abs_arg_118086); + double defunc_1_op_res_118078 = fmax64(abs_res_118087, + redout_119908); + double redout_tmp_128547 = defunc_1_op_res_118078; + + redout_119908 = redout_tmp_128547; + } + x_118075 = redout_119908; + acc_118074 = x_118075; + } else { + acc_118074 = acc_118070; + } + mem_124107[(int64_t) 0] = acc_118074; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125358[(int64_t) 0] = mem_124107[(int64_t) 0]; + } + + int64_t mem_124111[1]; + int64_t ltid_118090 = sext_i32_i64(ltid_pre_128543); + int32_t ltid_flat_118091 = local_tid_128539; + int64_t gtid_118097 = binop_x_117909 + ltid_118090; + bool cond_118099 = slt64(gtid_118097, m_73008); + int64_t postlude_118100; + + if (cond_118099) { + 
double defunc_2_reduce_res_118098 = mem_125358[(int64_t) 0]; + double defunc_0_Q_arg_118105 = 3.0 * defunc_2_reduce_res_118098; + double zs_res_118106 = defunc_0_Q_arg_118105 / 1.4142135623730951; + double abs_res_118107 = fabs(zs_res_118106); + double zs_res_118108 = abs_res_118107 / 2.0; + double zp_res_118109 = 1.0 + zs_res_118108; + double zs_res_118110 = 1.0 / zp_res_118109; + double zt_res_118111 = zs_res_118110 * zs_res_118110; + double zt_res_118112 = zs_res_118110 * zt_res_118111; + double zt_res_118113 = zt_res_118111 * zt_res_118111; + double zt_res_118114 = zt_res_118111 * zt_res_118112; + double zt_res_118115 = zt_res_118112 * zt_res_118112; + double zt_res_118116 = zt_res_118112 * zt_res_118113; + double zt_res_118117 = zt_res_118113 * zt_res_118113; + double zt_res_118118 = zt_res_118113 * zt_res_118114; + double zt_res_118119 = 0.17087277 * zt_res_118118; + double zt_res_118120 = 0.82215223 * zt_res_118117; + double zt_res_118121 = 1.48851587 * zt_res_118116; + double zt_res_118122 = 1.13520398 * zt_res_118115; + double zt_res_118123 = 0.27886807 * zt_res_118114; + double zt_res_118124 = 0.18628806 * zt_res_118113; + double zt_res_118125 = 9.678418e-2 * zt_res_118112; + double zt_res_118126 = 0.37409196 * zt_res_118111; + double zt_res_118127 = 1.00002368 * zs_res_118110; + double zt_res_118128 = zs_res_118106 * zs_res_118106; + double zm_res_118129 = 0.0 - zt_res_118128; + double zm_res_118130 = zm_res_118129 - 1.26551223; + double zp_res_118131 = zt_res_118127 + zm_res_118130; + double zp_res_118132 = zt_res_118126 + zp_res_118131; + double zp_res_118133 = zt_res_118125 + zp_res_118132; + double zm_res_118134 = zp_res_118133 - zt_res_118124; + double zp_res_118135 = zt_res_118123 + zm_res_118134; + double zm_res_118136 = zp_res_118135 - zt_res_118122; + double zp_res_118137 = zt_res_118121 + zm_res_118136; + double zm_res_118138 = zp_res_118137 - zt_res_118120; + double zp_res_118139 = zt_res_118119 + zm_res_118138; + double exp_res_118140; + + 
exp_res_118140 = futrts_exp64(zp_res_118139); + + double zt_res_118141 = zs_res_118110 * exp_res_118140; + bool zgze_res_118142 = 0.0 <= zs_res_118106; + double erf_res_118143; + + if (zgze_res_118142) { + double zm_res_118144 = 1.0 - zt_res_118141; + + erf_res_118143 = zm_res_118144; + } else { + double zm_res_118145 = zt_res_118141 - 1.0; + + erf_res_118143 = zm_res_118145; + } + + double zp_res_118146 = 1.0 + erf_res_118143; + double zs_res_118147 = zp_res_118146 / 2.0; + double defunc_0_Q_res_118148 = 1.0 - zs_res_118147; + double y_118149 = fpow64(defunc_2_reduce_res_118098, 2.0); + double negate_arg_118150 = 4.0 * y_118149; + double defunc_0_exp_arg_118151 = 0.0 - negate_arg_118150; + double defunc_0_exp_res_118152 = fpow64(2.718281828459045, + defunc_0_exp_arg_118151); + double x_118153 = defunc_0_Q_res_118148 + defunc_0_exp_res_118152; + double zs_res_118154 = defunc_2_reduce_res_118098 / 1.4142135623730951; + double abs_res_118155 = fabs(zs_res_118154); + double zs_res_118156 = abs_res_118155 / 2.0; + double zp_res_118157 = 1.0 + zs_res_118156; + double zs_res_118158 = 1.0 / zp_res_118157; + double zt_res_118159 = zs_res_118158 * zs_res_118158; + double zt_res_118160 = zs_res_118158 * zt_res_118159; + double zt_res_118161 = zt_res_118159 * zt_res_118159; + double zt_res_118162 = zt_res_118159 * zt_res_118160; + double zt_res_118163 = zt_res_118160 * zt_res_118160; + double zt_res_118164 = zt_res_118160 * zt_res_118161; + double zt_res_118165 = zt_res_118161 * zt_res_118161; + double zt_res_118166 = zt_res_118161 * zt_res_118162; + double zt_res_118167 = 0.17087277 * zt_res_118166; + double zt_res_118168 = 0.82215223 * zt_res_118165; + double zt_res_118169 = 1.48851587 * zt_res_118164; + double zt_res_118170 = 1.13520398 * zt_res_118163; + double zt_res_118171 = 0.27886807 * zt_res_118162; + double zt_res_118172 = 0.18628806 * zt_res_118161; + double zt_res_118173 = 9.678418e-2 * zt_res_118160; + double zt_res_118174 = 0.37409196 * zt_res_118159; + double 
zt_res_118175 = 1.00002368 * zs_res_118158; + double zt_res_118176 = zs_res_118154 * zs_res_118154; + double zm_res_118177 = 0.0 - zt_res_118176; + double zm_res_118178 = zm_res_118177 - 1.26551223; + double zp_res_118179 = zt_res_118175 + zm_res_118178; + double zp_res_118180 = zt_res_118174 + zp_res_118179; + double zp_res_118181 = zt_res_118173 + zp_res_118180; + double zm_res_118182 = zp_res_118181 - zt_res_118172; + double zp_res_118183 = zt_res_118171 + zm_res_118182; + double zm_res_118184 = zp_res_118183 - zt_res_118170; + double zp_res_118185 = zt_res_118169 + zm_res_118184; + double zm_res_118186 = zp_res_118185 - zt_res_118168; + double zp_res_118187 = zt_res_118167 + zm_res_118186; + double exp_res_118188; + + exp_res_118188 = futrts_exp64(zp_res_118187); + + double zt_res_118189 = zs_res_118158 * exp_res_118188; + bool zgze_res_118190 = 0.0 <= zs_res_118154; + double erf_res_118191; + + if (zgze_res_118190) { + double zm_res_118192 = 1.0 - zt_res_118189; + + erf_res_118191 = zm_res_118192; + } else { + double zm_res_118193 = zt_res_118189 - 1.0; + + erf_res_118191 = zm_res_118193; + } + + double zp_res_118194 = 1.0 + erf_res_118191; + double zs_res_118195 = zp_res_118194 / 2.0; + double defunc_0_Q_res_118196 = 1.0 - zs_res_118195; + double y_118197 = defunc_0_exp_res_118152 * defunc_0_Q_res_118196; + double y_118198 = x_118153 - y_118197; + double pval_brownian_motion_max_res_118199 = 2.0 * y_118198; + int64_t defunc_0_f_res_118200; + int64_t redout_119910 = (int64_t) 9223372036854775807; + + for (int64_t i_119911 = 0; i_119911 < num_recresids_padded_73681; + i_119911++) { + int64_t slice_120048 = (int64_t) 1 + i_119911; + double x_118205 = ((__global double *) mem_124081)[slice_120048 * + m_73008 + + gtid_118097]; + double x_118206 = ((__global double *) mem_124084)[slice_120048 * + m_73008 + + gtid_118097]; + double abs_res_118207 = fabs(x_118205); + bool cond_118208 = x_118206 < abs_res_118207; + int64_t defunc_2_f_res_118209; + + if (cond_118208) { 
+ defunc_2_f_res_118209 = i_119911; + } else { + defunc_2_f_res_118209 = (int64_t) 9223372036854775807; + } + + int64_t defunc_1_op_res_118203 = smin64(defunc_2_f_res_118209, + redout_119910); + int64_t redout_tmp_128548 = defunc_1_op_res_118203; + + redout_119910 = redout_tmp_128548; + } + defunc_0_f_res_118200 = redout_119910; + + bool isnan_res_118210; + + isnan_res_118210 = futrts_isnan64(pval_brownian_motion_max_res_118199); + + bool cond_118211 = !isnan_res_118210; + bool cond_t_res_118212 = pval_brownian_motion_max_res_118199 < + level_73014; + bool x_118213 = cond_118211 && cond_t_res_118212; + bool chk_t_res_118214 = defunc_0_f_res_118200 == + (int64_t) 9223372036854775807; + bool chk_t_res_118215 = !chk_t_res_118214; + bool x_118216 = x_118213 && chk_t_res_118215; + int64_t y_start_118217; + + if (x_118216) { + int64_t x_118101 = mem_124088[(int64_t) 0]; + int64_t y_start_t_res_118218 = sub64(x_118101, + defunc_0_f_res_118200); + + y_start_118217 = y_start_t_res_118218; + } else { + y_start_118217 = (int64_t) 0; + } + postlude_118100 = y_start_118217; + } else { + postlude_118100 = (int64_t) 0; + } + mem_124111[(int64_t) 0] = postlude_118100; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64(sext_i32_i64(local_tid_128539) + segmap_group_sizze_99365 * + sext_i32_i64(group_tid_128540), m_73008)) { + ((__global int64_t *) mem_124113)[sext_i32_i64(local_tid_128539) + + segmap_group_sizze_99365 * + sext_i32_i64(group_tid_128540)] = + mem_124111[(int64_t) 0]; + } + + error_7: + return; + #undef segmap_group_sizze_99365 +} +__kernel void mainMagnitudezisegmap_intragroup_118238(__global + int *global_failure, + __local volatile + int64_t *mem_124225_backing_aligned_0, + int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t Ty_118226, + int64_t Tx_118227, + int64_t gridDim_x_118228, + int64_t gridDim_y_118229, + int64_t group_sizze_tile3d_118233, + int64_t count_shmem_118234, + __global + unsigned char *mem_120120, + __global + unsigned char *mem_120124, 
+ __global + unsigned char *mem_124213, + __global + unsigned char *mem_124273) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124225_backing_0 = (__local volatile + char *) mem_124225_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128732; + int32_t local_tid_128733; + int64_t group_sizze_128736; + int32_t wave_sizze_128735; + int32_t group_tid_128734; + + global_tid_128732 = get_global_id(0); + local_tid_128733 = get_local_id(0); + group_sizze_128736 = get_local_size(0); + wave_sizze_128735 = LOCKSTEP_WIDTH; + group_tid_128734 = get_group_id(0); + + int32_t gid_flat_118238; + + gid_flat_118238 = group_tid_128734; + + int32_t ltid_pre_128737; + + ltid_pre_128737 = squot32(local_tid_128733, sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128738; + + ltid_pre_128738 = squot32(local_tid_128733 - squot32(local_tid_128733, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128739; + + ltid_pre_128739 = local_tid_128733 - squot32(local_tid_128733, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)) - + squot32(local_tid_128733 - squot32(local_tid_128733, + sext_i64_i32(Ty_118226) * + sext_i64_i32(Tx_118227)) * + (sext_i64_i32(Ty_118226) * sext_i64_i32(Tx_118227)), + sext_i64_i32(Tx_118227)) * sext_i64_i32(Tx_118227); + + int32_t ltid_pre_128740; + + ltid_pre_128740 = squot32(local_tid_128733, sext_i64_i32(Tx_118227)); + + int32_t ltid_pre_128741; + + ltid_pre_128741 = local_tid_128733 - squot32(local_tid_128733, + sext_i64_i32(Tx_118227)) * + sext_i64_i32(Tx_118227); + + int32_t ltid_pre_128742; + + ltid_pre_128742 = local_tid_128733; + + int64_t gid_zz_118237; + + gid_zz_118237 = squot64(sext_i32_i64(group_tid_128734), gridDim_y_118229 * + gridDim_x_118228); + + 
int64_t gid_y_118236; + + gid_y_118236 = squot64(sext_i32_i64(group_tid_128734) - + squot64(sext_i32_i64(group_tid_128734), + gridDim_y_118229 * gridDim_x_118228) * + (gridDim_y_118229 * gridDim_x_118228), + gridDim_x_118228); + + int64_t gid_x_118235; + + gid_x_118235 = sext_i32_i64(group_tid_128734) - + squot64(sext_i32_i64(group_tid_128734), gridDim_y_118229 * + gridDim_x_118228) * (gridDim_y_118229 * gridDim_x_118228) - + squot64(sext_i32_i64(group_tid_128734) - + squot64(sext_i32_i64(group_tid_128734), gridDim_y_118229 * + gridDim_x_118228) * (gridDim_y_118229 * + gridDim_x_118228), + gridDim_x_118228) * gridDim_x_118228; + + int64_t ii_118239; + + ii_118239 = (int64_t) 30 * gid_zz_118237; + + int64_t jj1_118240 = Ty_118226 * gid_y_118236; + int64_t jj2_118241 = Tx_118227 * gid_x_118235; + double mem_124223[30]; + int64_t ltid_y_118244 = sext_i32_i64(ltid_pre_128740); + int64_t ltid_x_118242 = sext_i32_i64(ltid_pre_128741); + int32_t ltid_flat_118243 = local_tid_128733; + double mem_124217[30]; + + for (int32_t i_119595 = 0; i_119595 < 30; i_119595++) { + int64_t i_118252 = sext_i32_i64(i_119595); + + mem_124217[i_118252] = 0.0; + } + for (int64_t i_128744 = 0; i_128744 < (int64_t) 30; i_128744++) { + mem_124223[i_128744] = mem_124217[i_128744]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124225; + + mem_124225 = (__local char *) mem_124225_backing_0; + + double loop_mem_124255[30]; + double mem_param_124226[30]; + + for (int32_t i_1 = 0; i_1 < 30; i_1++) + mem_param_124226[i_1] = mem_124223[i_1]; + for (int64_t i_118257 = 0; i_118257 < n_73011; i_118257++) { + for (int64_t i_118260 = 0; i_118260 < count_shmem_118234; i_118260++) { + int64_t offs_118273 = group_sizze_tile3d_118233 * i_118260; + int64_t ltid_118263 = sext_i32_i64(ltid_pre_128742); + int32_t ltid_flat_118262 = local_tid_128733; + int64_t loc_ind_118274 = ltid_118263 + offs_118273; + int64_t gtid_118275 = ii_118239 + loc_ind_118274; + bool cond_118276 = slt64(gtid_118275, m_73008); 
+ double y_elem_118277; + + if (cond_118276) { + double Y_elem_118279 = ((__global + double *) mem_124213)[i_118257 * + m_73008 + + gtid_118275]; + + y_elem_118277 = Y_elem_118279; + } else { + y_elem_118277 = 0.0; + } + + bool cond_118281 = slt64(loc_ind_118274, (int64_t) 30); + int64_t y_loc_ind_118282; + + if (cond_118281) { + y_loc_ind_118282 = loc_ind_118274; + } else { + y_loc_ind_118282 = (int64_t) -1; + } + if (sle64((int64_t) 0, y_loc_ind_118282) && slt64(y_loc_ind_118282, + (int64_t) 30)) { + ((__local double *) mem_124225)[y_loc_ind_118282] = + y_elem_118277; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + + double mem_124254[30]; + int64_t ltid_y_118288 = sext_i32_i64(ltid_pre_128740); + int64_t ltid_x_118286 = sext_i32_i64(ltid_pre_128741); + int32_t ltid_flat_118287 = local_tid_128733; + int64_t gtid_118315 = jj1_118240 + ltid_y_118288; + int64_t gtid_118316 = jj2_118241 + ltid_x_118286; + bool binop_x_118318 = slt64(gtid_118315, k2p2zq_73023); + bool binop_y_118319 = slt64(gtid_118316, k2p2zq_73023); + bool cond_118320 = binop_x_118318 && binop_y_118319; + double mem_125364[30]; + + if (cond_118320) { + double x_118323 = ((__global double *) mem_120120)[i_118257 * + k2p2zq_73023 + + gtid_118315]; + double x_118325 = ((__global double *) mem_120124)[i_118257 * + k2p2zq_73023 + + gtid_118316]; + + for (int32_t i_119596 = 0; i_119596 < 30; i_119596++) { + int64_t i_118327 = sext_i32_i64(i_119596); + int64_t gtid_118329 = ii_118239 + i_118327; + bool cond_118330 = slt64(gtid_118329, m_73008); + + if (cond_118330) { + double inp_reg_var2zz_118332 = ((__local + double *) mem_124225)[i_118327]; + double res_reg_var2zz_118333 = mem_param_124226[i_118327]; + double x_118337 = x_118323 * x_118325; + bool isnan_res_118338; + + isnan_res_118338 = futrts_isnan64(inp_reg_var2zz_118332); + + double y_118339; + + if (isnan_res_118338) { + y_118339 = 0.0; + } else { + y_118339 = 1.0; + } + + double defunc_2_f_res_118340 = x_118337 * y_118339; + double 
defunc_1_op_res_118344 = res_reg_var2zz_118333 + + defunc_2_f_res_118340; + + mem_param_124226[i_118327] = defunc_1_op_res_118344; + } + } + for (int64_t i_128750 = 0; i_128750 < (int64_t) 30; i_128750++) { + mem_125364[i_128750] = mem_param_124226[i_128750]; + } + } else { + for (int64_t i_128751 = 0; i_128751 < (int64_t) 30; i_128751++) { + mem_125364[i_128751] = mem_param_124226[i_128751]; + } + } + for (int64_t i_128752 = 0; i_128752 < (int64_t) 30; i_128752++) { + mem_124254[i_128752] = mem_125364[i_128752]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128745[30]; + + for (int32_t i_2 = 0; i_2 < 30; i_2++) + mem_param_tmp_128745[i_2] = mem_124254[i_2]; + for (int32_t i_3 = 0; i_3 < 30; i_3++) + mem_param_124226[i_3] = mem_param_tmp_128745[i_3]; + } + for (int32_t i_4 = 0; i_4 < 30; i_4++) + loop_mem_124255[i_4] = mem_param_124226[i_4]; + + double mem_124269[30 * 1 * 1]; + int64_t ltid_zz_118353 = sext_i32_i64(ltid_pre_128737); + int64_t ltid_y_118352 = sext_i32_i64(ltid_pre_128738); + int64_t ltid_x_118350 = sext_i32_i64(ltid_pre_128739); + int32_t ltid_flat_118351 = local_tid_128733; + double mem_124263[30 * 1 * 1]; + + for (int32_t i_119598 = 0; i_119598 < 30; i_119598++) { + int64_t i_118362 = sext_i32_i64(i_119598); + + for (int64_t i_128754 = 0; i_128754 < (int64_t) 1; i_128754++) { + mem_124263[i_118362 + i_128754] = loop_mem_124255[i_118362 + + i_128754]; + } + } + for (int64_t i_128755 = 0; i_128755 < (int64_t) 30; i_128755++) { + for (int64_t i_128756 = 0; i_128756 < (int64_t) 1; i_128756++) { + for (int64_t i_128757 = 0; i_128757 < (int64_t) 1; i_128757++) { + mem_124269[i_128755 + i_128756 + i_128757] = + mem_124263[i_128755 + i_128756 + i_128757]; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t reg_tile_i_128758 = squot64(sext_i32_i64(local_tid_128733), + Ty_118226 * Tx_118227); + int64_t reg_tile_i_128759 = squot64(sext_i32_i64(local_tid_128733) - + squot64(sext_i32_i64(local_tid_128733), + Ty_118226 * Tx_118227) * + 
(Ty_118226 * Tx_118227), Tx_118227); + int64_t reg_tile_i_128760 = sext_i32_i64(local_tid_128733) - + squot64(sext_i32_i64(local_tid_128733), Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227) - squot64(sext_i32_i64(local_tid_128733) - + squot64(sext_i32_i64(local_tid_128733), + Ty_118226 * Tx_118227) * + (Ty_118226 * Tx_118227), + Tx_118227) * Tx_118227; + int64_t tile_dim_start_128761 = (int64_t) 30 * (gid_zz_118237 + + reg_tile_i_128758); + int64_t tile_dim_start_128762 = Ty_118226 * gid_y_118236 + + reg_tile_i_128759; + int64_t tile_dim_start_128763 = Tx_118227 * gid_x_118235 + + reg_tile_i_128760; + + for (int64_t nest_i_128764 = 0; nest_i_128764 < (int64_t) 30; + nest_i_128764++) { + for (int64_t nest_i_128765 = 0; nest_i_128765 < (int64_t) 1; + nest_i_128765++) { + for (int64_t nest_i_128766 = 0; nest_i_128766 < (int64_t) 1; + nest_i_128766++) { + if ((slt64(tile_dim_start_128761 + nest_i_128764, m_73008) && + slt64(tile_dim_start_128762 + nest_i_128765, + k2p2zq_73023)) && slt64(tile_dim_start_128763 + + nest_i_128766, + k2p2zq_73023)) { + ((__global double *) mem_124273)[(tile_dim_start_128761 + + nest_i_128764) * + (k2p2zq_73023 * + k2p2zq_73023) + + (tile_dim_start_128762 + + nest_i_128765) * + k2p2zq_73023 + + (tile_dim_start_128763 + + nest_i_128766)] = + mem_124269[nest_i_128764 + nest_i_128765 + + nest_i_128766]; + } + } + } + } + + error_4: + return; +} /* Kernel mainMagnitudezisegmap_intragroup_118391: every work-item accumulates an Ry x Rx tile c[row][col] += a * b, where a is read from mem_124142 at [row * N_73007 + k], b is read from mem_120120 at [k * k2p2zq_73023 + col], and a NaN value of a contributes 0.0 instead of a * b. Both operands are staged through the two __local buffers in tiles of Tk along k: first full_tiles_118416 tiles whose k index is not bounds-checked, then one residual tile starting at kk_118623 whose k index is checked against n_73011. Results are stored to mem_124583 at [row * k2p2zq_73023 + col] for row < m_73008 and col < k2p2zq_73023. */ +__kernel void mainMagnitudezisegmap_intragroup_118391(__global + int *global_failure, + __local volatile + int64_t *mem_124411_backing_aligned_0, + __local volatile + int64_t *mem_124409_backing_aligned_1, + int64_t N_73007, + int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t gridDim_x_118385, + int64_t full_tiles_118416, + int64_t kk_118623, + __global + unsigned char *mem_120120, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124583) +{ + #define Ty_118372 (mainMagnitudeziTy_118369) + #define Ry_118373 (mainMagnitudeziRy_118371) + #define Tx_118374 
(mainMagnitudeziTx_118368) + #define Rx_118375 (mainMagnitudeziRx_118370) + #define Tk_118376 (mainMagnitudeziTk_118367) + #define tk_div_tx_118377 (sdiv_up64(mainMagnitudeziTk_118367, mainMagnitudeziTx_118368)) + #define tk_div_ty_118378 (sdiv_up64(mainMagnitudeziTk_118367, mainMagnitudeziTy_118369)) + #define TxRx_118379 (mainMagnitudeziTx_118368 * mainMagnitudeziRx_118370) + #define TyRy_118380 (mainMagnitudeziTy_118369 * mainMagnitudeziRy_118371) + #define a_loc_szz_118382 (mainMagnitudeziTk_118367 * (mainMagnitudeziTy_118369 * mainMagnitudeziRy_118371)) + #define b_loc_szz_118384 (mainMagnitudeziRx_118370 * (mainMagnitudeziTx_118368 * mainMagnitudeziTk_118367)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124411_backing_1 = (__local volatile + char *) mem_124411_backing_aligned_0; + __local volatile char *restrict mem_124409_backing_0 = (__local volatile + char *) mem_124409_backing_aligned_1; + /* Nothing is computed once *global_failure is non-negative. */ + if (*global_failure >= 0) + return; + + int32_t global_tid_128900; + int32_t local_tid_128901; + int64_t group_sizze_128904; + int32_t wave_sizze_128903; + int32_t group_tid_128902; + + global_tid_128900 = get_global_id(0); + local_tid_128901 = get_local_id(0); + group_sizze_128904 = get_local_size(0); + wave_sizze_128903 = LOCKSTEP_WIDTH; + group_tid_128902 = get_group_id(0); + + int32_t gid_flat_118391; + + gid_flat_118391 = group_tid_128902; + /* Quotient and remainder of the flat local id by Tx. */ + int32_t ltid_pre_128905; + + ltid_pre_128905 = squot32(local_tid_128901, sext_i64_i32(Tx_118374)); + + int32_t ltid_pre_128906; + + ltid_pre_128906 = local_tid_128901 - squot32(local_tid_128901, + sext_i64_i32(Tx_118374)) * + sext_i64_i32(Tx_118374); + /* Quotient and remainder of the flat group id by gridDim_x_118385. */ + int64_t gid_y_118390; + + gid_y_118390 = squot64(sext_i32_i64(group_tid_128902), gridDim_x_118385); + + int64_t gid_x_118389; + + gid_x_118389 = sext_i32_i64(group_tid_128902) - + squot64(sext_i32_i64(group_tid_128902), gridDim_x_118385) * + gridDim_x_118385; + + int64_t iii_118392; + + 
iii_118392 = TyRy_118380 * gid_y_118390; + + int64_t jjj_118393 = TxRx_118379 * gid_x_118389; + double mem_124407[Ry_118373 * Rx_118375]; + int64_t ltid_y_118396 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_x_118394 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118395 = local_tid_128901; + double mem_124398[Ry_118373 * Rx_118375]; + /* Zero-initialise the Ry x Rx accumulator (mem_124398, then copied into mem_124407). */ + for (int64_t i_118407 = 0; i_118407 < Ry_118373; i_118407++) { + for (int64_t i_118410 = 0; i_118410 < Rx_118375; i_118410++) { + mem_124398[i_118407 * Rx_118375 + i_118410] = 0.0; + } + } + for (int64_t i_128909 = 0; i_128909 < Ry_118373; i_128909++) { + for (int64_t i_128910 = 0; i_128910 < Rx_118375; i_128910++) { + mem_124407[i_128909 * Rx_118375 + i_128910] = mem_124398[i_128909 * + Rx_118375 + + i_128910]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124409; + + mem_124409 = (__local char *) mem_124409_backing_0; + + __local char *mem_124411; + + mem_124411 = (__local char *) mem_124411_backing_1; + + double mem_124482[Ry_118373]; + double mem_124486[Rx_118375]; + double loop_mem_124498[Ry_118373 * Rx_118375]; + double mem_param_124412[Ry_118373 * Rx_118375]; + + for (int32_t i_2 = 0; i_2 < Ry_118373 * Rx_118375; i_2++) + mem_param_124412[i_2] = mem_124407[i_2]; + /* Loop over the full_tiles_118416 tiles of width Tk along k. */ for (int64_t i_118417 = 0; i_118417 < full_tiles_118416; i_118417++) { + int64_t kk_118421 = Tk_118376 * i_118417; + /* Stage values of mem_124142 into mem_124409; rows at or beyond m_73008 are stored as 0.0. */ + for (int64_t i_118422 = 0; i_118422 < Ry_118373; i_118422++) { + int64_t binop_y_118445 = Ty_118372 * i_118422; + + for (int64_t i_118424 = 0; i_118424 < tk_div_tx_118377; + i_118424++) { + int64_t binop_y_118443 = Tx_118374 * i_118424; + int64_t ltid_x_118426 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_y_118427 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118428 = local_tid_128901; + int64_t k_118444 = ltid_y_118427 + binop_y_118443; + int64_t i_118446 = ltid_x_118426 + binop_y_118445; + int64_t gtid_118447 = iii_118392 + i_118446; + int64_t A_col_idx_118448 = kk_118421 + k_118444; + bool cond_118449 = 
slt64(gtid_118447, m_73008); + double A_elem_118450; + + if (cond_118449) { + double A_elem_118452 = ((__global + double *) mem_124142)[gtid_118447 * + N_73007 + + A_col_idx_118448]; + + A_elem_118450 = A_elem_118452; + } else { + A_elem_118450 = 0.0; + } + + bool cond_118454 = slt64(k_118444, Tk_118376); + int64_t a_loc_ind_118455; + + if (cond_118454) { + int64_t binop_y_118456 = Tk_118376 * i_118446; + int64_t loc_fi_118457 = k_118444 + binop_y_118456; + + a_loc_ind_118455 = loc_fi_118457; + } else { + a_loc_ind_118455 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_118455) && + slt64(a_loc_ind_118455, a_loc_szz_118382)) { + ((__local double *) mem_124409)[a_loc_ind_118455] = + A_elem_118450; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + /* Stage values of mem_120120 into mem_124411; columns at or beyond k2p2zq_73023 are stored as 0.0. */ for (int64_t i_118462 = 0; i_118462 < tk_div_ty_118378; i_118462++) { + int64_t binop_y_118483 = Ty_118372 * i_118462; + + for (int64_t i_118464 = 0; i_118464 < Rx_118375; i_118464++) { + int64_t binop_y_118485 = Tx_118374 * i_118464; + int64_t ltid_x_118466 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_y_118467 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118468 = local_tid_128901; + int64_t k_118484 = ltid_x_118466 + binop_y_118483; + int64_t j_118486 = ltid_y_118467 + binop_y_118485; + int64_t gtid_118487 = jjj_118393 + j_118486; + int64_t B_row_idx_118488 = kk_118421 + k_118484; + bool cond_118489 = slt64(gtid_118487, k2p2zq_73023); + double B_elem_118490; + + if (cond_118489) { + double B_elem_118492 = ((__global + double *) mem_120120)[B_row_idx_118488 * + k2p2zq_73023 + + gtid_118487]; + + B_elem_118490 = B_elem_118492; + } else { + B_elem_118490 = 0.0; + } + + bool cond_118494 = slt64(k_118484, Tk_118376); + int64_t b_loc_ind_118495; + + if (cond_118494) { + int64_t binop_y_118496 = TxRx_118379 * k_118484; + int64_t loc_fi_118497 = j_118486 + binop_y_118496; + + b_loc_ind_118495 = loc_fi_118497; + } else { + b_loc_ind_118495 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_118495) && + 
slt64(b_loc_ind_118495, b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118495] = + B_elem_118490; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_124497[Ry_118373 * Rx_118375]; + double mem_param_124469[Ry_118373 * Rx_118375]; + + for (int32_t i_3 = 0; i_3 < Ry_118373 * Rx_118375; i_3++) + mem_param_124469[i_3] = mem_param_124412[i_3]; + /* Walk the Tk positions of the staged tiles, accumulating into mem_param_124469. */ for (int64_t i_118502 = 0; i_118502 < Tk_118376; i_118502++) { + int64_t binop_y_118541 = TxRx_118379 * i_118502; + int64_t ltid_y_118506 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_x_118504 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118505 = local_tid_128901; + double mem_124472[Ry_118373]; + double mem_124474[Rx_118375]; + int64_t binop_x_118532 = Ry_118373 * ltid_y_118506; + + for (int64_t i_118530 = 0; i_118530 < Ry_118373; i_118530++) { + int64_t binop_x_118533 = i_118530 + binop_x_118532; + int64_t binop_y_118534 = Tk_118376 * binop_x_118533; + int64_t a_loc_ind_118535 = i_118502 + binop_y_118534; + + for (int64_t i_128922 = 0; i_128922 < (int64_t) 1; i_128922++) { + mem_124472[i_118530 + i_128922] = ((__local + double *) mem_124409)[a_loc_ind_118535 + + i_128922]; + } + } + + int64_t binop_y_118543 = Rx_118375 * ltid_x_118504; + + for (int64_t i_118539 = 0; i_118539 < Rx_118375; i_118539++) { + int64_t binop_x_118542 = i_118539 + binop_y_118541; + int64_t b_loc_ind_118544 = binop_x_118542 + binop_y_118543; + + for (int64_t i_128924 = 0; i_128924 < (int64_t) 1; i_128924++) { + mem_124474[i_118539 + i_128924] = ((__local + double *) mem_124411)[b_loc_ind_118544 + + i_128924]; + } + } + for (int64_t i_128925 = 0; i_128925 < Ry_118373; i_128925++) { + mem_124482[i_128925] = mem_124472[i_128925]; + } + for (int64_t i_128926 = 0; i_128926 < Rx_118375; i_128926++) { + mem_124486[i_128926] = mem_124474[i_128926]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124496[Ry_118373 * Rx_118375]; + int64_t ltid_y_118551 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_x_118549 = 
sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118550 = local_tid_128901; + int64_t binop_y_118594 = Ry_118373 * ltid_y_118551; + int64_t binop_y_118598 = Rx_118375 * ltid_x_118549; + + for (int64_t i_118588 = 0; i_118588 < Ry_118373; i_118588++) { + int64_t binop_x_118593 = iii_118392 + i_118588; + int64_t cmpop_x_118595 = binop_x_118593 + binop_y_118594; + bool binop_x_118596 = slt64(cmpop_x_118595, m_73008); + + for (int64_t i_118591 = 0; i_118591 < Rx_118375; i_118591++) { + int64_t binop_x_118597 = jjj_118393 + i_118591; + int64_t cmpop_x_118599 = binop_x_118597 + binop_y_118598; + bool binop_y_118600 = slt64(cmpop_x_118599, k2p2zq_73023); + bool cond_118601 = binop_x_118596 && binop_y_118600; + + if (cond_118601) { + double a_118603 = mem_124482[i_118588]; + double c_118605 = mem_param_124469[i_118588 * + Rx_118375 + + i_118591]; + /* A NaN a-value adds 0.0 rather than a * b. */ bool isnan_res_118608; + + isnan_res_118608 = futrts_isnan64(a_118603); + + double defunc_1_f_res_118609; + + if (isnan_res_118608) { + defunc_1_f_res_118609 = 0.0; + } else { + double b_118604 = mem_124486[i_118591]; + double defunc_1_f_res_f_res_118610 = a_118603 * + b_118604; + + defunc_1_f_res_118609 = defunc_1_f_res_f_res_118610; + } + + double defunc_1_op_res_118614 = c_118605 + + defunc_1_f_res_118609; + + mem_param_124469[i_118588 * Rx_118375 + i_118591] = + defunc_1_op_res_118614; + } + } + } + for (int64_t i_128929 = 0; i_128929 < Ry_118373; i_128929++) { + for (int64_t i_128930 = 0; i_128930 < Rx_118375; i_128930++) { + mem_124496[i_128929 * Rx_118375 + i_128930] = + mem_param_124469[i_128929 * Rx_118375 + i_128930]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_128919[Ry_118373 * Rx_118375]; + + for (int32_t i_4 = 0; i_4 < Ry_118373 * Rx_118375; i_4++) + mem_param_tmp_128919[i_4] = mem_124496[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_118373 * Rx_118375; i_5++) + mem_param_124469[i_5] = mem_param_tmp_128919[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_118373 * Rx_118375; i_6++) + loop_mem_124497[i_6] = 
mem_param_124469[i_6]; + + double mem_param_tmp_128911[Ry_118373 * Rx_118375]; + + for (int32_t i_7 = 0; i_7 < Ry_118373 * Rx_118375; i_7++) + mem_param_tmp_128911[i_7] = loop_mem_124497[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_118373 * Rx_118375; i_8++) + mem_param_124412[i_8] = mem_param_tmp_128911[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_118373 * Rx_118375; i_9++) + loop_mem_124498[i_9] = mem_param_124412[i_9]; + /* Residual tile starting at kk_118623: loads are also guarded by k < n_73011. */ for (int64_t i_118624 = 0; i_118624 < Ry_118373; i_118624++) { + int64_t binop_y_118649 = Ty_118372 * i_118624; + + for (int64_t i_118626 = 0; i_118626 < tk_div_tx_118377; i_118626++) { + int64_t binop_y_118647 = Tx_118374 * i_118626; + int64_t ltid_x_118628 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_y_118629 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118630 = local_tid_128901; + int64_t k_118648 = ltid_y_118629 + binop_y_118647; + int64_t i_118650 = ltid_x_118628 + binop_y_118649; + int64_t gtid_118651 = iii_118392 + i_118650; + int64_t A_col_idx_118652 = kk_118623 + k_118648; + bool binop_x_118653 = slt64(gtid_118651, m_73008); + bool binop_y_118654 = slt64(A_col_idx_118652, n_73011); + bool cond_118655 = binop_x_118653 && binop_y_118654; + double A_elem_118656; + + if (cond_118655) { + double A_elem_118658 = ((__global + double *) mem_124142)[gtid_118651 * + N_73007 + + A_col_idx_118652]; + + A_elem_118656 = A_elem_118658; + } else { + A_elem_118656 = 0.0; + } + + bool cond_118660 = slt64(k_118648, Tk_118376); + int64_t a_loc_ind_118661; + + if (cond_118660) { + int64_t binop_y_118662 = Tk_118376 * i_118650; + int64_t loc_fi_118663 = k_118648 + binop_y_118662; + + a_loc_ind_118661 = loc_fi_118663; + } else { + a_loc_ind_118661 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_118661) && slt64(a_loc_ind_118661, + a_loc_szz_118382)) { + ((__local double *) mem_124409)[a_loc_ind_118661] = + A_elem_118656; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_118668 = 0; i_118668 < tk_div_ty_118378; i_118668++) { + int64_t 
binop_y_118691 = Ty_118372 * i_118668; + + for (int64_t i_118670 = 0; i_118670 < Rx_118375; i_118670++) { + int64_t binop_y_118693 = Tx_118374 * i_118670; + int64_t ltid_x_118672 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_y_118673 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118674 = local_tid_128901; + int64_t k_118692 = ltid_x_118672 + binop_y_118691; + int64_t j_118694 = ltid_y_118673 + binop_y_118693; + int64_t gtid_118695 = jjj_118393 + j_118694; + int64_t B_row_idx_118696 = kk_118623 + k_118692; + bool binop_x_118697 = slt64(gtid_118695, k2p2zq_73023); + bool binop_y_118698 = slt64(B_row_idx_118696, n_73011); + bool cond_118699 = binop_x_118697 && binop_y_118698; + double B_elem_118700; + + if (cond_118699) { + double B_elem_118702 = ((__global + double *) mem_120120)[B_row_idx_118696 * + k2p2zq_73023 + + gtid_118695]; + + B_elem_118700 = B_elem_118702; + } else { + B_elem_118700 = 0.0; + } + + bool cond_118704 = slt64(k_118692, Tk_118376); + int64_t b_loc_ind_118705; + + if (cond_118704) { + int64_t binop_y_118706 = TxRx_118379 * k_118692; + int64_t loc_fi_118707 = j_118694 + binop_y_118706; + + b_loc_ind_118705 = loc_fi_118707; + } else { + b_loc_ind_118705 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_118705) && slt64(b_loc_ind_118705, + b_loc_szz_118384)) { + ((__local double *) mem_124411)[b_loc_ind_118705] = + B_elem_118700; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_124564[Ry_118373]; + double mem_124568[Rx_118375]; + double mem_124578[Ry_118373 * Rx_118375]; + double loop_mem_124580[Ry_118373 * Rx_118375]; + double mem_param_124551[Ry_118373 * Rx_118375]; + + for (int32_t i_10 = 0; i_10 < Ry_118373 * Rx_118375; i_10++) + mem_param_124551[i_10] = loop_mem_124498[i_10]; + /* Accumulate the residual tile; positions with kk_118623 + i at or beyond n_73011 leave the accumulator unchanged. */ for (int64_t i_118712 = 0; i_118712 < Tk_118376; i_118712++) { + int64_t cmpop_x_118714 = kk_118623 + i_118712; + bool cond_118715 = slt64(cmpop_x_118714, n_73011); + double mem_125382[Ry_118373 * Rx_118375]; + + if (cond_118715) { + int64_t 
binop_y_118753 = TxRx_118379 * i_118712; + int64_t bytes_124553 = (int64_t) 8 * Ry_118373; + int64_t bytes_124555 = (int64_t) 8 * Rx_118375; + int64_t ltid_y_118718 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_x_118716 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118717 = local_tid_128901; + double mem_124554[Ry_118373]; + double mem_124556[Rx_118375]; + int64_t binop_x_118744 = Ry_118373 * ltid_y_118718; + + for (int64_t i_118742 = 0; i_118742 < Ry_118373; i_118742++) { + int64_t binop_x_118745 = i_118742 + binop_x_118744; + int64_t binop_y_118746 = Tk_118376 * binop_x_118745; + int64_t a_loc_ind_118747 = i_118712 + binop_y_118746; + + for (int64_t i_128938 = 0; i_128938 < (int64_t) 1; i_128938++) { + mem_124554[i_118742 + i_128938] = ((__local + double *) mem_124409)[a_loc_ind_118747 + + i_128938]; + } + } + + int64_t binop_y_118755 = Rx_118375 * ltid_x_118716; + + for (int64_t i_118751 = 0; i_118751 < Rx_118375; i_118751++) { + int64_t binop_x_118754 = i_118751 + binop_y_118753; + int64_t b_loc_ind_118756 = binop_x_118754 + binop_y_118755; + + for (int64_t i_128940 = 0; i_128940 < (int64_t) 1; i_128940++) { + mem_124556[i_118751 + i_128940] = ((__local + double *) mem_124411)[b_loc_ind_118756 + + i_128940]; + } + } + for (int64_t i_128941 = 0; i_128941 < Ry_118373; i_128941++) { + mem_124564[i_128941] = mem_124554[i_128941]; + } + for (int64_t i_128942 = 0; i_128942 < Rx_118375; i_128942++) { + mem_124568[i_128942] = mem_124556[i_128942]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_118763 = sext_i32_i64(ltid_pre_128905); + int64_t ltid_x_118761 = sext_i32_i64(ltid_pre_128906); + int32_t ltid_flat_118762 = local_tid_128901; + int64_t binop_y_118806 = Ry_118373 * ltid_y_118763; + int64_t binop_y_118810 = Rx_118375 * ltid_x_118761; + + for (int64_t i_118800 = 0; i_118800 < Ry_118373; i_118800++) { + int64_t binop_x_118805 = iii_118392 + i_118800; + int64_t cmpop_x_118807 = binop_x_118805 + binop_y_118806; + bool binop_x_118808 = 
slt64(cmpop_x_118807, m_73008); + + for (int64_t i_118803 = 0; i_118803 < Rx_118375; i_118803++) { + int64_t binop_x_118809 = jjj_118393 + i_118803; + int64_t cmpop_x_118811 = binop_x_118809 + binop_y_118810; + bool binop_y_118812 = slt64(cmpop_x_118811, k2p2zq_73023); + bool cond_118813 = binop_x_118808 && binop_y_118812; + + if (cond_118813) { + double a_118815 = mem_124564[i_118800]; + double c_118817 = mem_param_124551[i_118800 * + Rx_118375 + + i_118803]; + bool isnan_res_118820; + + isnan_res_118820 = futrts_isnan64(a_118815); + + double defunc_1_f_res_118821; + + if (isnan_res_118820) { + defunc_1_f_res_118821 = 0.0; + } else { + double b_118816 = mem_124568[i_118803]; + double defunc_1_f_res_f_res_118822 = a_118815 * + b_118816; + + defunc_1_f_res_118821 = defunc_1_f_res_f_res_118822; + } + + double defunc_1_op_res_118826 = c_118817 + + defunc_1_f_res_118821; + + mem_param_124551[i_118800 * Rx_118375 + i_118803] = + defunc_1_op_res_118826; + } + } + } + for (int64_t i_128945 = 0; i_128945 < Ry_118373; i_128945++) { + for (int64_t i_128946 = 0; i_128946 < Rx_118375; i_128946++) { + mem_124578[i_128945 * Rx_118375 + i_128946] = + mem_param_124551[i_128945 * Rx_118375 + i_128946]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128947 = 0; i_128947 < Ry_118373; i_128947++) { + for (int64_t i_128948 = 0; i_128948 < Rx_118375; i_128948++) { + mem_125382[i_128947 * Rx_118375 + i_128948] = + mem_124578[i_128947 * Rx_118375 + i_128948]; + } + } + } else { + for (int64_t i_128949 = 0; i_128949 < Ry_118373; i_128949++) { + for (int64_t i_128950 = 0; i_128950 < Rx_118375; i_128950++) { + mem_125382[i_128949 * Rx_118375 + i_128950] = + mem_param_124551[i_128949 * Rx_118375 + i_128950]; + } + } + } + + double mem_param_tmp_128935[Ry_118373 * Rx_118375]; + + for (int32_t i_11 = 0; i_11 < Ry_118373 * Rx_118375; i_11++) + mem_param_tmp_128935[i_11] = mem_125382[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_118373 * Rx_118375; i_12++) + mem_param_124551[i_12] = 
mem_param_tmp_128935[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_118373 * Rx_118375; i_13++) + loop_mem_124580[i_13] = mem_param_124551[i_13]; + /* Store the accumulated Ry x Rx tile to mem_124583, skipping rows at or beyond m_73008 and columns at or beyond k2p2zq_73023. */ + int64_t reg_tile_i_128951 = squot64(sext_i32_i64(local_tid_128901), + Tx_118374); + int64_t reg_tile_i_128952 = sext_i32_i64(local_tid_128901) - + squot64(sext_i32_i64(local_tid_128901), Tx_118374) * Tx_118374; + int64_t tile_dim_start_128953 = Ry_118373 * (Ty_118372 * gid_y_118390 + + reg_tile_i_128951); + int64_t tile_dim_start_128954 = Rx_118375 * (Tx_118374 * gid_x_118389 + + reg_tile_i_128952); + + for (int64_t nest_i_128955 = 0; nest_i_128955 < Ry_118373; + nest_i_128955++) { + for (int64_t nest_i_128956 = 0; nest_i_128956 < Rx_118375; + nest_i_128956++) { + if (slt64(tile_dim_start_128953 + nest_i_128955, m_73008) && + slt64(tile_dim_start_128954 + nest_i_128956, k2p2zq_73023)) { + ((__global double *) mem_124583)[(tile_dim_start_128953 + + nest_i_128955) * + k2p2zq_73023 + + (tile_dim_start_128954 + + nest_i_128956)] = + loop_mem_124580[nest_i_128955 * Rx_118375 + nest_i_128956]; + } + } + } + + error_9: + return; + #undef Ty_118372 + #undef Ry_118373 + #undef Tx_118374 + #undef Rx_118375 + #undef Tk_118376 + #undef tk_div_tx_118377 + #undef tk_div_ty_118378 + #undef TxRx_118379 + #undef TyRy_118380 + #undef a_loc_szz_118382 + #undef b_loc_szz_118384 +} /* Kernel mainMagnitudezisegmap_intragroup_118840: every work-item accumulates one value, the sum over j of defunc_3_map_res_mem_124593[row * k2p2zq_73023 + j] * mem_124622[j * (k2p2zq_73023 * m_73008) + row * k2p2zq_73023 + col], and stores it to mem_124649[row * k2p2zq_73023 + col] when row < m_73008 and col < k2p2zq_73023. Here row = gid_x * tile_sizze + (local id / tile_sizze) and col = gid_y * tile_sizze + (local id - that quotient * tile_sizze). The first operand is staged through __local memory in num_whole_tiles_118856 tiles of tile_sizze values, followed by residual_input_118983 further values unless cond_118984 is set. */ +__kernel void mainMagnitudezisegmap_intragroup_118840(__global + int *global_failure, + __local volatile + int64_t *mem_124641_backing_aligned_0, + __local volatile + int64_t *mem_124632_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_y_118838, + int64_t num_whole_tiles_118856, + int64_t residual_input_118983, + unsigned char cond_118984, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124622, + __global + unsigned char *mem_124649) +{ + #define tile_sizze_118835 (mainMagnitudezitile_sizze_118834) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + 
__local volatile char *restrict mem_124641_backing_5 = (__local volatile + char *) mem_124641_backing_aligned_0; + __local volatile char *restrict mem_124632_backing_0 = (__local volatile + char *) mem_124632_backing_aligned_1; + /* Nothing is computed once *global_failure is non-negative. */ + if (*global_failure >= 0) + return; + + int32_t global_tid_129032; + int32_t local_tid_129033; + int64_t group_sizze_129036; + int32_t wave_sizze_129035; + int32_t group_tid_129034; + + global_tid_129032 = get_global_id(0); + local_tid_129033 = get_local_id(0); + group_sizze_129036 = get_local_size(0); + wave_sizze_129035 = LOCKSTEP_WIDTH; + group_tid_129034 = get_group_id(0); + + int32_t gid_flat_118840; + + gid_flat_118840 = group_tid_129034; + /* Quotient and remainder of the flat local id by tile_sizze. */ + int32_t ltid_pre_129037; + + ltid_pre_129037 = squot32(local_tid_129033, + sext_i64_i32(tile_sizze_118835)); + + int32_t ltid_pre_129038; + + ltid_pre_129038 = local_tid_129033 - squot32(local_tid_129033, + sext_i64_i32(tile_sizze_118835)) * + sext_i64_i32(tile_sizze_118835); + /* Quotient and remainder of the flat group id by num_groups_y_118838. */ + int64_t gid_x_118832; + + gid_x_118832 = squot64(sext_i32_i64(group_tid_129034), num_groups_y_118838); + + int64_t gid_y_118833; + + gid_y_118833 = sext_i32_i64(group_tid_129034) - + squot64(sext_i32_i64(group_tid_129034), num_groups_y_118838) * + num_groups_y_118838; + + double mem_124627[1]; + int64_t ltid_y_118859 = sext_i32_i64(ltid_pre_129037); + int64_t ltid_x_118857 = sext_i32_i64(ltid_pre_129038); + int32_t ltid_flat_118858 = local_tid_129033; + + if (slt64(ltid_y_118859, tile_sizze_118835) && slt64(ltid_x_118857, + tile_sizze_118835)) { + mem_124627[(int64_t) 0] = 0.0; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t binop_x_118942 = gid_x_118832 * tile_sizze_118835; + int64_t binop_x_118957 = gid_y_118833 * tile_sizze_118835; + __local char *mem_124632; + + mem_124632 = (__local char *) mem_124632_backing_0; + + double accs_mem_124637[1]; + double mem_param_124628[1]; + + for (int32_t i_1 = 0; i_1 < 1; i_1++) + mem_param_124628[i_1] = mem_124627[i_1]; + /* Whole tiles: stage tile_sizze values per row into mem_124632 (0.0 when out of range), then accumulate. */ for (int64_t tile_id_118868 = 0; tile_id_118868 < 
num_whole_tiles_118856; + tile_id_118868++) { + int64_t binop_x_118940 = tile_sizze_118835 * tile_id_118868; + int64_t ltid_y_118871 = sext_i32_i64(ltid_pre_129037); + int64_t ltid_x_118869 = sext_i32_i64(ltid_pre_129038); + int32_t ltid_flat_118870 = local_tid_129033; + int64_t j_118941 = ltid_x_118869 + binop_x_118940; + int64_t gtid_118943 = ltid_y_118871 + binop_x_118942; + bool binop_x_118948 = slt64(j_118941, k2p2zq_73023); + bool binop_y_118949 = slt64(gtid_118943, m_73008); + bool cond_118950 = binop_x_118948 && binop_y_118949; + double pre_118951; + + if (cond_118950) { + double x_118952 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_118943 * + k2p2zq_73023 + + j_118941]; + + pre_118951 = x_118952; + } else { + pre_118951 = 0.0; + } + ((__local double *) mem_124632)[ltid_y_118871 * tile_sizze_118835 + + ltid_x_118869] = pre_118951; + barrier(CLK_LOCAL_MEM_FENCE); + /* Fold the staged tile into the accumulator of this work-item. */ + double mem_124636[1]; + int64_t ltid_y_118903 = sext_i32_i64(ltid_pre_129037); + int64_t ltid_x_118901 = sext_i32_i64(ltid_pre_129038); + int32_t ltid_flat_118902 = local_tid_129033; + int64_t gtid_118956 = ltid_y_118903 + binop_x_118942; + int64_t gtid_118958 = ltid_x_118901 + binop_x_118957; + double acc_118961 = mem_param_124628[(int64_t) 0]; + bool binop_x_118965 = slt64(gtid_118956, m_73008); + bool binop_y_118966 = slt64(gtid_118958, k2p2zq_73023); + bool cond_118967 = binop_x_118965 && binop_y_118966; + double acc_118968; + + if (cond_118967) { + double x_118969; + double redout_119940 = acc_118961; + + for (int64_t i_119941 = 0; i_119941 < tile_sizze_118835; + i_119941++) { + double x_118973 = ((__local + double *) mem_124632)[ltid_y_118903 * + tile_sizze_118835 + + i_119941]; + int64_t slice_120051 = binop_x_118940 + i_119941; + double x_118974 = ((__global + double *) mem_124622)[slice_120051 * + (k2p2zq_73023 * + m_73008) + + gtid_118956 * + k2p2zq_73023 + + gtid_118958]; + double defunc_1_f_res_118975 = x_118973 * x_118974; + double defunc_1_op_res_118972 = 
defunc_1_f_res_118975 + + redout_119940; + double redout_tmp_129041 = defunc_1_op_res_118972; + + redout_119940 = redout_tmp_129041; + } + x_118969 = redout_119940; + acc_118968 = x_118969; + } else { + acc_118968 = acc_118961; + } + mem_124636[(int64_t) 0] = acc_118968; + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_129039[1]; + + for (int32_t i_2 = 0; i_2 < 1; i_2++) + mem_param_tmp_129039[i_2] = mem_124636[i_2]; + for (int32_t i_3 = 0; i_3 < 1; i_3++) + mem_param_124628[i_3] = mem_param_tmp_129039[i_3]; + } + for (int32_t i_4 = 0; i_4 < 1; i_4++) + accs_mem_124637[i_4] = mem_param_124628[i_4]; + + __local char *mem_124641; + + mem_124641 = (__local char *) mem_124641_backing_5; + + double mem_124645[1]; + double mem_125396[1]; + /* With cond_118984 set the whole-tile sum is the result; otherwise residual_input_118983 further values are staged in mem_124641 and added. */ + if (cond_118984) { + mem_125396[(int64_t) 0] = accs_mem_124637[(int64_t) 0]; + } else { + int64_t binop_x_119057 = tile_sizze_118835 * num_whole_tiles_118856; + int64_t ltid_y_118987 = sext_i32_i64(ltid_pre_129037); + int64_t ltid_x_118985 = sext_i32_i64(ltid_pre_129038); + int32_t ltid_flat_118986 = local_tid_129033; + int64_t j_119058 = ltid_x_118985 + binop_x_119057; + int64_t gtid_119060 = binop_x_118942 + ltid_y_118987; + bool binop_x_119065 = slt64(j_119058, k2p2zq_73023); + bool binop_y_119066 = slt64(gtid_119060, m_73008); + bool cond_119067 = binop_x_119065 && binop_y_119066; + double pre_119068; + + if (cond_119067) { + double x_119069 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_119060 * + k2p2zq_73023 + + j_119058]; + + pre_119068 = x_119069; + } else { + pre_119068 = 0.0; + } + ((__local double *) mem_124641)[ltid_y_118987 * tile_sizze_118835 + + ltid_x_118985] = pre_119068; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_119020 = sext_i32_i64(ltid_pre_129037); + int64_t ltid_x_119018 = sext_i32_i64(ltid_pre_129038); + int32_t ltid_flat_119019 = local_tid_129033; + int64_t gtid_119074 = binop_x_118942 + ltid_y_119020; + int64_t gtid_119076 = binop_x_118957 + ltid_x_119018; + double acc_119079 = 
accs_mem_124637[(int64_t) 0]; + bool binop_x_119083 = slt64(gtid_119074, m_73008); + bool binop_y_119084 = slt64(gtid_119076, k2p2zq_73023); + bool cond_119085 = binop_x_119083 && binop_y_119084; + double acc_119086; + + if (cond_119085) { + double x_119087; + double redout_119942 = acc_119079; + + for (int64_t i_119943 = 0; i_119943 < residual_input_118983; + i_119943++) { + double x_119091 = ((__local + double *) mem_124641)[ltid_y_119020 * + tile_sizze_118835 + + i_119943]; + int64_t slice_120052 = binop_x_119057 + i_119943; + double x_119092 = ((__global + double *) mem_124622)[slice_120052 * + (k2p2zq_73023 * + m_73008) + + gtid_119074 * + k2p2zq_73023 + + gtid_119076]; + double defunc_1_f_res_119093 = x_119091 * x_119092; + double defunc_1_op_res_119090 = defunc_1_f_res_119093 + + redout_119942; + double redout_tmp_129042 = defunc_1_op_res_119090; + + redout_119942 = redout_tmp_129042; + } + x_119087 = redout_119942; + acc_119086 = x_119087; + } else { + acc_119086 = acc_119079; + } + mem_124645[(int64_t) 0] = acc_119086; + barrier(CLK_LOCAL_MEM_FENCE); + mem_125396[(int64_t) 0] = mem_124645[(int64_t) 0]; + } + /* Bounds-checked store of the result to mem_124649. */ + int64_t thread_out_index_129043 = gid_x_118832 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129037); + int64_t thread_out_index_129044 = gid_y_118833 * tile_sizze_118835 + + sext_i32_i64(ltid_pre_129038); + + if (slt64(thread_out_index_129043, m_73008) && + slt64(thread_out_index_129044, k2p2zq_73023)) { + ((__global double *) mem_124649)[thread_out_index_129043 * + k2p2zq_73023 + + thread_out_index_129044] = + mem_125396[(int64_t) 0]; + } + + error_5: + return; + #undef tile_sizze_118835 +} +__kernel void mainMagnitudezisegmap_intragroup_119132(__global + int *global_failure, + __local volatile + int64_t *mem_124701_backing_aligned_0, + __local volatile + int64_t *mem_124699_backing_aligned_1, + int64_t N_73007, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t gridDim_x_119126, + int64_t full_tiles_119157, + int64_t kk_119360, + __global + 
unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124683, + __global + unsigned char *mem_124873) +{ + #define Ty_119113 (mainMagnitudeziTy_119110) + #define Ry_119114 (mainMagnitudeziRy_119112) + #define Tx_119115 (mainMagnitudeziTx_119109) + #define Rx_119116 (mainMagnitudeziRx_119111) + #define Tk_119117 (mainMagnitudeziTk_119108) + #define tk_div_tx_119118 (sdiv_up64(mainMagnitudeziTk_119108, mainMagnitudeziTx_119109)) + #define tk_div_ty_119119 (sdiv_up64(mainMagnitudeziTk_119108, mainMagnitudeziTy_119110)) + #define TxRx_119120 (mainMagnitudeziTx_119109 * mainMagnitudeziRx_119111) + #define TyRy_119121 (mainMagnitudeziTy_119110 * mainMagnitudeziRy_119112) + #define a_loc_szz_119123 (mainMagnitudeziTk_119108 * (mainMagnitudeziTy_119110 * mainMagnitudeziRy_119112)) + #define b_loc_szz_119125 (mainMagnitudeziRx_119111 * (mainMagnitudeziTx_119109 * mainMagnitudeziTk_119108)) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124701_backing_1 = (__local volatile + char *) mem_124701_backing_aligned_0; + __local volatile char *restrict mem_124699_backing_0 = (__local volatile + char *) mem_124699_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129120; + int32_t local_tid_129121; + int64_t group_sizze_129124; + int32_t wave_sizze_129123; + int32_t group_tid_129122; + + global_tid_129120 = get_global_id(0); + local_tid_129121 = get_local_id(0); + group_sizze_129124 = get_local_size(0); + wave_sizze_129123 = LOCKSTEP_WIDTH; + group_tid_129122 = get_group_id(0); + + int32_t gid_flat_119132; + + gid_flat_119132 = group_tid_129122; + + int32_t ltid_pre_129125; + + ltid_pre_129125 = squot32(local_tid_129121, sext_i64_i32(Tx_119115)); + + int32_t ltid_pre_129126; + + ltid_pre_129126 = local_tid_129121 - squot32(local_tid_129121, + sext_i64_i32(Tx_119115)) * + sext_i64_i32(Tx_119115); + + int64_t gid_y_119131; + + gid_y_119131 = 
squot64(sext_i32_i64(group_tid_129122), gridDim_x_119126); + + int64_t gid_x_119130; + + gid_x_119130 = sext_i32_i64(group_tid_129122) - + squot64(sext_i32_i64(group_tid_129122), gridDim_x_119126) * + gridDim_x_119126; + + int64_t iii_119133; + + iii_119133 = TyRy_119121 * gid_y_119131; + + int64_t jjj_119134 = TxRx_119120 * gid_x_119130; + double mem_124697[Ry_119114 * Rx_119116]; + int64_t ltid_y_119137 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_x_119135 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119136 = local_tid_129121; + double mem_124688[Ry_119114 * Rx_119116]; + + for (int64_t i_119148 = 0; i_119148 < Ry_119114; i_119148++) { + for (int64_t i_119151 = 0; i_119151 < Rx_119116; i_119151++) { + mem_124688[i_119148 * Rx_119116 + i_119151] = 0.0; + } + } + for (int64_t i_129129 = 0; i_129129 < Ry_119114; i_129129++) { + for (int64_t i_129130 = 0; i_129130 < Rx_119116; i_129130++) { + mem_124697[i_129129 * Rx_119116 + i_129130] = mem_124688[i_129129 * + Rx_119116 + + i_129130]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_124699; + + mem_124699 = (__local char *) mem_124699_backing_0; + + __local char *mem_124701; + + mem_124701 = (__local char *) mem_124701_backing_1; + + double mem_124772[Ry_119114]; + double mem_124776[Rx_119116]; + double loop_mem_124788[Ry_119114 * Rx_119116]; + double mem_param_124702[Ry_119114 * Rx_119116]; + + for (int32_t i_2 = 0; i_2 < Ry_119114 * Rx_119116; i_2++) + mem_param_124702[i_2] = mem_124697[i_2]; + for (int64_t i_119158 = 0; i_119158 < full_tiles_119157; i_119158++) { + int64_t kk_119162 = Tk_119117 * i_119158; + + for (int64_t i_119163 = 0; i_119163 < Ry_119114; i_119163++) { + int64_t binop_y_119186 = Ty_119113 * i_119163; + + for (int64_t i_119165 = 0; i_119165 < tk_div_tx_119118; + i_119165++) { + int64_t binop_y_119184 = Tx_119115 * i_119165; + int64_t ltid_x_119167 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_y_119168 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119169 = 
local_tid_129121; + int64_t k_119185 = ltid_y_119168 + binop_y_119184; + int64_t i_119187 = ltid_x_119167 + binop_y_119186; + int64_t gtid_119188 = iii_119133 + i_119187; + int64_t A_col_idx_119189 = kk_119162 + k_119185; + bool cond_119190 = slt64(gtid_119188, m_73008); + double A_elem_119191; + + if (cond_119190) { + double A_elem_119193 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119188 * + k2p2zq_73023 + + A_col_idx_119189]; + + A_elem_119191 = A_elem_119193; + } else { + A_elem_119191 = 0.0; + } + + bool cond_119195 = slt64(k_119185, Tk_119117); + int64_t a_loc_ind_119196; + + if (cond_119195) { + int64_t binop_y_119197 = Tk_119117 * i_119187; + int64_t loc_fi_119198 = k_119185 + binop_y_119197; + + a_loc_ind_119196 = loc_fi_119198; + } else { + a_loc_ind_119196 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_119196) && + slt64(a_loc_ind_119196, a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119196] = + A_elem_119191; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_119203 = 0; i_119203 < tk_div_ty_119119; i_119203++) { + int64_t binop_y_119224 = Ty_119113 * i_119203; + + for (int64_t i_119205 = 0; i_119205 < Rx_119116; i_119205++) { + int64_t binop_y_119226 = Tx_119115 * i_119205; + int64_t ltid_x_119207 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_y_119208 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119209 = local_tid_129121; + int64_t k_119225 = ltid_x_119207 + binop_y_119224; + int64_t j_119227 = ltid_y_119208 + binop_y_119226; + int64_t gtid_119228 = jjj_119134 + j_119227; + int64_t B_row_idx_119229 = kk_119162 + k_119225; + bool cond_119230 = slt64(gtid_119228, N_73007); + double B_elem_119231; + + if (cond_119230) { + double B_elem_119233 = ((__global + double *) mem_124683)[B_row_idx_119229 * + N_73007 + + gtid_119228]; + + B_elem_119231 = B_elem_119233; + } else { + B_elem_119231 = 0.0; + } + + bool cond_119235 = slt64(k_119225, Tk_119117); + int64_t b_loc_ind_119236; + + if (cond_119235) 
{ + int64_t binop_y_119237 = TxRx_119120 * k_119225; + int64_t loc_fi_119238 = j_119227 + binop_y_119237; + + b_loc_ind_119236 = loc_fi_119238; + } else { + b_loc_ind_119236 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_119236) && + slt64(b_loc_ind_119236, b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119236] = + B_elem_119231; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double loop_mem_124787[Ry_119114 * Rx_119116]; + double mem_param_124759[Ry_119114 * Rx_119116]; + + for (int32_t i_3 = 0; i_3 < Ry_119114 * Rx_119116; i_3++) + mem_param_124759[i_3] = mem_param_124702[i_3]; + for (int64_t i_119243 = 0; i_119243 < Tk_119117; i_119243++) { + int64_t binop_y_119282 = TxRx_119120 * i_119243; + int64_t ltid_y_119247 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_x_119245 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119246 = local_tid_129121; + double mem_124762[Ry_119114]; + double mem_124764[Rx_119116]; + int64_t binop_x_119273 = Ry_119114 * ltid_y_119247; + + for (int64_t i_119271 = 0; i_119271 < Ry_119114; i_119271++) { + int64_t binop_x_119274 = i_119271 + binop_x_119273; + int64_t binop_y_119275 = Tk_119117 * binop_x_119274; + int64_t a_loc_ind_119276 = i_119243 + binop_y_119275; + + for (int64_t i_129142 = 0; i_129142 < (int64_t) 1; i_129142++) { + mem_124762[i_119271 + i_129142] = ((__local + double *) mem_124699)[a_loc_ind_119276 + + i_129142]; + } + } + + int64_t binop_y_119284 = Rx_119116 * ltid_x_119245; + + for (int64_t i_119280 = 0; i_119280 < Rx_119116; i_119280++) { + int64_t binop_x_119283 = i_119280 + binop_y_119282; + int64_t b_loc_ind_119285 = binop_x_119283 + binop_y_119284; + + for (int64_t i_129144 = 0; i_129144 < (int64_t) 1; i_129144++) { + mem_124764[i_119280 + i_129144] = ((__local + double *) mem_124701)[b_loc_ind_119285 + + i_129144]; + } + } + for (int64_t i_129145 = 0; i_129145 < Ry_119114; i_129145++) { + mem_124772[i_129145] = mem_124762[i_129145]; + } + for (int64_t i_129146 = 0; i_129146 < 
Rx_119116; i_129146++) { + mem_124776[i_129146] = mem_124764[i_129146]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_124786[Ry_119114 * Rx_119116]; + int64_t ltid_y_119292 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_x_119290 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119291 = local_tid_129121; + int64_t binop_y_119333 = Ry_119114 * ltid_y_119292; + int64_t binop_y_119337 = Rx_119116 * ltid_x_119290; + + for (int64_t i_119327 = 0; i_119327 < Ry_119114; i_119327++) { + int64_t binop_x_119332 = iii_119133 + i_119327; + int64_t cmpop_x_119334 = binop_x_119332 + binop_y_119333; + bool binop_x_119335 = slt64(cmpop_x_119334, m_73008); + + for (int64_t i_119330 = 0; i_119330 < Rx_119116; i_119330++) { + int64_t binop_x_119336 = jjj_119134 + i_119330; + int64_t cmpop_x_119338 = binop_x_119336 + binop_y_119337; + bool binop_y_119339 = slt64(cmpop_x_119338, N_73007); + bool cond_119340 = binop_x_119335 && binop_y_119339; + + if (cond_119340) { + double a_119342 = mem_124772[i_119327]; + double b_119343 = mem_124776[i_119330]; + double c_119344 = mem_param_124759[i_119327 * + Rx_119116 + + i_119330]; + double defunc_1_f_res_119347 = a_119342 * b_119343; + double defunc_1_op_res_119351 = c_119344 + + defunc_1_f_res_119347; + + mem_param_124759[i_119327 * Rx_119116 + i_119330] = + defunc_1_op_res_119351; + } + } + } + for (int64_t i_129149 = 0; i_129149 < Ry_119114; i_129149++) { + for (int64_t i_129150 = 0; i_129150 < Rx_119116; i_129150++) { + mem_124786[i_129149 * Rx_119116 + i_129150] = + mem_param_124759[i_129149 * Rx_119116 + i_129150]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + double mem_param_tmp_129139[Ry_119114 * Rx_119116]; + + for (int32_t i_4 = 0; i_4 < Ry_119114 * Rx_119116; i_4++) + mem_param_tmp_129139[i_4] = mem_124786[i_4]; + for (int32_t i_5 = 0; i_5 < Ry_119114 * Rx_119116; i_5++) + mem_param_124759[i_5] = mem_param_tmp_129139[i_5]; + } + for (int32_t i_6 = 0; i_6 < Ry_119114 * Rx_119116; i_6++) + loop_mem_124787[i_6] = 
mem_param_124759[i_6]; + + double mem_param_tmp_129131[Ry_119114 * Rx_119116]; + + for (int32_t i_7 = 0; i_7 < Ry_119114 * Rx_119116; i_7++) + mem_param_tmp_129131[i_7] = loop_mem_124787[i_7]; + for (int32_t i_8 = 0; i_8 < Ry_119114 * Rx_119116; i_8++) + mem_param_124702[i_8] = mem_param_tmp_129131[i_8]; + } + for (int32_t i_9 = 0; i_9 < Ry_119114 * Rx_119116; i_9++) + loop_mem_124788[i_9] = mem_param_124702[i_9]; + for (int64_t i_119361 = 0; i_119361 < Ry_119114; i_119361++) { + int64_t binop_y_119386 = Ty_119113 * i_119361; + + for (int64_t i_119363 = 0; i_119363 < tk_div_tx_119118; i_119363++) { + int64_t binop_y_119384 = Tx_119115 * i_119363; + int64_t ltid_x_119365 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_y_119366 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119367 = local_tid_129121; + int64_t k_119385 = ltid_y_119366 + binop_y_119384; + int64_t i_119387 = ltid_x_119365 + binop_y_119386; + int64_t gtid_119388 = iii_119133 + i_119387; + int64_t A_col_idx_119389 = kk_119360 + k_119385; + bool binop_x_119390 = slt64(gtid_119388, m_73008); + bool binop_y_119391 = slt64(A_col_idx_119389, k2p2zq_73023); + bool cond_119392 = binop_x_119390 && binop_y_119391; + double A_elem_119393; + + if (cond_119392) { + double A_elem_119395 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_119388 * + k2p2zq_73023 + + A_col_idx_119389]; + + A_elem_119393 = A_elem_119395; + } else { + A_elem_119393 = 0.0; + } + + bool cond_119397 = slt64(k_119385, Tk_119117); + int64_t a_loc_ind_119398; + + if (cond_119397) { + int64_t binop_y_119399 = Tk_119117 * i_119387; + int64_t loc_fi_119400 = k_119385 + binop_y_119399; + + a_loc_ind_119398 = loc_fi_119400; + } else { + a_loc_ind_119398 = (int64_t) -1; + } + if (sle64((int64_t) 0, a_loc_ind_119398) && slt64(a_loc_ind_119398, + a_loc_szz_119123)) { + ((__local double *) mem_124699)[a_loc_ind_119398] = + A_elem_119393; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + for (int64_t i_119405 = 0; i_119405 < tk_div_ty_119119; 
i_119405++) { + int64_t binop_y_119428 = Ty_119113 * i_119405; + + for (int64_t i_119407 = 0; i_119407 < Rx_119116; i_119407++) { + int64_t binop_y_119430 = Tx_119115 * i_119407; + int64_t ltid_x_119409 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_y_119410 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119411 = local_tid_129121; + int64_t k_119429 = ltid_x_119409 + binop_y_119428; + int64_t j_119431 = ltid_y_119410 + binop_y_119430; + int64_t gtid_119432 = jjj_119134 + j_119431; + int64_t B_row_idx_119433 = kk_119360 + k_119429; + bool binop_x_119434 = slt64(gtid_119432, N_73007); + bool binop_y_119435 = slt64(B_row_idx_119433, k2p2zq_73023); + bool cond_119436 = binop_x_119434 && binop_y_119435; + double B_elem_119437; + + if (cond_119436) { + double B_elem_119439 = ((__global + double *) mem_124683)[B_row_idx_119433 * + N_73007 + + gtid_119432]; + + B_elem_119437 = B_elem_119439; + } else { + B_elem_119437 = 0.0; + } + + bool cond_119441 = slt64(k_119429, Tk_119117); + int64_t b_loc_ind_119442; + + if (cond_119441) { + int64_t binop_y_119443 = TxRx_119120 * k_119429; + int64_t loc_fi_119444 = j_119431 + binop_y_119443; + + b_loc_ind_119442 = loc_fi_119444; + } else { + b_loc_ind_119442 = (int64_t) -1; + } + if (sle64((int64_t) 0, b_loc_ind_119442) && slt64(b_loc_ind_119442, + b_loc_szz_119125)) { + ((__local double *) mem_124701)[b_loc_ind_119442] = + B_elem_119437; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + + double mem_124854[Ry_119114]; + double mem_124858[Rx_119116]; + double mem_124868[Ry_119114 * Rx_119116]; + double loop_mem_124870[Ry_119114 * Rx_119116]; + double mem_param_124841[Ry_119114 * Rx_119116]; + + for (int32_t i_10 = 0; i_10 < Ry_119114 * Rx_119116; i_10++) + mem_param_124841[i_10] = loop_mem_124788[i_10]; + for (int64_t i_119449 = 0; i_119449 < Tk_119117; i_119449++) { + int64_t cmpop_x_119451 = kk_119360 + i_119449; + bool cond_119452 = slt64(cmpop_x_119451, k2p2zq_73023); + double mem_125412[Ry_119114 * Rx_119116]; + + if 
(cond_119452) { + int64_t binop_y_119490 = TxRx_119120 * i_119449; + int64_t bytes_124843 = (int64_t) 8 * Ry_119114; + int64_t bytes_124845 = (int64_t) 8 * Rx_119116; + int64_t ltid_y_119455 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_x_119453 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119454 = local_tid_129121; + double mem_124844[Ry_119114]; + double mem_124846[Rx_119116]; + int64_t binop_x_119481 = Ry_119114 * ltid_y_119455; + + for (int64_t i_119479 = 0; i_119479 < Ry_119114; i_119479++) { + int64_t binop_x_119482 = i_119479 + binop_x_119481; + int64_t binop_y_119483 = Tk_119117 * binop_x_119482; + int64_t a_loc_ind_119484 = i_119449 + binop_y_119483; + + for (int64_t i_129158 = 0; i_129158 < (int64_t) 1; i_129158++) { + mem_124844[i_119479 + i_129158] = ((__local + double *) mem_124699)[a_loc_ind_119484 + + i_129158]; + } + } + + int64_t binop_y_119492 = Rx_119116 * ltid_x_119453; + + for (int64_t i_119488 = 0; i_119488 < Rx_119116; i_119488++) { + int64_t binop_x_119491 = i_119488 + binop_y_119490; + int64_t b_loc_ind_119493 = binop_x_119491 + binop_y_119492; + + for (int64_t i_129160 = 0; i_129160 < (int64_t) 1; i_129160++) { + mem_124846[i_119488 + i_129160] = ((__local + double *) mem_124701)[b_loc_ind_119493 + + i_129160]; + } + } + for (int64_t i_129161 = 0; i_129161 < Ry_119114; i_129161++) { + mem_124854[i_129161] = mem_124844[i_129161]; + } + for (int64_t i_129162 = 0; i_129162 < Rx_119116; i_129162++) { + mem_124858[i_129162] = mem_124846[i_129162]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t ltid_y_119500 = sext_i32_i64(ltid_pre_129125); + int64_t ltid_x_119498 = sext_i32_i64(ltid_pre_129126); + int32_t ltid_flat_119499 = local_tid_129121; + int64_t binop_y_119541 = Ry_119114 * ltid_y_119500; + int64_t binop_y_119545 = Rx_119116 * ltid_x_119498; + + for (int64_t i_119535 = 0; i_119535 < Ry_119114; i_119535++) { + int64_t binop_x_119540 = iii_119133 + i_119535; + int64_t cmpop_x_119542 = binop_x_119540 + binop_y_119541; + bool 
binop_x_119543 = slt64(cmpop_x_119542, m_73008); + + for (int64_t i_119538 = 0; i_119538 < Rx_119116; i_119538++) { + int64_t binop_x_119544 = jjj_119134 + i_119538; + int64_t cmpop_x_119546 = binop_x_119544 + binop_y_119545; + bool binop_y_119547 = slt64(cmpop_x_119546, N_73007); + bool cond_119548 = binop_x_119543 && binop_y_119547; + + if (cond_119548) { + double a_119550 = mem_124854[i_119535]; + double b_119551 = mem_124858[i_119538]; + double c_119552 = mem_param_124841[i_119535 * + Rx_119116 + + i_119538]; + double defunc_1_f_res_119555 = a_119550 * b_119551; + double defunc_1_op_res_119559 = c_119552 + + defunc_1_f_res_119555; + + mem_param_124841[i_119535 * Rx_119116 + i_119538] = + defunc_1_op_res_119559; + } + } + } + for (int64_t i_129165 = 0; i_129165 < Ry_119114; i_129165++) { + for (int64_t i_129166 = 0; i_129166 < Rx_119116; i_129166++) { + mem_124868[i_129165 * Rx_119116 + i_129166] = + mem_param_124841[i_129165 * Rx_119116 + i_129166]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_129167 = 0; i_129167 < Ry_119114; i_129167++) { + for (int64_t i_129168 = 0; i_129168 < Rx_119116; i_129168++) { + mem_125412[i_129167 * Rx_119116 + i_129168] = + mem_124868[i_129167 * Rx_119116 + i_129168]; + } + } + } else { + for (int64_t i_129169 = 0; i_129169 < Ry_119114; i_129169++) { + for (int64_t i_129170 = 0; i_129170 < Rx_119116; i_129170++) { + mem_125412[i_129169 * Rx_119116 + i_129170] = + mem_param_124841[i_129169 * Rx_119116 + i_129170]; + } + } + } + + double mem_param_tmp_129155[Ry_119114 * Rx_119116]; + + for (int32_t i_11 = 0; i_11 < Ry_119114 * Rx_119116; i_11++) + mem_param_tmp_129155[i_11] = mem_125412[i_11]; + for (int32_t i_12 = 0; i_12 < Ry_119114 * Rx_119116; i_12++) + mem_param_124841[i_12] = mem_param_tmp_129155[i_12]; + } + for (int32_t i_13 = 0; i_13 < Ry_119114 * Rx_119116; i_13++) + loop_mem_124870[i_13] = mem_param_124841[i_13]; + + int64_t reg_tile_i_129171 = squot64(sext_i32_i64(local_tid_129121), + Tx_119115); + int64_t 
reg_tile_i_129172 = sext_i32_i64(local_tid_129121) - + squot64(sext_i32_i64(local_tid_129121), Tx_119115) * Tx_119115; + int64_t tile_dim_start_129173 = Ry_119114 * (Ty_119113 * gid_y_119131 + + reg_tile_i_129171); + int64_t tile_dim_start_129174 = Rx_119116 * (Tx_119115 * gid_x_119130 + + reg_tile_i_129172); + + for (int64_t nest_i_129175 = 0; nest_i_129175 < Ry_119114; + nest_i_129175++) { + for (int64_t nest_i_129176 = 0; nest_i_129176 < Rx_119116; + nest_i_129176++) { + if (slt64(tile_dim_start_129173 + nest_i_129175, m_73008) && + slt64(tile_dim_start_129174 + nest_i_129176, N_73007)) { + ((__global double *) mem_124873)[(tile_dim_start_129173 + + nest_i_129175) * N_73007 + + (tile_dim_start_129174 + + nest_i_129176)] = + loop_mem_124870[nest_i_129175 * Rx_119116 + nest_i_129176]; + } + } + } + + error_9: + return; + #undef Ty_119113 + #undef Ry_119114 + #undef Tx_119115 + #undef Rx_119116 + #undef Tk_119117 + #undef tk_div_tx_119118 + #undef tk_div_ty_119119 + #undef TxRx_119120 + #undef TyRy_119121 + #undef a_loc_szz_119123 + #undef b_loc_szz_119125 +} +__kernel void mainMagnitudezisegmap_intragroup_90368(__global + int *global_failure, + __local volatile + int64_t *mem_120188_backing_aligned_0, + __local volatile + int64_t *mem_120186_backing_aligned_1, + __local volatile + int64_t *mem_120184_backing_aligned_2, + int64_t N_73007, + int64_t n_73011, + int64_t m_73103, __global + unsigned char *images_mem_120108, + __global + unsigned char *mem_120191, + __global + unsigned char *mem_120194, + __global + unsigned char *mem_120197) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_120188_backing_2 = (__local volatile + char *) mem_120188_backing_aligned_0; + __local volatile char *restrict mem_120186_backing_1 = (__local volatile + char *) mem_120186_backing_aligned_1; + __local volatile char *restrict mem_120184_backing_0 = (__local volatile + char *) mem_120184_backing_aligned_2; + + 
if (*global_failure >= 0) + return; + + int32_t global_tid_126394; + int32_t local_tid_126395; + int64_t group_sizze_126398; + int32_t wave_sizze_126397; + int32_t group_tid_126396; + + global_tid_126394 = get_global_id(0); + local_tid_126395 = get_local_id(0); + group_sizze_126398 = get_local_size(0); + wave_sizze_126397 = LOCKSTEP_WIDTH; + group_tid_126396 = get_group_id(0); + + int32_t phys_tid_90368; + + phys_tid_90368 = group_tid_126396; + + int32_t ltid_pre_126399; + + ltid_pre_126399 = local_tid_126395; + + int64_t gtid_90360; + + gtid_90360 = sext_i32_i64(group_tid_126396); + + __local char *mem_120184; + + mem_120184 = (__local char *) mem_120184_backing_0; + + int64_t gtid_90363 = sext_i32_i64(ltid_pre_126399); + int32_t phys_tid_90364 = local_tid_126395; + int64_t binop_y_115020 = (int64_t) -1 * gtid_90363; + int64_t slice_115021 = m_73103 + binop_y_115020; + double x_90578 = ((__global double *) images_mem_120108)[gtid_90360 * + N_73007 + + slice_115021]; + bool defunc_0_f_res_90579; + + defunc_0_f_res_90579 = futrts_isnan64(x_90578); + + bool defunc_0_g_res_90580 = !defunc_0_f_res_90579; + int64_t defunc_0_f_res_90581 = btoi_bool_i64(defunc_0_g_res_90580); + + ((__local int64_t *) mem_120184)[gtid_90363] = defunc_0_f_res_90581; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_126400; + + dims_flat_126400 = n_73011; + + int64_t x_90575; + int64_t x_90576; + int64_t x_126402; + int64_t x_126403; + bool ltid_in_bounds_126405; + + ltid_in_bounds_126405 = slt64(sext_i32_i64(local_tid_126395), n_73011); + + int32_t skip_threads_126406; + + // read input for in-block scan + { + if (ltid_in_bounds_126405) { + x_90576 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)]; + if ((local_tid_126395 - squot32(local_tid_126395, 32) * 32) == 0) { + x_90575 = x_90576; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126406 = 1; + while (slt32(skip_threads_126406, 32)) { + if (sle32(skip_threads_126406, 
local_tid_126395 - + squot32(local_tid_126395, 32) * 32) && + ltid_in_bounds_126405) { + // read operands + { + x_90575 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395) - + sext_i32_i64(skip_threads_126406)]; + } + // perform operation + { + bool inactive_126407 = + slt64(srem64(sext_i32_i64(local_tid_126395), n_73011), + sext_i32_i64(local_tid_126395) - + sext_i32_i64(local_tid_126395 - + skip_threads_126406)); + + if (inactive_126407) { + x_90575 = x_90576; + } + if (!inactive_126407) { + int64_t defunc_1_op_res_90577 = add64(x_90575, x_90576); + + x_90575 = defunc_1_op_res_90577; + } + } + } + if (sle32(wave_sizze_126397, skip_threads_126406)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126406, local_tid_126395 - + squot32(local_tid_126395, 32) * 32) && + ltid_in_bounds_126405) { + // write result + { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)] = + x_90575; + x_90576 = x_90575; + } + } + if (sle32(wave_sizze_126397, skip_threads_126406)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126406 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126395 - squot32(local_tid_126395, 32) * 32) == 31 && + ltid_in_bounds_126405) { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(squot32(local_tid_126395, + 32))] = x_90575; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126408; + + // read input for in-block scan + { + if (squot32(local_tid_126395, 32) == 0 && ltid_in_bounds_126405) { + x_126403 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)]; + if ((local_tid_126395 - squot32(local_tid_126395, 32) * 32) == + 0) { + x_126402 = x_126403; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126408 = 1; + while (slt32(skip_threads_126408, 32)) { 
+ if (sle32(skip_threads_126408, local_tid_126395 - + squot32(local_tid_126395, 32) * 32) && + (squot32(local_tid_126395, 32) == 0 && + ltid_in_bounds_126405)) { + // read operands + { + x_126402 = ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395) - + sext_i32_i64(skip_threads_126408)]; + } + // perform operation + { + bool inactive_126409 = + slt64(srem64(sext_i32_i64(local_tid_126395 * 32 + + 32 - 1), n_73011), + sext_i32_i64(local_tid_126395 * 32 + 32 - + 1) - sext_i32_i64((local_tid_126395 - + skip_threads_126408) * + 32 + 32 - 1)); + + if (inactive_126409) { + x_126402 = x_126403; + } + if (!inactive_126409) { + int64_t defunc_1_op_res_126404 = add64(x_126402, + x_126403); + + x_126402 = defunc_1_op_res_126404; + } + } + } + if (sle32(wave_sizze_126397, skip_threads_126408)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126408, local_tid_126395 - + squot32(local_tid_126395, 32) * 32) && + (squot32(local_tid_126395, 32) == 0 && + ltid_in_bounds_126405)) { + // write result + { + ((volatile __local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)] = + x_126402; + x_126403 = x_126402; + } + } + if (sle32(wave_sizze_126397, skip_threads_126408)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126408 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126395, 32) == 0 || !ltid_in_bounds_126405)) { + // read operands + { + x_90576 = x_90575; + x_90575 = ((__local + int64_t *) mem_120184)[sext_i32_i64(squot32(local_tid_126395, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126410 = + slt64(srem64(sext_i32_i64(local_tid_126395), n_73011), + sext_i32_i64(local_tid_126395) - + sext_i32_i64(squot32(local_tid_126395, 32) * 32 - + 1)); + + if (inactive_126410) { + x_90575 = x_90576; + } + if (!inactive_126410) { + int64_t defunc_1_op_res_90577 = add64(x_90575, x_90576); + + x_90575 = defunc_1_op_res_90577; + } + } + // 
write final result + { + ((__local + int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)] = + x_90575; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126395, 32) == 0) { + ((__local int64_t *) mem_120184)[sext_i32_i64(local_tid_126395)] = + x_90576; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t last_res_90582 = ((__local int64_t *) mem_120184)[m_73103]; + __local char *mem_120186; + + mem_120186 = (__local char *) mem_120186_backing_1; + ((__local double *) mem_120186)[sext_i32_i64(local_tid_126395)] = NAN; + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_120188; + + mem_120188 = (__local char *) mem_120188_backing_2; + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126395)] = + (int64_t) 0; + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t write_i_90365 = sext_i32_i64(ltid_pre_126399); + int32_t phys_tid_90366 = local_tid_126395; + int64_t binop_y_115024 = (int64_t) -1 * write_i_90365; + int64_t slice_115025 = m_73103 + binop_y_115024; + double x_90587 = ((__global double *) images_mem_120108)[gtid_90360 * + N_73007 + + slice_115025]; + bool defunc_0_f_res_90590; + + defunc_0_f_res_90590 = futrts_isnan64(x_90587); + + bool defunc_0_g_res_90591 = !defunc_0_f_res_90590; + int64_t defunc_1_f_res_90592; + + if (defunc_0_g_res_90591) { + int64_t x_90588 = ((__local int64_t *) mem_120184)[write_i_90365]; + int64_t defunc_1_f_res_t_res_90593 = sub64(x_90588, (int64_t) 1); + + defunc_1_f_res_90592 = defunc_1_f_res_t_res_90593; + } else { + defunc_1_f_res_90592 = (int64_t) -1; + } + if (sle64((int64_t) 0, defunc_1_f_res_90592) && slt64(defunc_1_f_res_90592, + n_73011)) { + ((__local int64_t *) mem_120188)[defunc_1_f_res_90592] = write_i_90365; + } + if (sle64((int64_t) 0, defunc_1_f_res_90592) && slt64(defunc_1_f_res_90592, + n_73011)) { + ((__local double *) mem_120186)[defunc_1_f_res_90592] = x_90587; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_126395 == 0) { + ((__global int64_t 
*) mem_120191)[gtid_90360] = last_res_90582; + } + ((__global double *) mem_120194)[gtid_90360 * n_73011 + + sext_i32_i64(local_tid_126395)] = ((__local + double *) mem_120186)[sext_i32_i64(local_tid_126395)]; + barrier(CLK_LOCAL_MEM_FENCE); + ((__global int64_t *) mem_120197)[gtid_90360 * n_73011 + + sext_i32_i64(local_tid_126395)] = + ((__local int64_t *) mem_120188)[sext_i32_i64(local_tid_126395)]; + barrier(CLK_LOCAL_MEM_FENCE); + + error_2: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_98302(__global + int *global_failure, + __local volatile + int64_t *mem_123888_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128052_backing_aligned_1, + __local volatile + int64_t *mem_123885_backing_aligned_2, + __local volatile + int64_t *mem_123882_backing_aligned_3, + __local volatile + int64_t *red_arr_mem_128048_backing_aligned_4, + __local volatile + int64_t *red_arr_mem_128043_backing_aligned_5, + __local volatile + int64_t *mem_123878_backing_aligned_6, + int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t computed_group_sizze_98269, + int64_t binop_x_120251, + __global + unsigned char *defunc_3_map_res_mem_120231, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char *mem_param_123786, + __global + unsigned char *mem_123892, + __global + unsigned char *mem_123895, + __global + unsigned char *mem_123897) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_123888_backing_6 = (__local volatile + char *) mem_123888_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128052_backing_5 = + (__local volatile + char *) red_arr_mem_128052_backing_aligned_1; + __local volatile char *restrict mem_123885_backing_4 = (__local volatile + char *) mem_123885_backing_aligned_2; + __local volatile char *restrict 
mem_123882_backing_3 = (__local volatile + char *) mem_123882_backing_aligned_3; + __local volatile char *restrict red_arr_mem_128048_backing_2 = + (__local volatile + char *) red_arr_mem_128048_backing_aligned_4; + __local volatile char *restrict red_arr_mem_128043_backing_1 = + (__local volatile + char *) red_arr_mem_128043_backing_aligned_5; + __local volatile char *restrict mem_123878_backing_0 = (__local volatile + char *) mem_123878_backing_aligned_6; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128035; + int32_t local_tid_128036; + int64_t group_sizze_128039; + int32_t wave_sizze_128038; + int32_t group_tid_128037; + + global_tid_128035 = get_global_id(0); + local_tid_128036 = get_local_id(0); + group_sizze_128039 = get_local_size(0); + wave_sizze_128038 = LOCKSTEP_WIDTH; + group_tid_128037 = get_group_id(0); + + int32_t phys_tid_98302; + + phys_tid_98302 = group_tid_128037; + + int32_t ltid_pre_128040; + + ltid_pre_128040 = local_tid_128036; + + int32_t ltid_pre_128041; + + ltid_pre_128041 = squot32(local_tid_128036, sext_i64_i32(k2p2zq_73023)); + + int32_t ltid_pre_128042; + + ltid_pre_128042 = local_tid_128036 - squot32(local_tid_128036, + sext_i64_i32(k2p2zq_73023)) * + sext_i64_i32(k2p2zq_73023); + + int64_t gtid_98267; + + gtid_98267 = sext_i32_i64(group_tid_128037); + + double defunc_11_internal_map_res_transformed_row_98432; + + defunc_11_internal_map_res_transformed_row_98432 = ((__global + double *) defunc_3_map_res_mem_120231)[gtid_98267 * + n_73011 + + index_primexp_74309]; + + __local char *mem_123878; + + mem_123878 = (__local char *) mem_123878_backing_0; + + double defunc_0_f_res_98433; + int64_t gtid_98270 = sext_i32_i64(ltid_pre_128040); + int32_t phys_tid_98271 = local_tid_128036; + __local char *red_arr_mem_128043; + + red_arr_mem_128043 = (__local char *) red_arr_mem_128043_backing_1; + if (slt64(gtid_98270, k2p2zq_73023)) { + double x_98439 = ((__global double *) mem_120246)[gtid_98270 * + (defunc_2_reduce_res_73132 * 
+ m_73008) + + gtid_98267 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double defunc_0_f_res_98440; + double redout_119887 = 0.0; + + for (int64_t i_119888 = 0; i_119888 < k2p2zq_73023; i_119888++) { + double x_98444 = ((__global double *) mem_120246)[i_119888 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98267 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98445 = ((__global double *) mem_param_123778)[gtid_98267 * + binop_x_120251 + + gtid_98270 * + k2p2zq_73023 + + i_119888]; + double defunc_1_f_res_98446 = x_98444 * x_98445; + double defunc_1_op_res_98443 = defunc_1_f_res_98446 + redout_119887; + double redout_tmp_128045 = defunc_1_op_res_98443; + + redout_119887 = redout_tmp_128045; + } + defunc_0_f_res_98440 = redout_119887; + + double defunc_1_f_res_98447 = x_98439 * defunc_0_f_res_98440; + + ((__local double *) red_arr_mem_128043)[gtid_98270] = + defunc_1_f_res_98447; + ((__local double *) mem_123878)[gtid_98270] = defunc_0_f_res_98440; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128046; + int32_t skip_waves_128047; + + skip_waves_128047 = 1; + + double x_98435; + double x_98436; + + offset_128046 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128036, sext_i64_i32(k2p2zq_73023))) { + x_98435 = ((__local + double *) red_arr_mem_128043)[sext_i32_i64(local_tid_128036 + + offset_128046)]; + } + } + offset_128046 = 1; + while (slt32(offset_128046, wave_sizze_128038)) { + if (slt32(local_tid_128036 + offset_128046, + sext_i64_i32(k2p2zq_73023)) && ((local_tid_128036 - + squot32(local_tid_128036, + wave_sizze_128038) * + wave_sizze_128038) & (2 * + offset_128046 - + 1)) == + 0) { + // read array element + { + x_98436 = ((volatile __local + double *) red_arr_mem_128043)[sext_i32_i64(local_tid_128036 + + offset_128046)]; + } + // apply reduction operation + { + double defunc_1_op_res_98437 = x_98435 + x_98436; + + x_98435 = defunc_1_op_res_98437; + } + // write result of operation 
+ { + ((volatile __local + double *) red_arr_mem_128043)[sext_i32_i64(local_tid_128036)] = + x_98435; + } + } + offset_128046 *= 2; + } + while (slt32(skip_waves_128047, + squot32(sext_i64_i32(computed_group_sizze_98269) + + wave_sizze_128038 - 1, wave_sizze_128038))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128046 = skip_waves_128047 * wave_sizze_128038; + if (slt32(local_tid_128036 + offset_128046, + sext_i64_i32(k2p2zq_73023)) && ((local_tid_128036 - + squot32(local_tid_128036, + wave_sizze_128038) * + wave_sizze_128038) == 0 && + (squot32(local_tid_128036, + wave_sizze_128038) & + (2 * skip_waves_128047 - + 1)) == 0)) { + // read array element + { + x_98436 = ((__local + double *) red_arr_mem_128043)[sext_i32_i64(local_tid_128036 + + offset_128046)]; + } + // apply reduction operation + { + double defunc_1_op_res_98437 = x_98435 + x_98436; + + x_98435 = defunc_1_op_res_98437; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128043)[sext_i32_i64(local_tid_128036)] = + x_98435; + } + } + skip_waves_128047 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_98433 = ((__local double *) red_arr_mem_128043)[(int64_t) 0]; + + double fr_98448 = 1.0 + defunc_0_f_res_98433; + double defunc_0_f_res_98449; + int64_t gtid_98272 = sext_i32_i64(ltid_pre_128040); + int32_t phys_tid_98273 = local_tid_128036; + __local char *red_arr_mem_128048; + + red_arr_mem_128048 = (__local char *) red_arr_mem_128048_backing_2; + if (slt64(gtid_98272, k2p2zq_73023)) { + double x_98453 = ((__global double *) mem_120246)[gtid_98272 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98267 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98454 = ((__global double *) mem_param_123786)[gtid_98267 * + k2p2zq_73023 + + gtid_98272]; + double defunc_1_f_res_98455 = x_98453 * x_98454; + + ((__local double *) red_arr_mem_128048)[gtid_98272] = + defunc_1_f_res_98455; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128050; + int32_t 
skip_waves_128051; + + skip_waves_128051 = 1; + + double x_98450; + double x_98451; + + offset_128050 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128036, sext_i64_i32(k2p2zq_73023))) { + x_98450 = ((__local + double *) red_arr_mem_128048)[sext_i32_i64(local_tid_128036 + + offset_128050)]; + } + } + offset_128050 = 1; + while (slt32(offset_128050, wave_sizze_128038)) { + if (slt32(local_tid_128036 + offset_128050, + sext_i64_i32(k2p2zq_73023)) && ((local_tid_128036 - + squot32(local_tid_128036, + wave_sizze_128038) * + wave_sizze_128038) & (2 * + offset_128050 - + 1)) == + 0) { + // read array element + { + x_98451 = ((volatile __local + double *) red_arr_mem_128048)[sext_i32_i64(local_tid_128036 + + offset_128050)]; + } + // apply reduction operation + { + double defunc_1_op_res_98452 = x_98450 + x_98451; + + x_98450 = defunc_1_op_res_98452; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128048)[sext_i32_i64(local_tid_128036)] = + x_98450; + } + } + offset_128050 *= 2; + } + while (slt32(skip_waves_128051, + squot32(sext_i64_i32(computed_group_sizze_98269) + + wave_sizze_128038 - 1, wave_sizze_128038))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128050 = skip_waves_128051 * wave_sizze_128038; + if (slt32(local_tid_128036 + offset_128050, + sext_i64_i32(k2p2zq_73023)) && ((local_tid_128036 - + squot32(local_tid_128036, + wave_sizze_128038) * + wave_sizze_128038) == 0 && + (squot32(local_tid_128036, + wave_sizze_128038) & + (2 * skip_waves_128051 - + 1)) == 0)) { + // read array element + { + x_98451 = ((__local + double *) red_arr_mem_128048)[sext_i32_i64(local_tid_128036 + + offset_128050)]; + } + // apply reduction operation + { + double defunc_1_op_res_98452 = x_98450 + x_98451; + + x_98450 = defunc_1_op_res_98452; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128048)[sext_i32_i64(local_tid_128036)] = + x_98450; + } + } + skip_waves_128051 *= 2; + } + 
barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_98449 = ((__local double *) red_arr_mem_128048)[(int64_t) 0]; + + double resid_98456 = defunc_11_internal_map_res_transformed_row_98432 - + defunc_0_f_res_98449; + double sqrt_res_98457; + + sqrt_res_98457 = futrts_sqrt64(fr_98448); + + double recresid_r_98458 = resid_98456 / sqrt_res_98457; + __local char *mem_123882; + + mem_123882 = (__local char *) mem_123882_backing_3; + + __local char *mem_123885; + + mem_123885 = (__local char *) mem_123885_backing_4; + + int64_t gtid_98281 = sext_i32_i64(ltid_pre_128041); + int64_t gtid_98282 = sext_i32_i64(ltid_pre_128042); + int32_t phys_tid_98283 = local_tid_128036; + __local char *red_arr_mem_128052; + + red_arr_mem_128052 = (__local char *) red_arr_mem_128052_backing_5; + if (slt64(gtid_98281, k2p2zq_73023) && slt64(gtid_98282, k2p2zq_73023)) { + double x_98464 = ((__local double *) mem_123878)[gtid_98281]; + double x_98466 = ((__local double *) mem_123878)[gtid_98282]; + double x_98467 = ((__global double *) mem_param_123778)[gtid_98267 * + binop_x_120251 + + gtid_98281 * + k2p2zq_73023 + + gtid_98282]; + double x_98468 = ((__global double *) mem_120246)[gtid_98282 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98267 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98469 = x_98464 * x_98466; + double y_98470 = x_98469 / fr_98448; + double defunc_1_f_res_98471 = x_98467 - y_98470; + double defunc_1_f_res_98472 = x_98468 * defunc_1_f_res_98471; + + ((__local double *) red_arr_mem_128052)[gtid_98281 * k2p2zq_73023 + + gtid_98282] = + defunc_1_f_res_98472; + ((__local double *) mem_123885)[gtid_98281 * k2p2zq_73023 + + gtid_98282] = defunc_1_f_res_98471; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int64_t dims_flat_128054; + + dims_flat_128054 = k2p2zq_73023 * k2p2zq_73023; + + double x_98461; + double x_98462; + double x_128056; + double x_128057; + bool ltid_in_bounds_128059; + + ltid_in_bounds_128059 = slt64(sext_i32_i64(local_tid_128036), k2p2zq_73023 
* + k2p2zq_73023); + + int32_t skip_threads_128060; + + // read input for in-block scan + { + if (ltid_in_bounds_128059) { + x_98462 = ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)]; + if ((local_tid_128036 - squot32(local_tid_128036, 32) * 32) == 0) { + x_98461 = x_98462; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128060 = 1; + while (slt32(skip_threads_128060, 32)) { + if (sle32(skip_threads_128060, local_tid_128036 - + squot32(local_tid_128036, 32) * 32) && + ltid_in_bounds_128059) { + // read operands + { + x_98461 = ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036) - + sext_i32_i64(skip_threads_128060)]; + } + // perform operation + { + bool inactive_128061 = + slt64(srem64(sext_i32_i64(local_tid_128036), + k2p2zq_73023), + sext_i32_i64(local_tid_128036) - + sext_i32_i64(local_tid_128036 - + skip_threads_128060)); + + if (inactive_128061) { + x_98461 = x_98462; + } + if (!inactive_128061) { + double defunc_1_op_res_98463 = x_98461 + x_98462; + + x_98461 = defunc_1_op_res_98463; + } + } + } + if (sle32(wave_sizze_128038, skip_threads_128060)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128060, local_tid_128036 - + squot32(local_tid_128036, 32) * 32) && + ltid_in_bounds_128059) { + // write result + { + ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)] = + x_98461; + x_98462 = x_98461; + } + } + if (sle32(wave_sizze_128038, skip_threads_128060)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128060 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128036 - squot32(local_tid_128036, 32) * 32) == 31 && + ltid_in_bounds_128059) { + ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(squot32(local_tid_128036, + 32))] = + x_98461; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 
'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128062; + + // read input for in-block scan + { + if (squot32(local_tid_128036, 32) == 0 && ltid_in_bounds_128059) { + x_128057 = ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)]; + if ((local_tid_128036 - squot32(local_tid_128036, 32) * 32) == + 0) { + x_128056 = x_128057; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128062 = 1; + while (slt32(skip_threads_128062, 32)) { + if (sle32(skip_threads_128062, local_tid_128036 - + squot32(local_tid_128036, 32) * 32) && + (squot32(local_tid_128036, 32) == 0 && + ltid_in_bounds_128059)) { + // read operands + { + x_128056 = ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036) - + sext_i32_i64(skip_threads_128062)]; + } + // perform operation + { + bool inactive_128063 = + slt64(srem64(sext_i32_i64(local_tid_128036 * 32 + + 32 - 1), k2p2zq_73023), + sext_i32_i64(local_tid_128036 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128036 - + skip_threads_128062) * + 32 + 32 - 1)); + + if (inactive_128063) { + x_128056 = x_128057; + } + if (!inactive_128063) { + double defunc_1_op_res_128058 = x_128056 + x_128057; + + x_128056 = defunc_1_op_res_128058; + } + } + } + if (sle32(wave_sizze_128038, skip_threads_128062)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128062, local_tid_128036 - + squot32(local_tid_128036, 32) * 32) && + (squot32(local_tid_128036, 32) == 0 && + ltid_in_bounds_128059)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)] = + x_128056; + x_128057 = x_128056; + } + } + if (sle32(wave_sizze_128038, skip_threads_128062)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128062 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128036, 32) == 0 || !ltid_in_bounds_128059)) { + // read operands + { + 
x_98462 = x_98461; + x_98461 = ((__local + double *) red_arr_mem_128052)[sext_i32_i64(squot32(local_tid_128036, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128064 = + slt64(srem64(sext_i32_i64(local_tid_128036), k2p2zq_73023), + sext_i32_i64(local_tid_128036) - + sext_i32_i64(squot32(local_tid_128036, 32) * 32 - + 1)); + + if (inactive_128064) { + x_98461 = x_98462; + } + if (!inactive_128064) { + double defunc_1_op_res_98463 = x_98461 + x_98462; + + x_98461 = defunc_1_op_res_98463; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)] = + x_98461; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128036, 32) == 0) { + ((__local + double *) red_arr_mem_128052)[sext_i32_i64(local_tid_128036)] = + x_98462; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128065 = 0; i_128065 < sdiv_up64(k2p2zq_73023 - + sext_i32_i64(local_tid_128036), + computed_group_sizze_98269); + i_128065++) { + ((__local double *) mem_123882)[i_128065 * computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036)] = + ((__local double *) red_arr_mem_128052)[(i_128065 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036)) * + k2p2zq_73023 + + (k2p2zq_73023 - + (int64_t) 1)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + + __local char *mem_123888; + + mem_123888 = (__local char *) mem_123888_backing_6; + + int64_t gtid_98275 = sext_i32_i64(ltid_pre_128040); + int32_t phys_tid_98276 = local_tid_128036; + + if (slt64(gtid_98275, k2p2zq_73023)) { + double x_98474 = ((__global double *) mem_param_123786)[gtid_98267 * + k2p2zq_73023 + + gtid_98275]; + double defunc_0_f_res_98475 = ((__local + double *) mem_123882)[gtid_98275]; + double defunc_0_g_res_98476 = resid_98456 * defunc_0_f_res_98475; + double defunc_1_f_res_98477 = x_98474 + defunc_0_g_res_98476; + + ((__local double 
*) mem_123888)[gtid_98275] = defunc_1_f_res_98477; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128066 = 0; i_128066 < sdiv_up64(k2p2zq_73023 * + k2p2zq_73023 - + sext_i32_i64(local_tid_128036), + computed_group_sizze_98269); + i_128066++) { + ((__global double *) mem_123892)[gtid_98267 * (k2p2zq_73023 * + k2p2zq_73023) + + squot64(i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036), + k2p2zq_73023) * k2p2zq_73023 + + (i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036) - + squot64(i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) mem_123885)[squot64(i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036), + k2p2zq_73023) * + k2p2zq_73023 + + (i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036) - + squot64(i_128066 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036), + k2p2zq_73023) * + k2p2zq_73023)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + for (int64_t i_128067 = 0; i_128067 < sdiv_up64(k2p2zq_73023 - + sext_i32_i64(local_tid_128036), + computed_group_sizze_98269); + i_128067++) { + ((__global double *) mem_123895)[gtid_98267 * k2p2zq_73023 + (i_128067 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036))] = + ((__local double *) mem_123888)[i_128067 * + computed_group_sizze_98269 + + sext_i32_i64(local_tid_128036)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + if (local_tid_128036 == 0) { + ((__global double *) mem_123897)[gtid_98267] = recresid_r_98458; + } + + error_7: + return; +} +/* Intra-group kernel: one work-group processes index get_group_id(0), called gtid_98789 below. Steps: (1) NaN-skipping sum of up to num_recresids_padded_73681 values read from mem_121934, divided by n = x_98910 - k2p2zq_73023; (2) sum of squared deviations from that quotient, with NaN inputs counted as 0.0; (3) fr_98936 = sqrt(sum / (n - 1)) * sqrt(n); (4) local mem_124032 is filled with 0.0 followed by the input values divided by fr_98936, then prefix-summed over its first Nmk_74408 entries; (5) the scan is copied to mem_124035 and n is written to mem_124037. A failed bounds check in step 4 reports failure code 613 through global_failure. NOTE(review): the three backing_aligned arguments provide the local-memory buffers; their sizes are presumably chosen by the host launcher, which is not visible here. */ __kernel void mainMagnitudezisegmap_intragroup_98800(__global + int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *mem_124032_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128307_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_128303_backing_aligned_2, + int64_t m_73008, + int64_t
k2p2zq_73023, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int64_t computed_group_sizze_98791, + __global + unsigned char *defunc_3_map_res_mem_120230, + __global + unsigned char *mem_121934, + __global + unsigned char *mem_124035, + __global + unsigned char *mem_124037) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict mem_124032_backing_2 = (__local volatile + char *) mem_124032_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128307_backing_1 = + (__local volatile + char *) red_arr_mem_128307_backing_aligned_1; + __local volatile char *restrict red_arr_mem_128303_backing_0 = + (__local volatile + char *) red_arr_mem_128303_backing_aligned_2; + volatile __local bool local_failure; + + /* Skip all work when failures are possible and global_failure is already non-negative, i.e. some failure code has been recorded. */ if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_128296; + int32_t local_tid_128297; + int64_t group_sizze_128300; + int32_t wave_sizze_128299; + int32_t group_tid_128298; + + global_tid_128296 = get_global_id(0); + local_tid_128297 = get_local_id(0); + group_sizze_128300 = get_local_size(0); + wave_sizze_128299 = LOCKSTEP_WIDTH; + group_tid_128298 = get_group_id(0); + + int32_t phys_tid_98800; + + phys_tid_98800 = group_tid_128298; + + int32_t ltid_pre_128301; + + ltid_pre_128301 = local_tid_128297; + + int32_t ltid_pre_128302; + + ltid_pre_128302 = local_tid_128297; + + int64_t gtid_98789; + + gtid_98789 = sext_i32_i64(group_tid_128298); + + int64_t x_98910; + + x_98910 = ((__global int64_t *) defunc_3_map_res_mem_120230)[gtid_98789]; + + /* n is this group's x_98910 minus k2p2zq_73023; i64_res_98912 holds the same value as a double for the divisions and square root below. */ int64_t n_98911 = sub64(x_98910, k2p2zq_73023); + double i64_res_98912 = sitofp_i64_f64(n_98911); + double defunc_2_reduce_res_98913; + int64_t gtid_98792 = sext_i32_i64(ltid_pre_128301); + int32_t phys_tid_98793 = local_tid_128297; + __local char *red_arr_mem_128303; + + red_arr_mem_128303 = (__local char *)
red_arr_mem_128303_backing_0; + /* Stage the first reduction: each in-range work-item copies element [gtid_98792 * m_73008 plus gtid_98789] of mem_121934 into local memory. */ if (slt64(gtid_98792, num_recresids_padded_73681)) { + double x_98921 = ((__global double *) mem_121934)[gtid_98792 * m_73008 + + gtid_98789]; + + ((__local double *) red_arr_mem_128303)[gtid_98792] = x_98921; + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* Two-level tree reduction: the first loop combines elements at offsets below wave_sizze_128299 without any barrier, the second loop combines the per-wave partial results with a barrier before each step. */ int32_t offset_128305; + int32_t skip_waves_128306; + + skip_waves_128306 = 1; + + double x_98914; + double x_98915; + + offset_128305 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128297, sext_i64_i32(num_recresids_padded_73681))) { + x_98914 = ((__local + double *) red_arr_mem_128303)[sext_i32_i64(local_tid_128297 + + offset_128305)]; + } + } + offset_128305 = 1; + while (slt32(offset_128305, wave_sizze_128299)) { + if (slt32(local_tid_128297 + offset_128305, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128297 - squot32(local_tid_128297, wave_sizze_128299) * + wave_sizze_128299) & (2 * offset_128305 - 1)) == 0) { + // read array element + { + x_98915 = ((volatile __local + double *) red_arr_mem_128303)[sext_i32_i64(local_tid_128297 + + offset_128305)]; + } + // apply reduction operation + { + /* NaN-skipping sum: if the accumulator is NaN take the other operand, if the other operand is NaN keep the accumulator, otherwise add them. */ bool isnan_res_98916; + + isnan_res_98916 = futrts_isnan64(x_98914); + + double defunc_1_op_res_98917; + + if (isnan_res_98916) { + defunc_1_op_res_98917 = x_98915; + } else { + bool isnan_res_98918; + + isnan_res_98918 = futrts_isnan64(x_98915); + + double defunc_1_op_res_f_res_98919; + + if (isnan_res_98918) { + defunc_1_op_res_f_res_98919 = x_98914; + } else { + double defunc_1_op_res_f_res_f_res_98920 = x_98914 + + x_98915; + + defunc_1_op_res_f_res_98919 = + defunc_1_op_res_f_res_f_res_98920; + } + defunc_1_op_res_98917 = defunc_1_op_res_f_res_98919; + } + x_98914 = defunc_1_op_res_98917; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128303)[sext_i32_i64(local_tid_128297)] = + x_98914; + } + } + offset_128305 *= 2; + } + while (slt32(skip_waves_128306, + squot32(sext_i64_i32(computed_group_sizze_98791) +
+ wave_sizze_128299 - 1, wave_sizze_128299))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128305 = skip_waves_128306 * wave_sizze_128299; + if (slt32(local_tid_128297 + offset_128305, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128297 - squot32(local_tid_128297, wave_sizze_128299) * + wave_sizze_128299) == 0 && (squot32(local_tid_128297, + wave_sizze_128299) & (2 * + skip_waves_128306 - + 1)) == + 0)) { + // read array element + { + x_98915 = ((__local + double *) red_arr_mem_128303)[sext_i32_i64(local_tid_128297 + + offset_128305)]; + } + // apply reduction operation + { + bool isnan_res_98916; + + isnan_res_98916 = futrts_isnan64(x_98914); + + double defunc_1_op_res_98917; + + if (isnan_res_98916) { + defunc_1_op_res_98917 = x_98915; + } else { + bool isnan_res_98918; + + isnan_res_98918 = futrts_isnan64(x_98915); + + double defunc_1_op_res_f_res_98919; + + if (isnan_res_98918) { + defunc_1_op_res_f_res_98919 = x_98914; + } else { + double defunc_1_op_res_f_res_f_res_98920 = x_98914 + + x_98915; + + defunc_1_op_res_f_res_98919 = + defunc_1_op_res_f_res_f_res_98920; + } + defunc_1_op_res_98917 = defunc_1_op_res_f_res_98919; + } + x_98914 = defunc_1_op_res_98917; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128303)[sext_i32_i64(local_tid_128297)] = + x_98914; + } + } + skip_waves_128306 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + /* After the barrier every work-item reads the reduced sum from element 0 of the local array. */ defunc_2_reduce_res_98913 = ((__local + double *) red_arr_mem_128303)[(int64_t) 0]; + + /* The sum is divided by n as a double (i64_res_98912). */ double x_mean_98922 = defunc_2_reduce_res_98913 / i64_res_98912; + double defunc_2_reduce_res_98923; + int64_t gtid_98794 = sext_i32_i64(ltid_pre_128301); + int32_t phys_tid_98795 = local_tid_128297; + __local char *red_arr_mem_128307; + + red_arr_mem_128307 = (__local char *) red_arr_mem_128307_backing_1; + /* Stage the second reduction: the squared difference between each input value and x_mean_98922, with NaN inputs replaced by 0.0. */ if (slt64(gtid_98794, num_recresids_padded_73681)) { + double x_98927 = ((__global double *) mem_121934)[gtid_98794 * m_73008 + + gtid_98789]; + bool isnan_res_98928; + + isnan_res_98928 =
futrts_isnan64(x_98927); + + double defunc_0_f_res_98929; + + if (isnan_res_98928) { + defunc_0_f_res_98929 = 0.0; + } else { + double x_98930 = x_98927 - x_mean_98922; + double defunc_0_f_res_f_res_98931 = fpow64(x_98930, 2.0); + + defunc_0_f_res_98929 = defunc_0_f_res_f_res_98931; + } + ((__local double *) red_arr_mem_128307)[gtid_98794] = + defunc_0_f_res_98929; + } + barrier(CLK_LOCAL_MEM_FENCE); + + /* Same two-level tree reduction shape as the first one in this kernel, here with plain floating-point addition as the operator. */ int32_t offset_128309; + int32_t skip_waves_128310; + + skip_waves_128310 = 1; + + double x_98924; + double x_98925; + + offset_128309 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128297, sext_i64_i32(num_recresids_padded_73681))) { + x_98924 = ((__local + double *) red_arr_mem_128307)[sext_i32_i64(local_tid_128297 + + offset_128309)]; + } + } + offset_128309 = 1; + while (slt32(offset_128309, wave_sizze_128299)) { + if (slt32(local_tid_128297 + offset_128309, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128297 - squot32(local_tid_128297, wave_sizze_128299) * + wave_sizze_128299) & (2 * offset_128309 - 1)) == 0) { + // read array element + { + x_98925 = ((volatile __local + double *) red_arr_mem_128307)[sext_i32_i64(local_tid_128297 + + offset_128309)]; + } + // apply reduction operation + { + double defunc_1_op_res_98926 = x_98924 + x_98925; + + x_98924 = defunc_1_op_res_98926; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128307)[sext_i32_i64(local_tid_128297)] = + x_98924; + } + } + offset_128309 *= 2; + } + while (slt32(skip_waves_128310, + squot32(sext_i64_i32(computed_group_sizze_98791) + + wave_sizze_128299 - 1, wave_sizze_128299))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128309 = skip_waves_128310 * wave_sizze_128299; + if (slt32(local_tid_128297 + offset_128309, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128297 - squot32(local_tid_128297, wave_sizze_128299) * + wave_sizze_128299) == 0 && (squot32(local_tid_128297, + wave_sizze_128299) & (2 * +
skip_waves_128310 - + 1)) == + 0)) { + // read array element + { + x_98925 = ((__local + double *) red_arr_mem_128307)[sext_i32_i64(local_tid_128297 + + offset_128309)]; + } + // apply reduction operation + { + double defunc_1_op_res_98926 = x_98924 + x_98925; + + x_98924 = defunc_1_op_res_98926; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128307)[sext_i32_i64(local_tid_128297)] = + x_98924; + } + } + skip_waves_128310 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_98923 = ((__local + double *) red_arr_mem_128307)[(int64_t) 0]; + + /* fr_98936 = sqrt(sum of squares / (n - 1)) * sqrt(n); it is the divisor applied to every scan input below. */ double y_98932 = i64_res_98912 - 1.0; + double binop_p_98933 = defunc_2_reduce_res_98923 / y_98932; + double defunc_0_f_res_98934; + + defunc_0_f_res_98934 = futrts_sqrt64(binop_p_98933); + + double sqrt_res_98935; + + sqrt_res_98935 = futrts_sqrt64(i64_res_98912); + + double fr_98936 = defunc_0_f_res_98934 * sqrt_res_98935; + __local char *mem_124032; + + mem_124032 = (__local char *) mem_124032_backing_2; + + int64_t gtid_98796 = sext_i32_i64(ltid_pre_128302); + int32_t phys_tid_98797 = local_tid_128297; + + /* Build the scan input in local mem_124032: entry 0 is 0.0 and entry g (g above 0) is element g - 1 of this group's column of mem_121934 divided by fr_98936. */ if (slt64(gtid_98796, Nmk_74408)) { + bool cond_98942 = gtid_98796 == (int64_t) 0; + double defunc_0_f_res_98943; + + if (cond_98942) { + defunc_0_f_res_98943 = 0.0; + } else { + int64_t i_98944 = sub64(gtid_98796, (int64_t) 1); + bool x_98945 = sle64((int64_t) 0, i_98944); + bool y_98946 = slt64(i_98944, num_recresids_padded_73681); + bool bounds_check_98947 = x_98945 && y_98946; + bool index_certs_98948; + + /* Index outside [0, num_recresids_padded_73681): when atomic_cmpxchg_i32_global returns -1 this work-item stores the offending index and the bound in global_failure_args; in every case it sets local_failure and jumps to error_4. NOTE(review): the helper is defined outside this view and presumably swaps in code 613 only if global_failure still held -1. */ if (!bounds_check_98947) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, 613) == + -1) { + global_failure_args[0] = i_98944; + global_failure_args[1] = num_recresids_padded_73681; + ; + } + local_failure = true; + goto error_4; + } + } + + double x_98949 = ((__global double *) mem_121934)[i_98944 * + m_73008 + + gtid_98789]; + double defunc_0_f_res_f_res_98950 = x_98949 / fr_98936; + + defunc_0_f_res_98943 = defunc_0_f_res_f_res_98950; + } + ((__local double *)
mem_124032)[gtid_98796] = defunc_0_f_res_98943; + } + + /* local_failure is a __local variable, so after this barrier every work-item of the group takes the return below if any of them failed the bounds check. */ error_4: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + /* Prefix sum (addition) over the first Nmk_74408 entries of mem_124032, organised in blocks of 32 work-items: scan inside each block, scan the per-block totals within the first block, then add the carry-in to every later block. */ int64_t dims_flat_128311; + + dims_flat_128311 = Nmk_74408; + + double x_98938; + double x_98939; + double x_128313; + double x_128314; + bool ltid_in_bounds_128316; + + ltid_in_bounds_128316 = slt64(sext_i32_i64(local_tid_128297), Nmk_74408); + + int32_t skip_threads_128317; + + // read input for in-block scan + { + if (ltid_in_bounds_128316) { + x_98939 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297)]; + if ((local_tid_128297 - squot32(local_tid_128297, 32) * 32) == 0) { + x_98938 = x_98939; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128317 = 1; + while (slt32(skip_threads_128317, 32)) { + if (sle32(skip_threads_128317, local_tid_128297 - + squot32(local_tid_128297, 32) * 32) && + ltid_in_bounds_128316) { + // read operands + { + x_98938 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297) - + sext_i32_i64(skip_threads_128317)]; + } + // perform operation + { + bool inactive_128318 = + slt64(srem64(sext_i32_i64(local_tid_128297), + Nmk_74408), + sext_i32_i64(local_tid_128297) - + sext_i32_i64(local_tid_128297 - + skip_threads_128317)); + + if (inactive_128318) { + x_98938 = x_98939; + } + if (!inactive_128318) { + double defunc_1_op_res_98940 = x_98938 + x_98939; + + x_98938 = defunc_1_op_res_98940; + } + } + } + if (sle32(wave_sizze_128299, skip_threads_128317)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128317, local_tid_128297 - + squot32(local_tid_128297, 32) * 32) && + ltid_in_bounds_128316) { + // write result + { + ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297)] = + x_98938; + x_98939 = x_98938; + } + } + if (sle32(wave_sizze_128299, skip_threads_128317)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128317 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); +
// last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128297 - squot32(local_tid_128297, 32) * 32) == 31 && + ltid_in_bounds_128316) { + ((volatile __local + double *) mem_124032)[sext_i32_i64(squot32(local_tid_128297, + 32))] = x_98938; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128319; + + // read input for in-block scan + { + if (squot32(local_tid_128297, 32) == 0 && ltid_in_bounds_128316) { + x_128314 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297)]; + if ((local_tid_128297 - squot32(local_tid_128297, 32) * 32) == + 0) { + x_128313 = x_128314; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128319 = 1; + while (slt32(skip_threads_128319, 32)) { + if (sle32(skip_threads_128319, local_tid_128297 - + squot32(local_tid_128297, 32) * 32) && + (squot32(local_tid_128297, 32) == 0 && + ltid_in_bounds_128316)) { + // read operands + { + x_128313 = ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297) - + sext_i32_i64(skip_threads_128319)]; + } + // perform operation + { + bool inactive_128320 = + slt64(srem64(sext_i32_i64(local_tid_128297 * 32 + + 32 - 1), Nmk_74408), + sext_i32_i64(local_tid_128297 * 32 + 32 - + 1) - sext_i32_i64((local_tid_128297 - + skip_threads_128319) * + 32 + 32 - 1)); + + if (inactive_128320) { + x_128313 = x_128314; + } + if (!inactive_128320) { + double defunc_1_op_res_128315 = x_128313 + x_128314; + + x_128313 = defunc_1_op_res_128315; + } + } + } + if (sle32(wave_sizze_128299, skip_threads_128319)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128319, local_tid_128297 - + squot32(local_tid_128297, 32) * 32) && + (squot32(local_tid_128297, 32) == 0 && + ltid_in_bounds_128316)) { + // write result + { + ((volatile __local + double *) mem_124032)[sext_i32_i64(local_tid_128297)] = + x_128313; + x_128314 = x_128313; +
} + } + if (sle32(wave_sizze_128299, skip_threads_128319)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128319 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128297, 32) == 0 || !ltid_in_bounds_128316)) { + // read operands + { + x_98939 = x_98938; + x_98938 = ((__local + double *) mem_124032)[sext_i32_i64(squot32(local_tid_128297, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128321 = + slt64(srem64(sext_i32_i64(local_tid_128297), Nmk_74408), + sext_i32_i64(local_tid_128297) - + sext_i32_i64(squot32(local_tid_128297, 32) * 32 - + 1)); + + if (inactive_128321) { + x_98938 = x_98939; + } + if (!inactive_128321) { + double defunc_1_op_res_98940 = x_98938 + x_98939; + + x_98938 = defunc_1_op_res_98940; + } + } + // write final result + { + ((__local + double *) mem_124032)[sext_i32_i64(local_tid_128297)] = + x_98938; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128297, 32) == 0) { + ((__local double *) mem_124032)[sext_i32_i64(local_tid_128297)] = + x_98939; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + /* Copy the scanned values to row gtid_98789 of mem_124035; each work-item starts at its local id and steps by computed_group_sizze_98791 while the index stays below Nmk_74408. */ for (int64_t i_128322 = 0; i_128322 < sdiv_up64(Nmk_74408 - + sext_i32_i64(local_tid_128297), + computed_group_sizze_98791); + i_128322++) { + ((__global double *) mem_124035)[gtid_98789 * Nmk_74408 + (i_128322 * + computed_group_sizze_98791 + + sext_i32_i64(local_tid_128297))] = + ((__local double *) mem_124032)[i_128322 * + computed_group_sizze_98791 + + sext_i32_i64(local_tid_128297)]; + } + barrier(CLK_LOCAL_MEM_FENCE); + /* Only work-item 0 stores n for this group. */ if (local_tid_128297 == 0) { + ((__global int64_t *) mem_124037)[gtid_98789] = n_98911; + } + + error_5: + return; +} +__kernel void mainMagnitudezisegmap_intragroup_99224(__global + int *global_failure, + __local volatile + int64_t *red_arr_mem_128559_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128555_backing_aligned_1, + double level_73014, + int64_t
num_recresids_padded_73681, + int64_t Nmk_74408, __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *defunc_3_map_res_mem_124069, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124118) +{ + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128559_backing_1 = + (__local volatile + char *) red_arr_mem_128559_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128555_backing_0 = + (__local volatile + char *) red_arr_mem_128555_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128549; + int32_t local_tid_128550; + int64_t group_sizze_128553; + int32_t wave_sizze_128552; + int32_t group_tid_128551; + + global_tid_128549 = get_global_id(0); + local_tid_128550 = get_local_id(0); + group_sizze_128553 = get_local_size(0); + wave_sizze_128552 = LOCKSTEP_WIDTH; + group_tid_128551 = get_group_id(0); + + int32_t phys_tid_99224; + + phys_tid_99224 = group_tid_128551; + + int32_t ltid_pre_128554; + + ltid_pre_128554 = local_tid_128550; + + int64_t gtid_99215; + + gtid_99215 = sext_i32_i64(group_tid_128551); + + int64_t x_99505; + + x_99505 = ((__global int64_t *) defunc_3_map_res_mem_124069)[gtid_99215]; + + double i64_res_99510 = sitofp_i64_f64(x_99505); + double defunc_2_reduce_res_99511; + int64_t gtid_99218 = sext_i32_i64(ltid_pre_128554); + int32_t phys_tid_99219 = local_tid_128550; + __local char *red_arr_mem_128555; + + red_arr_mem_128555 = (__local char *) red_arr_mem_128555_backing_0; + + int64_t slice_115279; + + slice_115279 = (int64_t) 1 + gtid_99218; + + double x_99515 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99215 * + Nmk_74408 + + slice_115279]; + int64_t x_99517 = mul64((int64_t) 2, gtid_99218); + int64_t i64_arg_99518 = add64((int64_t) 2, x_99517); + double i64_res_99519 = sitofp_i64_f64(i64_arg_99518); + double y_99520 = i64_res_99519 / i64_res_99510; + double 
lifted_div_res_99521 = 1.0 + y_99520; + double abs_arg_99522 = x_99515 / lifted_div_res_99521; + double abs_res_99523 = fabs(abs_arg_99522); + + ((__local double *) red_arr_mem_128555)[gtid_99218] = abs_res_99523; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128557; + int32_t skip_waves_128558; + + skip_waves_128558 = 1; + + double x_99512; + double x_99513; + + offset_128557 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128550, sext_i64_i32(num_recresids_padded_73681))) { + x_99512 = ((__local + double *) red_arr_mem_128555)[sext_i32_i64(local_tid_128550 + + offset_128557)]; + } + } + offset_128557 = 1; + while (slt32(offset_128557, wave_sizze_128552)) { + if (slt32(local_tid_128550 + offset_128557, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128550 - squot32(local_tid_128550, wave_sizze_128552) * + wave_sizze_128552) & (2 * offset_128557 - 1)) == 0) { + // read array element + { + x_99513 = ((volatile __local + double *) red_arr_mem_128555)[sext_i32_i64(local_tid_128550 + + offset_128557)]; + } + // apply reduction operation + { + double defunc_1_op_res_99514 = fmax64(x_99512, x_99513); + + x_99512 = defunc_1_op_res_99514; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128555)[sext_i32_i64(local_tid_128550)] = + x_99512; + } + } + offset_128557 *= 2; + } + while (slt32(skip_waves_128558, + squot32(sext_i64_i32(num_recresids_padded_73681) + + wave_sizze_128552 - 1, wave_sizze_128552))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128557 = skip_waves_128558 * wave_sizze_128552; + if (slt32(local_tid_128550 + offset_128557, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128550 - squot32(local_tid_128550, wave_sizze_128552) * + wave_sizze_128552) == 0 && (squot32(local_tid_128550, + wave_sizze_128552) & (2 * + skip_waves_128558 - + 1)) == + 0)) { + // read array element + { + x_99513 = ((__local + double *) red_arr_mem_128555)[sext_i32_i64(local_tid_128550 + + 
offset_128557)]; + } + // apply reduction operation + { + double defunc_1_op_res_99514 = fmax64(x_99512, x_99513); + + x_99512 = defunc_1_op_res_99514; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128555)[sext_i32_i64(local_tid_128550)] = + x_99512; + } + } + skip_waves_128558 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_2_reduce_res_99511 = ((__local + double *) red_arr_mem_128555)[(int64_t) 0]; + + double defunc_0_Q_arg_99524 = 3.0 * defunc_2_reduce_res_99511; + double zs_res_99525 = defunc_0_Q_arg_99524 / 1.4142135623730951; + double abs_res_99526 = fabs(zs_res_99525); + double zs_res_99527 = abs_res_99526 / 2.0; + double zp_res_99528 = 1.0 + zs_res_99527; + double zs_res_99529 = 1.0 / zp_res_99528; + double zt_res_99530 = zs_res_99529 * zs_res_99529; + double zt_res_99531 = zs_res_99529 * zt_res_99530; + double zt_res_99532 = zt_res_99530 * zt_res_99530; + double zt_res_99533 = zt_res_99530 * zt_res_99531; + double zt_res_99534 = zt_res_99531 * zt_res_99531; + double zt_res_99535 = zt_res_99531 * zt_res_99532; + double zt_res_99536 = zt_res_99532 * zt_res_99532; + double zt_res_99537 = zt_res_99532 * zt_res_99533; + double zt_res_99538 = 0.17087277 * zt_res_99537; + double zt_res_99539 = 0.82215223 * zt_res_99536; + double zt_res_99540 = 1.48851587 * zt_res_99535; + double zt_res_99541 = 1.13520398 * zt_res_99534; + double zt_res_99542 = 0.27886807 * zt_res_99533; + double zt_res_99543 = 0.18628806 * zt_res_99532; + double zt_res_99544 = 9.678418e-2 * zt_res_99531; + double zt_res_99545 = 0.37409196 * zt_res_99530; + double zt_res_99546 = 1.00002368 * zs_res_99529; + double zt_res_99547 = zs_res_99525 * zs_res_99525; + double zm_res_99548 = 0.0 - zt_res_99547; + double zm_res_99549 = zm_res_99548 - 1.26551223; + double zp_res_99550 = zt_res_99546 + zm_res_99549; + double zp_res_99551 = zt_res_99545 + zp_res_99550; + double zp_res_99552 = zt_res_99544 + zp_res_99551; + double zm_res_99553 = zp_res_99552 - zt_res_99543; + 
double zp_res_99554 = zt_res_99542 + zm_res_99553; + double zm_res_99555 = zp_res_99554 - zt_res_99541; + double zp_res_99556 = zt_res_99540 + zm_res_99555; + double zm_res_99557 = zp_res_99556 - zt_res_99539; + double zp_res_99558 = zt_res_99538 + zm_res_99557; + double exp_res_99559; + + exp_res_99559 = futrts_exp64(zp_res_99558); + + double zt_res_99560 = zs_res_99529 * exp_res_99559; + bool zgze_res_99561 = 0.0 <= zs_res_99525; + double erf_res_99562; + + if (zgze_res_99561) { + double zm_res_99563 = 1.0 - zt_res_99560; + + erf_res_99562 = zm_res_99563; + } else { + double zm_res_99564 = zt_res_99560 - 1.0; + + erf_res_99562 = zm_res_99564; + } + + double zp_res_99565 = 1.0 + erf_res_99562; + double zs_res_99566 = zp_res_99565 / 2.0; + double defunc_0_Q_res_99567 = 1.0 - zs_res_99566; + double y_99568 = fpow64(defunc_2_reduce_res_99511, 2.0); + double negate_arg_99569 = 4.0 * y_99568; + double defunc_0_exp_arg_99570 = 0.0 - negate_arg_99569; + double defunc_0_exp_res_99571 = fpow64(2.718281828459045, + defunc_0_exp_arg_99570); + double x_99572 = defunc_0_Q_res_99567 + defunc_0_exp_res_99571; + double zs_res_99573 = defunc_2_reduce_res_99511 / 1.4142135623730951; + double abs_res_99574 = fabs(zs_res_99573); + double zs_res_99575 = abs_res_99574 / 2.0; + double zp_res_99576 = 1.0 + zs_res_99575; + double zs_res_99577 = 1.0 / zp_res_99576; + double zt_res_99578 = zs_res_99577 * zs_res_99577; + double zt_res_99579 = zs_res_99577 * zt_res_99578; + double zt_res_99580 = zt_res_99578 * zt_res_99578; + double zt_res_99581 = zt_res_99578 * zt_res_99579; + double zt_res_99582 = zt_res_99579 * zt_res_99579; + double zt_res_99583 = zt_res_99579 * zt_res_99580; + double zt_res_99584 = zt_res_99580 * zt_res_99580; + double zt_res_99585 = zt_res_99580 * zt_res_99581; + double zt_res_99586 = 0.17087277 * zt_res_99585; + double zt_res_99587 = 0.82215223 * zt_res_99584; + double zt_res_99588 = 1.48851587 * zt_res_99583; + double zt_res_99589 = 1.13520398 * zt_res_99582; + double 
zt_res_99590 = 0.27886807 * zt_res_99581; + double zt_res_99591 = 0.18628806 * zt_res_99580; + double zt_res_99592 = 9.678418e-2 * zt_res_99579; + double zt_res_99593 = 0.37409196 * zt_res_99578; + double zt_res_99594 = 1.00002368 * zs_res_99577; + double zt_res_99595 = zs_res_99573 * zs_res_99573; + double zm_res_99596 = 0.0 - zt_res_99595; + double zm_res_99597 = zm_res_99596 - 1.26551223; + double zp_res_99598 = zt_res_99594 + zm_res_99597; + double zp_res_99599 = zt_res_99593 + zp_res_99598; + double zp_res_99600 = zt_res_99592 + zp_res_99599; + double zm_res_99601 = zp_res_99600 - zt_res_99591; + double zp_res_99602 = zt_res_99590 + zm_res_99601; + double zm_res_99603 = zp_res_99602 - zt_res_99589; + double zp_res_99604 = zt_res_99588 + zm_res_99603; + double zm_res_99605 = zp_res_99604 - zt_res_99587; + double zp_res_99606 = zt_res_99586 + zm_res_99605; + double exp_res_99607; + + exp_res_99607 = futrts_exp64(zp_res_99606); + + double zt_res_99608 = zs_res_99577 * exp_res_99607; + bool zgze_res_99609 = 0.0 <= zs_res_99573; + double erf_res_99610; + + if (zgze_res_99609) { + double zm_res_99611 = 1.0 - zt_res_99608; + + erf_res_99610 = zm_res_99611; + } else { + double zm_res_99612 = zt_res_99608 - 1.0; + + erf_res_99610 = zm_res_99612; + } + + double zp_res_99613 = 1.0 + erf_res_99610; + double zs_res_99614 = zp_res_99613 / 2.0; + double defunc_0_Q_res_99615 = 1.0 - zs_res_99614; + double y_99616 = defunc_0_exp_res_99571 * defunc_0_Q_res_99615; + double y_99617 = x_99572 - y_99616; + double pval_brownian_motion_max_res_99618 = 2.0 * y_99617; + int64_t defunc_0_f_res_99619; + int64_t gtid_99220 = sext_i32_i64(ltid_pre_128554); + int32_t phys_tid_99221 = local_tid_128550; + __local char *red_arr_mem_128559; + + red_arr_mem_128559 = (__local char *) red_arr_mem_128559_backing_1; + + int64_t slice_115281; + + slice_115281 = (int64_t) 1 + gtid_99220; + + double x_99624 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99215 * + Nmk_74408 + + 
slice_115281]; + double x_99625 = ((__global double *) mem_124078)[gtid_99215 * Nmk_74408 + + slice_115281]; + double abs_res_99626 = fabs(x_99624); + bool cond_99627 = x_99625 < abs_res_99626; + int64_t defunc_2_f_res_99628; + + if (cond_99627) { + defunc_2_f_res_99628 = gtid_99220; + } else { + defunc_2_f_res_99628 = (int64_t) 9223372036854775807; + } + ((__local int64_t *) red_arr_mem_128559)[gtid_99220] = defunc_2_f_res_99628; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128561; + int32_t skip_waves_128562; + + skip_waves_128562 = 1; + + int64_t x_99620; + int64_t x_99621; + + offset_128561 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128550, sext_i64_i32(num_recresids_padded_73681))) { + x_99620 = ((__local + int64_t *) red_arr_mem_128559)[sext_i32_i64(local_tid_128550 + + offset_128561)]; + } + } + offset_128561 = 1; + while (slt32(offset_128561, wave_sizze_128552)) { + if (slt32(local_tid_128550 + offset_128561, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128550 - squot32(local_tid_128550, wave_sizze_128552) * + wave_sizze_128552) & (2 * offset_128561 - 1)) == 0) { + // read array element + { + x_99621 = ((volatile __local + int64_t *) red_arr_mem_128559)[sext_i32_i64(local_tid_128550 + + offset_128561)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_99622 = smin64(x_99620, x_99621); + + x_99620 = defunc_1_op_res_99622; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128559)[sext_i32_i64(local_tid_128550)] = + x_99620; + } + } + offset_128561 *= 2; + } + while (slt32(skip_waves_128562, + squot32(sext_i64_i32(num_recresids_padded_73681) + + wave_sizze_128552 - 1, wave_sizze_128552))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128561 = skip_waves_128562 * wave_sizze_128552; + if (slt32(local_tid_128550 + offset_128561, + sext_i64_i32(num_recresids_padded_73681)) && + ((local_tid_128550 - squot32(local_tid_128550, wave_sizze_128552) * + 
wave_sizze_128552) == 0 && (squot32(local_tid_128550, + wave_sizze_128552) & (2 * + skip_waves_128562 - + 1)) == + 0)) { + // read array element + { + x_99621 = ((__local + int64_t *) red_arr_mem_128559)[sext_i32_i64(local_tid_128550 + + offset_128561)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_99622 = smin64(x_99620, x_99621); + + x_99620 = defunc_1_op_res_99622; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128559)[sext_i32_i64(local_tid_128550)] = + x_99620; + } + } + skip_waves_128562 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + defunc_0_f_res_99619 = ((__local + int64_t *) red_arr_mem_128559)[(int64_t) 0]; + + bool isnan_res_99629; + + isnan_res_99629 = futrts_isnan64(pval_brownian_motion_max_res_99618); + + bool cond_99630 = !isnan_res_99629; + bool cond_t_res_99631 = pval_brownian_motion_max_res_99618 < level_73014; + bool x_99632 = cond_99630 && cond_t_res_99631; + bool chk_t_res_99633 = defunc_0_f_res_99619 == + (int64_t) 9223372036854775807; + bool chk_t_res_99634 = !chk_t_res_99633; + bool x_99635 = x_99632 && chk_t_res_99634; + int64_t y_start_99636; + + if (x_99635) { + int64_t y_start_t_res_99637 = sub64(x_99505, defunc_0_f_res_99619); + + y_start_99636 = y_start_t_res_99637; + } else { + y_start_99636 = (int64_t) 0; + } + if (local_tid_128550 == 0) { + ((__global int64_t *) mem_124118)[gtid_99215] = y_start_99636; + } + + error_4: + return; +} +__kernel void mainMagnitudezisegred_large_100140(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128804_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128802_backing_aligned_1, + int64_t N_73007, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_100279, + int64_t groups_per_segment_128788, + int64_t elements_per_thread_128789, + int64_t virt_num_groups_128790, + int64_t threads_per_segment_128792, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned 
char *mem_124276, + __global + unsigned char *mem_124281, + __global + unsigned char *group_res_arr_mem_128793, + __global + unsigned char *mainMagnitudezicounter_mem_128795) +{ + #define segred_group_sizze_100278 (mainMagnitudezisegred_group_sizze_100134) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128804_backing_1 = + (__local volatile + char *) sync_arr_mem_128804_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128802_backing_0 = + (__local volatile + char *) red_arr_mem_128802_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128797; + int32_t local_tid_128798; + int64_t group_sizze_128801; + int32_t wave_sizze_128800; + int32_t group_tid_128799; + + global_tid_128797 = get_global_id(0); + local_tid_128798 = get_local_id(0); + group_sizze_128801 = get_local_size(0); + wave_sizze_128800 = LOCKSTEP_WIDTH; + group_tid_128799 = get_group_id(0); + + int32_t phys_tid_100140; + + phys_tid_100140 = global_tid_128797; + + __local char *red_arr_mem_128802; + + red_arr_mem_128802 = (__local char *) red_arr_mem_128802_backing_0; + + __local char *sync_arr_mem_128804; + + sync_arr_mem_128804 = (__local char *) sync_arr_mem_128804_backing_1; + + int32_t phys_group_id_128806; + + phys_group_id_128806 = get_group_id(0); + for (int32_t i_128807 = 0; i_128807 < + sdiv_up32(sext_i64_i32(virt_num_groups_128790) - phys_group_id_128806, + sext_i64_i32(num_groups_100279)); i_128807++) { + int32_t virt_group_id_128808 = phys_group_id_128806 + i_128807 * + sext_i64_i32(num_groups_100279); + int32_t flat_segment_id_128809 = squot32(virt_group_id_128808, + sext_i64_i32(groups_per_segment_128788)); + int64_t global_tid_128810 = srem64(sext_i32_i64(virt_group_id_128808) * + segred_group_sizze_100278 + + sext_i32_i64(local_tid_128798), + segred_group_sizze_100278 * + groups_per_segment_128788); + int64_t gtid_100127 = 
squot64(sext_i32_i64(flat_segment_id_128809), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_100128 = squot64(sext_i32_i64(flat_segment_id_128809) - + squot64(sext_i32_i64(flat_segment_id_128809), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_100129 = sext_i32_i64(flat_segment_id_128809) - + squot64(sext_i32_i64(flat_segment_id_128809), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(flat_segment_id_128809) - + squot64(sext_i32_i64(flat_segment_id_128809), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_100139; + double x_acc_128811; + int64_t chunk_sizze_128812; + + chunk_sizze_128812 = smin64(elements_per_thread_128789, + sdiv_up64(n_73011 - global_tid_128810, + threads_per_segment_128792)); + + double x_100282; + double x_100283; + + // neutral-initialise the accumulators + { + x_acc_128811 = 0.0; + } + for (int64_t i_128816 = 0; i_128816 < chunk_sizze_128812; i_128816++) { + gtid_100139 = global_tid_128810 + threads_per_segment_128792 * + i_128816; + // apply map function + { + double x_100288 = ((__global double *) mem_124142)[gtid_100127 * + N_73007 + + gtid_100139]; + double x_100289 = ((__global + double *) binop_p_mem_120117)[gtid_100128 * + N_73007 + + gtid_100139]; + double x_100290 = ((__global double *) mem_124276)[gtid_100129 * + N_73007 + + gtid_100139]; + double x_100291 = x_100289 * x_100290; + bool isnan_res_100292; + + isnan_res_100292 = futrts_isnan64(x_100288); + + double y_100293; + + if (isnan_res_100292) { + y_100293 = 0.0; + } else { + y_100293 = 1.0; + } + + double defunc_2_f_res_100294 = x_100291 * y_100293; + + // save map-out results + { } + // load accumulator + { + x_100282 = x_acc_128811; + } + // load new values + { + x_100283 = defunc_2_f_res_100294; + } + // apply reduction operator + { + double defunc_1_op_res_100284 = x_100282 + x_100283; + + // store in accumulator + { + 
x_acc_128811 = defunc_1_op_res_100284; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_100282 = x_acc_128811; + ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_100282; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128817; + int32_t skip_waves_128818; + + skip_waves_128818 = 1; + + double x_128813; + double x_128814; + + offset_128817 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128798, + sext_i64_i32(segred_group_sizze_100278))) { + x_128813 = ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128817)]; + } + } + offset_128817 = 1; + while (slt32(offset_128817, wave_sizze_128800)) { + if (slt32(local_tid_128798 + offset_128817, + sext_i64_i32(segred_group_sizze_100278)) && + ((local_tid_128798 - squot32(local_tid_128798, + wave_sizze_128800) * + wave_sizze_128800) & (2 * offset_128817 - 1)) == 0) { + // read array element + { + x_128814 = ((volatile __local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128817)]; + } + // apply reduction operation + { + double defunc_1_op_res_128815 = x_128813 + x_128814; + + x_128813 = defunc_1_op_res_128815; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_128813; + } + } + offset_128817 *= 2; + } + while (slt32(skip_waves_128818, + squot32(sext_i64_i32(segred_group_sizze_100278) + + wave_sizze_128800 - 1, wave_sizze_128800))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128817 = skip_waves_128818 * wave_sizze_128800; + if (slt32(local_tid_128798 + offset_128817, + sext_i64_i32(segred_group_sizze_100278)) && + ((local_tid_128798 - squot32(local_tid_128798, + wave_sizze_128800) * + wave_sizze_128800) == 0 && (squot32(local_tid_128798, + wave_sizze_128800) & (2 * + skip_waves_128818 - + 1)) == + 0)) { + // read array element + { + x_128814 = ((__local + double *) 
red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128817)]; + } + // apply reduction operation + { + double defunc_1_op_res_128815 = x_128813 + x_128814; + + x_128813 = defunc_1_op_res_128815; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_128813; + } + } + skip_waves_128818 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128798) == (int64_t) 0) { + x_acc_128811 = x_128813; + } + } + if (groups_per_segment_128788 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128798 == 0) { + ((__global double *) mem_124281)[gtid_100127 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_100128 * + k2p2zq_73023 + + gtid_100129] = + x_acc_128811; + } + } + } else { + int32_t old_counter_128819; + + // first thread in group saves group result to global memory + { + if (local_tid_128798 == 0) { + ((__global + double *) group_res_arr_mem_128793)[sext_i32_i64(virt_group_id_128808) * + segred_group_sizze_100278] = + x_acc_128811; + mem_fence_global(); + old_counter_128819 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128795)[sext_i32_i64(srem32(flat_segment_id_128809, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128804)[(int64_t) 0] = + old_counter_128819 == groups_per_segment_128788 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128820; + + is_last_group_128820 = ((__local + bool *) sync_arr_mem_128804)[(int64_t) 0]; + if (is_last_group_128820) { + if (local_tid_128798 == 0) { + old_counter_128819 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128795)[sext_i32_i64(srem32(flat_segment_id_128809, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128788)); + } + // read in the per-group-results + { + int64_t read_per_thread_128821 = + 
sdiv_up64(groups_per_segment_128788, + segred_group_sizze_100278); + + x_100282 = 0.0; + for (int64_t i_128822 = 0; i_128822 < + read_per_thread_128821; i_128822++) { + int64_t group_res_id_128823 = + sext_i32_i64(local_tid_128798) * + read_per_thread_128821 + i_128822; + int64_t index_of_group_res_128824 = + sext_i32_i64(flat_segment_id_128809) * + groups_per_segment_128788 + group_res_id_128823; + + if (slt64(group_res_id_128823, + groups_per_segment_128788)) { + x_100283 = ((__global + double *) group_res_arr_mem_128793)[index_of_group_res_128824 * + segred_group_sizze_100278]; + + double defunc_1_op_res_100284; + + defunc_1_op_res_100284 = x_100282 + x_100283; + x_100282 = defunc_1_op_res_100284; + } + } + } + ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_100282; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128825; + int32_t skip_waves_128826; + + skip_waves_128826 = 1; + + double x_128813; + double x_128814; + + offset_128825 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128798, + sext_i64_i32(segred_group_sizze_100278))) { + x_128813 = ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128825)]; + } + } + offset_128825 = 1; + while (slt32(offset_128825, wave_sizze_128800)) { + if (slt32(local_tid_128798 + offset_128825, + sext_i64_i32(segred_group_sizze_100278)) && + ((local_tid_128798 - squot32(local_tid_128798, + wave_sizze_128800) * + wave_sizze_128800) & (2 * offset_128825 - 1)) == + 0) { + // read array element + { + x_128814 = ((volatile __local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128825)]; + } + // apply reduction operation + { + double defunc_1_op_res_128815 = x_128813 + + x_128814; + + x_128813 = defunc_1_op_res_128815; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_128813; + } + } + 
offset_128825 *= 2; + } + while (slt32(skip_waves_128826, + squot32(sext_i64_i32(segred_group_sizze_100278) + + wave_sizze_128800 - 1, + wave_sizze_128800))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128825 = skip_waves_128826 * wave_sizze_128800; + if (slt32(local_tid_128798 + offset_128825, + sext_i64_i32(segred_group_sizze_100278)) && + ((local_tid_128798 - squot32(local_tid_128798, + wave_sizze_128800) * + wave_sizze_128800) == 0 && + (squot32(local_tid_128798, wave_sizze_128800) & + (2 * skip_waves_128826 - 1)) == 0)) { + // read array element + { + x_128814 = ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798 + + offset_128825)]; + } + // apply reduction operation + { + double defunc_1_op_res_128815 = x_128813 + + x_128814; + + x_128813 = defunc_1_op_res_128815; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128802)[sext_i32_i64(local_tid_128798)] = + x_128813; + } + } + skip_waves_128826 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128798 == 0) { + ((__global double *) mem_124281)[gtid_100127 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_100128 * + k2p2zq_73023 + + gtid_100129] = + x_128813; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_100278 +} +__kernel void mainMagnitudezisegred_large_101024(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128994_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128992_backing_aligned_1, + int64_t N_73007, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_101077, + int64_t groups_per_segment_128978, + int64_t elements_per_thread_128979, + int64_t virt_num_groups_128980, + int64_t threads_per_segment_128982, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124587, + __global + unsigned char *group_res_arr_mem_128983, + __global + unsigned char 
*mainMagnitudezicounter_mem_128985) +{ + #define segred_group_sizze_101076 (mainMagnitudezisegred_group_sizze_101018) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128994_backing_1 = + (__local volatile + char *) sync_arr_mem_128994_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128992_backing_0 = + (__local volatile + char *) red_arr_mem_128992_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128987; + int32_t local_tid_128988; + int64_t group_sizze_128991; + int32_t wave_sizze_128990; + int32_t group_tid_128989; + + global_tid_128987 = get_global_id(0); + local_tid_128988 = get_local_id(0); + group_sizze_128991 = get_local_size(0); + wave_sizze_128990 = LOCKSTEP_WIDTH; + group_tid_128989 = get_group_id(0); + + int32_t phys_tid_101024; + + phys_tid_101024 = global_tid_128987; + + __local char *red_arr_mem_128992; + + red_arr_mem_128992 = (__local char *) red_arr_mem_128992_backing_0; + + __local char *sync_arr_mem_128994; + + sync_arr_mem_128994 = (__local char *) sync_arr_mem_128994_backing_1; + + int32_t phys_group_id_128996; + + phys_group_id_128996 = get_group_id(0); + for (int32_t i_128997 = 0; i_128997 < + sdiv_up32(sext_i64_i32(virt_num_groups_128980) - phys_group_id_128996, + sext_i64_i32(num_groups_101077)); i_128997++) { + int32_t virt_group_id_128998 = phys_group_id_128996 + i_128997 * + sext_i64_i32(num_groups_101077); + int32_t flat_segment_id_128999 = squot32(virt_group_id_128998, + sext_i64_i32(groups_per_segment_128978)); + int64_t global_tid_129000 = srem64(sext_i32_i64(virt_group_id_128998) * + segred_group_sizze_101076 + + sext_i32_i64(local_tid_128988), + segred_group_sizze_101076 * + groups_per_segment_128978); + int64_t gtid_101013 = squot64(sext_i32_i64(flat_segment_id_128999), + k2p2zq_73023); + int64_t gtid_101014 = sext_i32_i64(flat_segment_id_128999) - + squot64(sext_i32_i64(flat_segment_id_128999), 
k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_101023; + double x_acc_129001; + int64_t chunk_sizze_129002; + + chunk_sizze_129002 = smin64(elements_per_thread_128979, + sdiv_up64(n_73011 - global_tid_129000, + threads_per_segment_128982)); + + double x_101080; + double x_101081; + + // neutral-initialise the accumulators + { + x_acc_129001 = 0.0; + } + for (int64_t i_129006 = 0; i_129006 < chunk_sizze_129002; i_129006++) { + gtid_101023 = global_tid_129000 + threads_per_segment_128982 * + i_129006; + // apply map function + { + double x_101086 = ((__global double *) mem_124142)[gtid_101013 * + N_73007 + + gtid_101023]; + bool isnan_res_101087; + + isnan_res_101087 = futrts_isnan64(x_101086); + + double defunc_1_f_res_101088; + + if (isnan_res_101087) { + defunc_1_f_res_101088 = 0.0; + } else { + double x_101085 = ((__global + double *) binop_p_mem_120117)[gtid_101014 * + N_73007 + + gtid_101023]; + double defunc_1_f_res_f_res_101089 = x_101085 * x_101086; + + defunc_1_f_res_101088 = defunc_1_f_res_f_res_101089; + } + // save map-out results + { } + // load accumulator + { + x_101080 = x_acc_129001; + } + // load new values + { + x_101081 = defunc_1_f_res_101088; + } + // apply reduction operator + { + double defunc_1_op_res_101082 = x_101080 + x_101081; + + // store in accumulator + { + x_acc_129001 = defunc_1_op_res_101082; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101080 = x_acc_129001; + ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_101080; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129007; + int32_t skip_waves_129008; + + skip_waves_129008 = 1; + + double x_129003; + double x_129004; + + offset_129007 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128988, + sext_i64_i32(segred_group_sizze_101076))) { + x_129003 = ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129007)]; + } + } + offset_129007 = 1; + 
while (slt32(offset_129007, wave_sizze_128990)) { + if (slt32(local_tid_128988 + offset_129007, + sext_i64_i32(segred_group_sizze_101076)) && + ((local_tid_128988 - squot32(local_tid_128988, + wave_sizze_128990) * + wave_sizze_128990) & (2 * offset_129007 - 1)) == 0) { + // read array element + { + x_129004 = ((volatile __local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129007)]; + } + // apply reduction operation + { + double defunc_1_op_res_129005 = x_129003 + x_129004; + + x_129003 = defunc_1_op_res_129005; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_129003; + } + } + offset_129007 *= 2; + } + while (slt32(skip_waves_129008, + squot32(sext_i64_i32(segred_group_sizze_101076) + + wave_sizze_128990 - 1, wave_sizze_128990))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129007 = skip_waves_129008 * wave_sizze_128990; + if (slt32(local_tid_128988 + offset_129007, + sext_i64_i32(segred_group_sizze_101076)) && + ((local_tid_128988 - squot32(local_tid_128988, + wave_sizze_128990) * + wave_sizze_128990) == 0 && (squot32(local_tid_128988, + wave_sizze_128990) & (2 * + skip_waves_129008 - + 1)) == + 0)) { + // read array element + { + x_129004 = ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129007)]; + } + // apply reduction operation + { + double defunc_1_op_res_129005 = x_129003 + x_129004; + + x_129003 = defunc_1_op_res_129005; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_129003; + } + } + skip_waves_129008 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128988) == (int64_t) 0) { + x_acc_129001 = x_129003; + } + } + if (groups_per_segment_128978 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128988 == 0) { + ((__global double 
*) mem_124587)[gtid_101013 * + k2p2zq_73023 + + gtid_101014] = + x_acc_129001; + } + } + } else { + int32_t old_counter_129009; + + // first thread in group saves group result to global memory + { + if (local_tid_128988 == 0) { + ((__global + double *) group_res_arr_mem_128983)[sext_i32_i64(virt_group_id_128998) * + segred_group_sizze_101076] = + x_acc_129001; + mem_fence_global(); + old_counter_129009 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128985)[sext_i32_i64(srem32(flat_segment_id_128999, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128994)[(int64_t) 0] = + old_counter_129009 == groups_per_segment_128978 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129010; + + is_last_group_129010 = ((__local + bool *) sync_arr_mem_128994)[(int64_t) 0]; + if (is_last_group_129010) { + if (local_tid_128988 == 0) { + old_counter_129009 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128985)[sext_i32_i64(srem32(flat_segment_id_128999, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128978)); + } + // read in the per-group-results + { + int64_t read_per_thread_129011 = + sdiv_up64(groups_per_segment_128978, + segred_group_sizze_101076); + + x_101080 = 0.0; + for (int64_t i_129012 = 0; i_129012 < + read_per_thread_129011; i_129012++) { + int64_t group_res_id_129013 = + sext_i32_i64(local_tid_128988) * + read_per_thread_129011 + i_129012; + int64_t index_of_group_res_129014 = + sext_i32_i64(flat_segment_id_128999) * + groups_per_segment_128978 + group_res_id_129013; + + if (slt64(group_res_id_129013, + groups_per_segment_128978)) { + x_101081 = ((__global + double *) group_res_arr_mem_128983)[index_of_group_res_129014 * + segred_group_sizze_101076]; + + double defunc_1_op_res_101082; + + defunc_1_op_res_101082 = x_101080 + x_101081; + x_101080 = defunc_1_op_res_101082; + } + } + } + ((__local + double *) 
red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_101080; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129015; + int32_t skip_waves_129016; + + skip_waves_129016 = 1; + + double x_129003; + double x_129004; + + offset_129015 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128988, + sext_i64_i32(segred_group_sizze_101076))) { + x_129003 = ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129015)]; + } + } + offset_129015 = 1; + while (slt32(offset_129015, wave_sizze_128990)) { + if (slt32(local_tid_128988 + offset_129015, + sext_i64_i32(segred_group_sizze_101076)) && + ((local_tid_128988 - squot32(local_tid_128988, + wave_sizze_128990) * + wave_sizze_128990) & (2 * offset_129015 - 1)) == + 0) { + // read array element + { + x_129004 = ((volatile __local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129015)]; + } + // apply reduction operation + { + double defunc_1_op_res_129005 = x_129003 + + x_129004; + + x_129003 = defunc_1_op_res_129005; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_129003; + } + } + offset_129015 *= 2; + } + while (slt32(skip_waves_129016, + squot32(sext_i64_i32(segred_group_sizze_101076) + + wave_sizze_128990 - 1, + wave_sizze_128990))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129015 = skip_waves_129016 * wave_sizze_128990; + if (slt32(local_tid_128988 + offset_129015, + sext_i64_i32(segred_group_sizze_101076)) && + ((local_tid_128988 - squot32(local_tid_128988, + wave_sizze_128990) * + wave_sizze_128990) == 0 && + (squot32(local_tid_128988, wave_sizze_128990) & + (2 * skip_waves_129016 - 1)) == 0)) { + // read array element + { + x_129004 = ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988 + + offset_129015)]; + } + // apply reduction operation + { + double defunc_1_op_res_129005 = x_129003 + + 
x_129004; + + x_129003 = defunc_1_op_res_129005; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128992)[sext_i32_i64(local_tid_128988)] = + x_129003; + } + } + skip_waves_129016 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128988 == 0) { + ((__global double *) mem_124587)[gtid_101013 * + k2p2zq_73023 + + gtid_101014] = + x_129003; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101076 +} +__kernel void mainMagnitudezisegred_large_101161(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129082_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129080_backing_aligned_1, + int64_t k2p2zq_73023, + int64_t num_groups_101210, + int64_t groups_per_segment_129066, + int64_t elements_per_thread_129067, + int64_t virt_num_groups_129068, + int64_t threads_per_segment_129070, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124653, + __global + unsigned char *group_res_arr_mem_129071, + __global + unsigned char *mainMagnitudezicounter_mem_129073) +{ + #define segred_group_sizze_101209 (mainMagnitudezisegred_group_sizze_101155) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129082_backing_1 = + (__local volatile + char *) sync_arr_mem_129082_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129080_backing_0 = + (__local volatile + char *) red_arr_mem_129080_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129075; + int32_t local_tid_129076; + int64_t group_sizze_129079; + int32_t wave_sizze_129078; + int32_t group_tid_129077; + + global_tid_129075 = get_global_id(0); + local_tid_129076 = get_local_id(0); + group_sizze_129079 = get_local_size(0); + wave_sizze_129078 = 
LOCKSTEP_WIDTH; + group_tid_129077 = get_group_id(0); + + int32_t phys_tid_101161; + + phys_tid_101161 = global_tid_129075; + + __local char *red_arr_mem_129080; + + red_arr_mem_129080 = (__local char *) red_arr_mem_129080_backing_0; + + __local char *sync_arr_mem_129082; + + sync_arr_mem_129082 = (__local char *) sync_arr_mem_129082_backing_1; + + int32_t phys_group_id_129084; + + phys_group_id_129084 = get_group_id(0); + for (int32_t i_129085 = 0; i_129085 < + sdiv_up32(sext_i64_i32(virt_num_groups_129068) - phys_group_id_129084, + sext_i64_i32(num_groups_101210)); i_129085++) { + int32_t virt_group_id_129086 = phys_group_id_129084 + i_129085 * + sext_i64_i32(num_groups_101210); + int32_t flat_segment_id_129087 = squot32(virt_group_id_129086, + sext_i64_i32(groups_per_segment_129066)); + int64_t global_tid_129088 = srem64(sext_i32_i64(virt_group_id_129086) * + segred_group_sizze_101209 + + sext_i32_i64(local_tid_129076), + segred_group_sizze_101209 * + groups_per_segment_129066); + int64_t gtid_101150 = squot64(sext_i32_i64(flat_segment_id_129087), + k2p2zq_73023); + int64_t gtid_101151 = sext_i32_i64(flat_segment_id_129087) - + squot64(sext_i32_i64(flat_segment_id_129087), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_101160; + double x_acc_129089; + int64_t chunk_sizze_129090; + + chunk_sizze_129090 = smin64(elements_per_thread_129067, + sdiv_up64(k2p2zq_73023 - global_tid_129088, + threads_per_segment_129070)); + + double x_101213; + double x_101214; + + // neutral-initialise the accumulators + { + x_acc_129089 = 0.0; + } + for (int64_t i_129094 = 0; i_129094 < chunk_sizze_129090; i_129094++) { + gtid_101160 = global_tid_129088 + threads_per_segment_129070 * + i_129094; + // apply map function + { + double x_101219 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_101150 * + k2p2zq_73023 + + gtid_101160]; + double x_101220 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_101150 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_101151 * + 
k2p2zq_73023 + + gtid_101160]; + double defunc_1_f_res_101221 = x_101219 * x_101220; + + // save map-out results + { } + // load accumulator + { + x_101213 = x_acc_129089; + } + // load new values + { + x_101214 = defunc_1_f_res_101221; + } + // apply reduction operator + { + double defunc_1_op_res_101215 = x_101213 + x_101214; + + // store in accumulator + { + x_acc_129089 = defunc_1_op_res_101215; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101213 = x_acc_129089; + ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_101213; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129095; + int32_t skip_waves_129096; + + skip_waves_129096 = 1; + + double x_129091; + double x_129092; + + offset_129095 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129076, + sext_i64_i32(segred_group_sizze_101209))) { + x_129091 = ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129095)]; + } + } + offset_129095 = 1; + while (slt32(offset_129095, wave_sizze_129078)) { + if (slt32(local_tid_129076 + offset_129095, + sext_i64_i32(segred_group_sizze_101209)) && + ((local_tid_129076 - squot32(local_tid_129076, + wave_sizze_129078) * + wave_sizze_129078) & (2 * offset_129095 - 1)) == 0) { + // read array element + { + x_129092 = ((volatile __local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129095)]; + } + // apply reduction operation + { + double defunc_1_op_res_129093 = x_129091 + x_129092; + + x_129091 = defunc_1_op_res_129093; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_129091; + } + } + offset_129095 *= 2; + } + while (slt32(skip_waves_129096, + squot32(sext_i64_i32(segred_group_sizze_101209) + + wave_sizze_129078 - 1, wave_sizze_129078))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129095 = skip_waves_129096 * wave_sizze_129078; + if 
(slt32(local_tid_129076 + offset_129095, + sext_i64_i32(segred_group_sizze_101209)) && + ((local_tid_129076 - squot32(local_tid_129076, + wave_sizze_129078) * + wave_sizze_129078) == 0 && (squot32(local_tid_129076, + wave_sizze_129078) & (2 * + skip_waves_129096 - + 1)) == + 0)) { + // read array element + { + x_129092 = ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129095)]; + } + // apply reduction operation + { + double defunc_1_op_res_129093 = x_129091 + x_129092; + + x_129091 = defunc_1_op_res_129093; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_129091; + } + } + skip_waves_129096 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129076) == (int64_t) 0) { + x_acc_129089 = x_129091; + } + } + if (groups_per_segment_129066 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129076 == 0) { + ((__global double *) mem_124653)[gtid_101150 * + k2p2zq_73023 + + gtid_101151] = + x_acc_129089; + } + } + } else { + int32_t old_counter_129097; + + // first thread in group saves group result to global memory + { + if (local_tid_129076 == 0) { + ((__global + double *) group_res_arr_mem_129071)[sext_i32_i64(virt_group_id_129086) * + segred_group_sizze_101209] = + x_acc_129089; + mem_fence_global(); + old_counter_129097 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129073)[sext_i32_i64(srem32(flat_segment_id_129087, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129082)[(int64_t) 0] = + old_counter_129097 == groups_per_segment_129066 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129098; + + is_last_group_129098 = ((__local + bool *) sync_arr_mem_129082)[(int64_t) 0]; + if (is_last_group_129098) { + if (local_tid_129076 == 0) { + old_counter_129097 
= + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129073)[sext_i32_i64(srem32(flat_segment_id_129087, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129066)); + } + // read in the per-group-results + { + int64_t read_per_thread_129099 = + sdiv_up64(groups_per_segment_129066, + segred_group_sizze_101209); + + x_101213 = 0.0; + for (int64_t i_129100 = 0; i_129100 < + read_per_thread_129099; i_129100++) { + int64_t group_res_id_129101 = + sext_i32_i64(local_tid_129076) * + read_per_thread_129099 + i_129100; + int64_t index_of_group_res_129102 = + sext_i32_i64(flat_segment_id_129087) * + groups_per_segment_129066 + group_res_id_129101; + + if (slt64(group_res_id_129101, + groups_per_segment_129066)) { + x_101214 = ((__global + double *) group_res_arr_mem_129071)[index_of_group_res_129102 * + segred_group_sizze_101209]; + + double defunc_1_op_res_101215; + + defunc_1_op_res_101215 = x_101213 + x_101214; + x_101213 = defunc_1_op_res_101215; + } + } + } + ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_101213; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129103; + int32_t skip_waves_129104; + + skip_waves_129104 = 1; + + double x_129091; + double x_129092; + + offset_129103 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129076, + sext_i64_i32(segred_group_sizze_101209))) { + x_129091 = ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129103)]; + } + } + offset_129103 = 1; + while (slt32(offset_129103, wave_sizze_129078)) { + if (slt32(local_tid_129076 + offset_129103, + sext_i64_i32(segred_group_sizze_101209)) && + ((local_tid_129076 - squot32(local_tid_129076, + wave_sizze_129078) * + wave_sizze_129078) & (2 * offset_129103 - 1)) == + 0) { + // read array element + { + x_129092 = ((volatile __local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129103)]; + } + // 
apply reduction operation + { + double defunc_1_op_res_129093 = x_129091 + + x_129092; + + x_129091 = defunc_1_op_res_129093; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_129091; + } + } + offset_129103 *= 2; + } + while (slt32(skip_waves_129104, + squot32(sext_i64_i32(segred_group_sizze_101209) + + wave_sizze_129078 - 1, + wave_sizze_129078))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129103 = skip_waves_129104 * wave_sizze_129078; + if (slt32(local_tid_129076 + offset_129103, + sext_i64_i32(segred_group_sizze_101209)) && + ((local_tid_129076 - squot32(local_tid_129076, + wave_sizze_129078) * + wave_sizze_129078) == 0 && + (squot32(local_tid_129076, wave_sizze_129078) & + (2 * skip_waves_129104 - 1)) == 0)) { + // read array element + { + x_129092 = ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076 + + offset_129103)]; + } + // apply reduction operation + { + double defunc_1_op_res_129093 = x_129091 + + x_129092; + + x_129091 = defunc_1_op_res_129093; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129080)[sext_i32_i64(local_tid_129076)] = + x_129091; + } + } + skip_waves_129104 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129076 == 0) { + ((__global double *) mem_124653)[gtid_101150 * + k2p2zq_73023 + + gtid_101151] = + x_129091; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101209 +} +__kernel void mainMagnitudezisegred_large_101291(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129214_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129212_backing_aligned_1, + int64_t N_73007, + int64_t k2p2zq_73023, + int64_t num_groups_101338, + int64_t groups_per_segment_129198, + int64_t elements_per_thread_129199, + int64_t virt_num_groups_129200, + int64_t threads_per_segment_129202, + 
__global + unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124877, + __global + unsigned char *group_res_arr_mem_129203, + __global + unsigned char *mainMagnitudezicounter_mem_129205) +{ + #define segred_group_sizze_101337 (mainMagnitudezisegred_group_sizze_101285) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129214_backing_1 = + (__local volatile + char *) sync_arr_mem_129214_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129212_backing_0 = + (__local volatile + char *) red_arr_mem_129212_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129207; + int32_t local_tid_129208; + int64_t group_sizze_129211; + int32_t wave_sizze_129210; + int32_t group_tid_129209; + + global_tid_129207 = get_global_id(0); + local_tid_129208 = get_local_id(0); + group_sizze_129211 = get_local_size(0); + wave_sizze_129210 = LOCKSTEP_WIDTH; + group_tid_129209 = get_group_id(0); + + int32_t phys_tid_101291; + + phys_tid_101291 = global_tid_129207; + + __local char *red_arr_mem_129212; + + red_arr_mem_129212 = (__local char *) red_arr_mem_129212_backing_0; + + __local char *sync_arr_mem_129214; + + sync_arr_mem_129214 = (__local char *) sync_arr_mem_129214_backing_1; + + int32_t phys_group_id_129216; + + phys_group_id_129216 = get_group_id(0); + for (int32_t i_129217 = 0; i_129217 < + sdiv_up32(sext_i64_i32(virt_num_groups_129200) - phys_group_id_129216, + sext_i64_i32(num_groups_101338)); i_129217++) { + int32_t virt_group_id_129218 = phys_group_id_129216 + i_129217 * + sext_i64_i32(num_groups_101338); + int32_t flat_segment_id_129219 = squot32(virt_group_id_129218, + sext_i64_i32(groups_per_segment_129198)); + int64_t global_tid_129220 = srem64(sext_i32_i64(virt_group_id_129218) * + segred_group_sizze_101337 + + sext_i32_i64(local_tid_129208), + segred_group_sizze_101337 * + 
groups_per_segment_129198); + int64_t gtid_101280 = squot64(sext_i32_i64(flat_segment_id_129219), + N_73007); + int64_t gtid_101281 = sext_i32_i64(flat_segment_id_129219) - + squot64(sext_i32_i64(flat_segment_id_129219), N_73007) * + N_73007; + int64_t gtid_101290; + double x_acc_129221; + int64_t chunk_sizze_129222; + + chunk_sizze_129222 = smin64(elements_per_thread_129199, + sdiv_up64(k2p2zq_73023 - global_tid_129220, + threads_per_segment_129202)); + + double x_101341; + double x_101342; + + // neutral-initialise the accumulators + { + x_acc_129221 = 0.0; + } + for (int64_t i_129226 = 0; i_129226 < chunk_sizze_129222; i_129226++) { + gtid_101290 = global_tid_129220 + threads_per_segment_129202 * + i_129226; + // apply map function + { + double x_101346 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_101280 * + k2p2zq_73023 + + gtid_101290]; + double x_101347 = ((__global double *) mem_120124)[gtid_101281 * + k2p2zq_73023 + + gtid_101290]; + double defunc_1_f_res_101348 = x_101346 * x_101347; + + // save map-out results + { } + // load accumulator + { + x_101341 = x_acc_129221; + } + // load new values + { + x_101342 = defunc_1_f_res_101348; + } + // apply reduction operator + { + double defunc_1_op_res_101343 = x_101341 + x_101342; + + // store in accumulator + { + x_acc_129221 = defunc_1_op_res_101343; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101341 = x_acc_129221; + ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_101341; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129227; + int32_t skip_waves_129228; + + skip_waves_129228 = 1; + + double x_129223; + double x_129224; + + offset_129227 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129208, + sext_i64_i32(segred_group_sizze_101337))) { + x_129223 = ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129227)]; + } + } + offset_129227 = 1; + while 
(slt32(offset_129227, wave_sizze_129210)) { + if (slt32(local_tid_129208 + offset_129227, + sext_i64_i32(segred_group_sizze_101337)) && + ((local_tid_129208 - squot32(local_tid_129208, + wave_sizze_129210) * + wave_sizze_129210) & (2 * offset_129227 - 1)) == 0) { + // read array element + { + x_129224 = ((volatile __local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129227)]; + } + // apply reduction operation + { + double defunc_1_op_res_129225 = x_129223 + x_129224; + + x_129223 = defunc_1_op_res_129225; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_129223; + } + } + offset_129227 *= 2; + } + while (slt32(skip_waves_129228, + squot32(sext_i64_i32(segred_group_sizze_101337) + + wave_sizze_129210 - 1, wave_sizze_129210))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129227 = skip_waves_129228 * wave_sizze_129210; + if (slt32(local_tid_129208 + offset_129227, + sext_i64_i32(segred_group_sizze_101337)) && + ((local_tid_129208 - squot32(local_tid_129208, + wave_sizze_129210) * + wave_sizze_129210) == 0 && (squot32(local_tid_129208, + wave_sizze_129210) & (2 * + skip_waves_129228 - + 1)) == + 0)) { + // read array element + { + x_129224 = ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129227)]; + } + // apply reduction operation + { + double defunc_1_op_res_129225 = x_129223 + x_129224; + + x_129223 = defunc_1_op_res_129225; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_129223; + } + } + skip_waves_129228 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129208) == (int64_t) 0) { + x_acc_129221 = x_129223; + } + } + if (groups_per_segment_129198 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129208 == 0) { + ((__global double *) 
mem_124877)[gtid_101280 * N_73007 + + gtid_101281] = + x_acc_129221; + } + } + } else { + int32_t old_counter_129229; + + // first thread in group saves group result to global memory + { + if (local_tid_129208 == 0) { + ((__global + double *) group_res_arr_mem_129203)[sext_i32_i64(virt_group_id_129218) * + segred_group_sizze_101337] = + x_acc_129221; + mem_fence_global(); + old_counter_129229 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129205)[sext_i32_i64(srem32(flat_segment_id_129219, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129214)[(int64_t) 0] = + old_counter_129229 == groups_per_segment_129198 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129230; + + is_last_group_129230 = ((__local + bool *) sync_arr_mem_129214)[(int64_t) 0]; + if (is_last_group_129230) { + if (local_tid_129208 == 0) { + old_counter_129229 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129205)[sext_i32_i64(srem32(flat_segment_id_129219, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129198)); + } + // read in the per-group-results + { + int64_t read_per_thread_129231 = + sdiv_up64(groups_per_segment_129198, + segred_group_sizze_101337); + + x_101341 = 0.0; + for (int64_t i_129232 = 0; i_129232 < + read_per_thread_129231; i_129232++) { + int64_t group_res_id_129233 = + sext_i32_i64(local_tid_129208) * + read_per_thread_129231 + i_129232; + int64_t index_of_group_res_129234 = + sext_i32_i64(flat_segment_id_129219) * + groups_per_segment_129198 + group_res_id_129233; + + if (slt64(group_res_id_129233, + groups_per_segment_129198)) { + x_101342 = ((__global + double *) group_res_arr_mem_129203)[index_of_group_res_129234 * + segred_group_sizze_101337]; + + double defunc_1_op_res_101343; + + defunc_1_op_res_101343 = x_101341 + x_101342; + x_101341 = defunc_1_op_res_101343; + } + } + } + ((__local + double *) 
red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_101341; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129235; + int32_t skip_waves_129236; + + skip_waves_129236 = 1; + + double x_129223; + double x_129224; + + offset_129235 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129208, + sext_i64_i32(segred_group_sizze_101337))) { + x_129223 = ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129235)]; + } + } + offset_129235 = 1; + while (slt32(offset_129235, wave_sizze_129210)) { + if (slt32(local_tid_129208 + offset_129235, + sext_i64_i32(segred_group_sizze_101337)) && + ((local_tid_129208 - squot32(local_tid_129208, + wave_sizze_129210) * + wave_sizze_129210) & (2 * offset_129235 - 1)) == + 0) { + // read array element + { + x_129224 = ((volatile __local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129235)]; + } + // apply reduction operation + { + double defunc_1_op_res_129225 = x_129223 + + x_129224; + + x_129223 = defunc_1_op_res_129225; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_129223; + } + } + offset_129235 *= 2; + } + while (slt32(skip_waves_129236, + squot32(sext_i64_i32(segred_group_sizze_101337) + + wave_sizze_129210 - 1, + wave_sizze_129210))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129235 = skip_waves_129236 * wave_sizze_129210; + if (slt32(local_tid_129208 + offset_129235, + sext_i64_i32(segred_group_sizze_101337)) && + ((local_tid_129208 - squot32(local_tid_129208, + wave_sizze_129210) * + wave_sizze_129210) == 0 && + (squot32(local_tid_129208, wave_sizze_129210) & + (2 * skip_waves_129236 - 1)) == 0)) { + // read array element + { + x_129224 = ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208 + + offset_129235)]; + } + // apply reduction operation + { + double defunc_1_op_res_129225 = x_129223 + + 
x_129224; + + x_129223 = defunc_1_op_res_129225; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129212)[sext_i32_i64(local_tid_129208)] = + x_129223; + } + } + skip_waves_129236 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129208 == 0) { + ((__global double *) mem_124877)[gtid_101280 * + N_73007 + + gtid_101281] = + x_129223; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101337 +} +__kernel void mainMagnitudezisegred_large_101712(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129444_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129442_backing_aligned_1, + int64_t N_73007, + int64_t n_73011, + int64_t num_groups_101762, + int64_t groups_per_segment_129428, + int64_t elements_per_thread_129429, + int64_t virt_num_groups_129430, + int64_t threads_per_segment_129432, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *mem_124946, + __global + unsigned char *mem_124949, + __global + unsigned char *group_res_arr_mem_129433, + __global + unsigned char *mainMagnitudezicounter_mem_129435) +{ + #define segred_group_sizze_101761 (mainMagnitudezisegred_group_sizze_101706) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129444_backing_1 = + (__local volatile + char *) sync_arr_mem_129444_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129442_backing_0 = + (__local volatile + char *) red_arr_mem_129442_backing_aligned_1; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129437; + int32_t local_tid_129438; + int64_t 
group_sizze_129441; + int32_t wave_sizze_129440; + int32_t group_tid_129439; + + global_tid_129437 = get_global_id(0); + local_tid_129438 = get_local_id(0); + group_sizze_129441 = get_local_size(0); + wave_sizze_129440 = LOCKSTEP_WIDTH; + group_tid_129439 = get_group_id(0); + + int32_t phys_tid_101712; + + phys_tid_101712 = global_tid_129437; + + __local char *red_arr_mem_129442; + + red_arr_mem_129442 = (__local char *) red_arr_mem_129442_backing_0; + + __local char *sync_arr_mem_129444; + + sync_arr_mem_129444 = (__local char *) sync_arr_mem_129444_backing_1; + + int32_t phys_group_id_129446; + + phys_group_id_129446 = get_group_id(0); + for (int32_t i_129447 = 0; i_129447 < + sdiv_up32(sext_i64_i32(virt_num_groups_129430) - phys_group_id_129446, + sext_i64_i32(num_groups_101762)); i_129447++) { + int32_t virt_group_id_129448 = phys_group_id_129446 + i_129447 * + sext_i64_i32(num_groups_101762); + int32_t flat_segment_id_129449 = squot32(virt_group_id_129448, + sext_i64_i32(groups_per_segment_129428)); + int64_t global_tid_129450 = srem64(sext_i32_i64(virt_group_id_129448) * + segred_group_sizze_101761 + + sext_i32_i64(local_tid_129438), + segred_group_sizze_101761 * + groups_per_segment_129428); + int64_t gtid_101703 = sext_i32_i64(flat_segment_id_129449); + int64_t gtid_101711; + double x_acc_129451; + int64_t chunk_sizze_129452; + + chunk_sizze_129452 = smin64(elements_per_thread_129429, + sdiv_up64(n_73011 - global_tid_129450, + threads_per_segment_129432)); + + double x_101765; + double x_101766; + + // neutral-initialise the accumulators + { + x_acc_129451 = 0.0; + } + for (int64_t i_129456 = 0; i_129456 < chunk_sizze_129452; i_129456++) { + gtid_101711 = global_tid_129450 + threads_per_segment_129432 * + i_129456; + // apply map function + { + int64_t defunc_0_f_res_101769 = ((__global + int64_t *) mem_124946)[gtid_101703]; + bool cond_101771 = slt64(gtid_101711, defunc_0_f_res_101769); + double defunc_0_f_res_101772; + + if (cond_101771) { + bool y_101774 
= slt64(gtid_101711, N_73007); + bool index_certs_101776; + + if (!y_101774) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 633) == -1) { + global_failure_args[0] = gtid_101711; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_101777 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101703 * + N_73007 + + gtid_101711]; + + defunc_0_f_res_101772 = defunc_0_f_res_t_res_101777; + } else { + defunc_0_f_res_101772 = 0.0; + } + + double defunc_0_f_res_101778 = defunc_0_f_res_101772 * + defunc_0_f_res_101772; + + // save map-out results + { } + // load accumulator + { + x_101765 = x_acc_129451; + } + // load new values + { + x_101766 = defunc_0_f_res_101778; + } + // apply reduction operator + { + double defunc_1_op_res_101767 = x_101765 + x_101766; + + // store in accumulator + { + x_acc_129451 = defunc_1_op_res_101767; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101765 = x_acc_129451; + ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_101765; + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129457; + int32_t skip_waves_129458; + + skip_waves_129458 = 1; + + double x_129453; + double x_129454; + + offset_129457 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129438, + sext_i64_i32(segred_group_sizze_101761))) { + x_129453 = ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129457)]; + } + } + offset_129457 = 1; + while (slt32(offset_129457, wave_sizze_129440)) { + if (slt32(local_tid_129438 + offset_129457, + sext_i64_i32(segred_group_sizze_101761)) && + ((local_tid_129438 - squot32(local_tid_129438, + wave_sizze_129440) * + wave_sizze_129440) & (2 * offset_129457 - 1)) == 0) { + // read array element + { + x_129454 = ((volatile __local + double *) 
red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129457)]; + } + // apply reduction operation + { + double defunc_1_op_res_129455 = x_129453 + x_129454; + + x_129453 = defunc_1_op_res_129455; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_129453; + } + } + offset_129457 *= 2; + } + while (slt32(skip_waves_129458, + squot32(sext_i64_i32(segred_group_sizze_101761) + + wave_sizze_129440 - 1, wave_sizze_129440))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129457 = skip_waves_129458 * wave_sizze_129440; + if (slt32(local_tid_129438 + offset_129457, + sext_i64_i32(segred_group_sizze_101761)) && + ((local_tid_129438 - squot32(local_tid_129438, + wave_sizze_129440) * + wave_sizze_129440) == 0 && (squot32(local_tid_129438, + wave_sizze_129440) & (2 * + skip_waves_129458 - + 1)) == + 0)) { + // read array element + { + x_129454 = ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129457)]; + } + // apply reduction operation + { + double defunc_1_op_res_129455 = x_129453 + x_129454; + + x_129453 = defunc_1_op_res_129455; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_129453; + } + } + skip_waves_129458 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129438) == (int64_t) 0) { + x_acc_129451 = x_129453; + } + } + if (groups_per_segment_129428 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129438 == 0) { + ((__global double *) mem_124949)[gtid_101703] = + x_acc_129451; + } + } + } else { + int32_t old_counter_129459; + + // first thread in group saves group result to global memory + { + if (local_tid_129438 == 0) { + ((__global + double *) group_res_arr_mem_129433)[sext_i32_i64(virt_group_id_129448) * + segred_group_sizze_101761] = + x_acc_129451; + 
mem_fence_global(); + old_counter_129459 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129435)[sext_i32_i64(srem32(flat_segment_id_129449, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129444)[(int64_t) 0] = + old_counter_129459 == groups_per_segment_129428 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129460; + + is_last_group_129460 = ((__local + bool *) sync_arr_mem_129444)[(int64_t) 0]; + if (is_last_group_129460) { + if (local_tid_129438 == 0) { + old_counter_129459 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129435)[sext_i32_i64(srem32(flat_segment_id_129449, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129428)); + } + // read in the per-group-results + { + int64_t read_per_thread_129461 = + sdiv_up64(groups_per_segment_129428, + segred_group_sizze_101761); + + x_101765 = 0.0; + for (int64_t i_129462 = 0; i_129462 < + read_per_thread_129461; i_129462++) { + int64_t group_res_id_129463 = + sext_i32_i64(local_tid_129438) * + read_per_thread_129461 + i_129462; + int64_t index_of_group_res_129464 = + sext_i32_i64(flat_segment_id_129449) * + groups_per_segment_129428 + group_res_id_129463; + + if (slt64(group_res_id_129463, + groups_per_segment_129428)) { + x_101766 = ((__global + double *) group_res_arr_mem_129433)[index_of_group_res_129464 * + segred_group_sizze_101761]; + + double defunc_1_op_res_101767; + + defunc_1_op_res_101767 = x_101765 + x_101766; + x_101765 = defunc_1_op_res_101767; + } + } + } + ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_101765; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129465; + int32_t skip_waves_129466; + + skip_waves_129466 = 1; + + double x_129453; + double x_129454; + + offset_129465 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129438, + 
sext_i64_i32(segred_group_sizze_101761))) { + x_129453 = ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129465)]; + } + } + offset_129465 = 1; + while (slt32(offset_129465, wave_sizze_129440)) { + if (slt32(local_tid_129438 + offset_129465, + sext_i64_i32(segred_group_sizze_101761)) && + ((local_tid_129438 - squot32(local_tid_129438, + wave_sizze_129440) * + wave_sizze_129440) & (2 * offset_129465 - 1)) == + 0) { + // read array element + { + x_129454 = ((volatile __local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129465)]; + } + // apply reduction operation + { + double defunc_1_op_res_129455 = x_129453 + + x_129454; + + x_129453 = defunc_1_op_res_129455; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_129453; + } + } + offset_129465 *= 2; + } + while (slt32(skip_waves_129466, + squot32(sext_i64_i32(segred_group_sizze_101761) + + wave_sizze_129440 - 1, + wave_sizze_129440))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129465 = skip_waves_129466 * wave_sizze_129440; + if (slt32(local_tid_129438 + offset_129465, + sext_i64_i32(segred_group_sizze_101761)) && + ((local_tid_129438 - squot32(local_tid_129438, + wave_sizze_129440) * + wave_sizze_129440) == 0 && + (squot32(local_tid_129438, wave_sizze_129440) & + (2 * skip_waves_129466 - 1)) == 0)) { + // read array element + { + x_129454 = ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438 + + offset_129465)]; + } + // apply reduction operation + { + double defunc_1_op_res_129455 = x_129453 + + x_129454; + + x_129453 = defunc_1_op_res_129455; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129442)[sext_i32_i64(local_tid_129438)] = + x_129453; + } + } + skip_waves_129466 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129438 == 0) { + ((__global double *) mem_124949)[gtid_101703] = + x_129453; + } + } + 
} + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101761 +} +__kernel void mainMagnitudezisegred_large_101736(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129384_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129382_backing_aligned_1, + int64_t N_73007, + int64_t n_73011, + int64_t num_groups_101748, + int64_t groups_per_segment_129368, + int64_t elements_per_thread_129369, + int64_t virt_num_groups_129370, + int64_t threads_per_segment_129372, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124946, + __global + unsigned char *group_res_arr_mem_129373, + __global + unsigned char *mainMagnitudezicounter_mem_129375) +{ + #define segred_group_sizze_101747 (mainMagnitudezisegred_group_sizze_101730) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129384_backing_1 = + (__local volatile + char *) sync_arr_mem_129384_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129382_backing_0 = + (__local volatile + char *) red_arr_mem_129382_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129377; + int32_t local_tid_129378; + int64_t group_sizze_129381; + int32_t wave_sizze_129380; + int32_t group_tid_129379; + + global_tid_129377 = get_global_id(0); + local_tid_129378 = get_local_id(0); + group_sizze_129381 = get_local_size(0); + wave_sizze_129380 = LOCKSTEP_WIDTH; + group_tid_129379 = get_group_id(0); + + int32_t phys_tid_101736; + + phys_tid_101736 = global_tid_129377; + + __local char *red_arr_mem_129382; + + red_arr_mem_129382 = (__local char *) red_arr_mem_129382_backing_0; + + __local char *sync_arr_mem_129384; + + sync_arr_mem_129384 = (__local char *) sync_arr_mem_129384_backing_1; + + int32_t phys_group_id_129386; + + phys_group_id_129386 = get_group_id(0); + for (int32_t i_129387 = 0; i_129387 < + 
sdiv_up32(sext_i64_i32(virt_num_groups_129370) - phys_group_id_129386, + sext_i64_i32(num_groups_101748)); i_129387++) { + int32_t virt_group_id_129388 = phys_group_id_129386 + i_129387 * + sext_i64_i32(num_groups_101748); + int32_t flat_segment_id_129389 = squot32(virt_group_id_129388, + sext_i64_i32(groups_per_segment_129368)); + int64_t global_tid_129390 = srem64(sext_i32_i64(virt_group_id_129388) * + segred_group_sizze_101747 + + sext_i32_i64(local_tid_129378), + segred_group_sizze_101747 * + groups_per_segment_129368); + int64_t gtid_101727 = sext_i32_i64(flat_segment_id_129389); + int64_t gtid_101735; + int64_t x_acc_129391; + int64_t chunk_sizze_129392; + + chunk_sizze_129392 = smin64(elements_per_thread_129369, + sdiv_up64(n_73011 - global_tid_129390, + threads_per_segment_129372)); + + int64_t x_101751; + int64_t x_101752; + + // neutral-initialise the accumulators + { + x_acc_129391 = (int64_t) 0; + } + for (int64_t i_129396 = 0; i_129396 < chunk_sizze_129392; i_129396++) { + gtid_101735 = global_tid_129390 + threads_per_segment_129372 * + i_129396; + // apply map function + { + double x_101755 = ((__global double *) mem_124142)[gtid_101727 * + N_73007 + + gtid_101735]; + bool isnan_res_101756; + + isnan_res_101756 = futrts_isnan64(x_101755); + + bool cond_101757 = !isnan_res_101756; + int64_t defunc_0_f_res_101758 = btoi_bool_i64(cond_101757); + + // save map-out results + { } + // load accumulator + { + x_101751 = x_acc_129391; + } + // load new values + { + x_101752 = defunc_0_f_res_101758; + } + // apply reduction operator + { + int64_t defunc_1_op_res_101753 = add64(x_101751, x_101752); + + // store in accumulator + { + x_acc_129391 = defunc_1_op_res_101753; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101751 = x_acc_129391; + ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_101751; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129397; + int32_t skip_waves_129398; + + 
skip_waves_129398 = 1; + + int64_t x_129393; + int64_t x_129394; + + offset_129397 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129378, + sext_i64_i32(segred_group_sizze_101747))) { + x_129393 = ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129397)]; + } + } + offset_129397 = 1; + while (slt32(offset_129397, wave_sizze_129380)) { + if (slt32(local_tid_129378 + offset_129397, + sext_i64_i32(segred_group_sizze_101747)) && + ((local_tid_129378 - squot32(local_tid_129378, + wave_sizze_129380) * + wave_sizze_129380) & (2 * offset_129397 - 1)) == 0) { + // read array element + { + x_129394 = ((volatile __local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129397)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129395 = add64(x_129393, x_129394); + + x_129393 = defunc_1_op_res_129395; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_129393; + } + } + offset_129397 *= 2; + } + while (slt32(skip_waves_129398, + squot32(sext_i64_i32(segred_group_sizze_101747) + + wave_sizze_129380 - 1, wave_sizze_129380))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129397 = skip_waves_129398 * wave_sizze_129380; + if (slt32(local_tid_129378 + offset_129397, + sext_i64_i32(segred_group_sizze_101747)) && + ((local_tid_129378 - squot32(local_tid_129378, + wave_sizze_129380) * + wave_sizze_129380) == 0 && (squot32(local_tid_129378, + wave_sizze_129380) & (2 * + skip_waves_129398 - + 1)) == + 0)) { + // read array element + { + x_129394 = ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129397)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129395 = add64(x_129393, x_129394); + + x_129393 = defunc_1_op_res_129395; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_129393; 
+ } + } + skip_waves_129398 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129378) == (int64_t) 0) { + x_acc_129391 = x_129393; + } + } + if (groups_per_segment_129368 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129378 == 0) { + ((__global int64_t *) mem_124946)[gtid_101727] = + x_acc_129391; + } + } + } else { + int32_t old_counter_129399; + + // first thread in group saves group result to global memory + { + if (local_tid_129378 == 0) { + ((__global + int64_t *) group_res_arr_mem_129373)[sext_i32_i64(virt_group_id_129388) * + segred_group_sizze_101747] = + x_acc_129391; + mem_fence_global(); + old_counter_129399 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129375)[sext_i32_i64(srem32(flat_segment_id_129389, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129384)[(int64_t) 0] = + old_counter_129399 == groups_per_segment_129368 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129400; + + is_last_group_129400 = ((__local + bool *) sync_arr_mem_129384)[(int64_t) 0]; + if (is_last_group_129400) { + if (local_tid_129378 == 0) { + old_counter_129399 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129375)[sext_i32_i64(srem32(flat_segment_id_129389, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129368)); + } + // read in the per-group-results + { + int64_t read_per_thread_129401 = + sdiv_up64(groups_per_segment_129368, + segred_group_sizze_101747); + + x_101751 = (int64_t) 0; + for (int64_t i_129402 = 0; i_129402 < + read_per_thread_129401; i_129402++) { + int64_t group_res_id_129403 = + sext_i32_i64(local_tid_129378) * + read_per_thread_129401 + i_129402; + int64_t index_of_group_res_129404 = + sext_i32_i64(flat_segment_id_129389) * + groups_per_segment_129368 + group_res_id_129403; + + if 
(slt64(group_res_id_129403, + groups_per_segment_129368)) { + x_101752 = ((__global + int64_t *) group_res_arr_mem_129373)[index_of_group_res_129404 * + segred_group_sizze_101747]; + + int64_t defunc_1_op_res_101753; + + defunc_1_op_res_101753 = add64(x_101751, x_101752); + x_101751 = defunc_1_op_res_101753; + } + } + } + ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_101751; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129405; + int32_t skip_waves_129406; + + skip_waves_129406 = 1; + + int64_t x_129393; + int64_t x_129394; + + offset_129405 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129378, + sext_i64_i32(segred_group_sizze_101747))) { + x_129393 = ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129405)]; + } + } + offset_129405 = 1; + while (slt32(offset_129405, wave_sizze_129380)) { + if (slt32(local_tid_129378 + offset_129405, + sext_i64_i32(segred_group_sizze_101747)) && + ((local_tid_129378 - squot32(local_tid_129378, + wave_sizze_129380) * + wave_sizze_129380) & (2 * offset_129405 - 1)) == + 0) { + // read array element + { + x_129394 = ((volatile __local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129405)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129395 = add64(x_129393, + x_129394); + + x_129393 = defunc_1_op_res_129395; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_129393; + } + } + offset_129405 *= 2; + } + while (slt32(skip_waves_129406, + squot32(sext_i64_i32(segred_group_sizze_101747) + + wave_sizze_129380 - 1, + wave_sizze_129380))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129405 = skip_waves_129406 * wave_sizze_129380; + if (slt32(local_tid_129378 + offset_129405, + sext_i64_i32(segred_group_sizze_101747)) && + ((local_tid_129378 - squot32(local_tid_129378, + 
wave_sizze_129380) * + wave_sizze_129380) == 0 && + (squot32(local_tid_129378, wave_sizze_129380) & + (2 * skip_waves_129406 - 1)) == 0)) { + // read array element + { + x_129394 = ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378 + + offset_129405)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129395 = add64(x_129393, + x_129394); + + x_129393 = defunc_1_op_res_129395; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129382)[sext_i32_i64(local_tid_129378)] = + x_129393; + } + } + skip_waves_129406 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129378 == 0) { + ((__global int64_t *) mem_124946)[gtid_101727] = + x_129393; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101747 +} +/* Segmented reduction kernel: for each segment (gtid_101857 = virtual group id divided by groups_per_segment_129533) it sums doubles read from defunc_4_map_res_mem_124920 and stores one double per segment in mem_124969. + * Element gtid_101865 contributes only when it is below defunc_3_map_res_mem_124958[segment]; the value is then read at flat index segment * N_73007 + i, with i = 1 + gtid_101865 + defunc_3_map_res_mem_124959[segment] - defunc_3_map_res_mem_124958[segment]. Otherwise 0.0 is added. + * When groups_per_segment_129533 is not 1, each group publishes its partial sum in group_res_arr_mem_129538 and the group that last increments the counter in mainMagnitudezicounter_mem_129540 performs the final reduction. + * An index i outside [0, N_73007) is reported through global_failure (code 636) with i and N_73007 written to global_failure_args. + */ +__kernel void mainMagnitudezisegred_large_101866(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *sync_arr_mem_129549_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129547_backing_aligned_1, + int64_t N_73007, + int64_t defunc_2_reduce_comm_res_74867, + int64_t num_groups_101887, + int64_t groups_per_segment_129533, + int64_t elements_per_thread_129534, + int64_t virt_num_groups_129535, + int64_t threads_per_segment_129537, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *mem_124969, + __global + unsigned char *group_res_arr_mem_129538, + __global + unsigned char *mainMagnitudezicounter_mem_129540) +{ + #define segred_group_sizze_101886 (mainMagnitudezisegred_group_sizze_101860) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_129549_backing_1 = + (__local volatile + char *) 
sync_arr_mem_129549_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129547_backing_0 = + (__local volatile + char *) red_arr_mem_129547_backing_aligned_1; + volatile __local bool local_failure; + + /* When failure_is_an_option is set, return at once if a failure code is already recorded (global_failure holds a value >= 0). */ + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129542; + int32_t local_tid_129543; + int64_t group_sizze_129546; + int32_t wave_sizze_129545; + int32_t group_tid_129544; + + global_tid_129542 = get_global_id(0); + local_tid_129543 = get_local_id(0); + group_sizze_129546 = get_local_size(0); + wave_sizze_129545 = LOCKSTEP_WIDTH; + group_tid_129544 = get_group_id(0); + + int32_t phys_tid_101866; + + phys_tid_101866 = global_tid_129542; + + __local char *red_arr_mem_129547; + + red_arr_mem_129547 = (__local char *) red_arr_mem_129547_backing_0; + + __local char *sync_arr_mem_129549; + + sync_arr_mem_129549 = (__local char *) sync_arr_mem_129549_backing_1; + + int32_t phys_group_id_129551; + + phys_group_id_129551 = get_group_id(0); + /* Virtual-group loop: iteration i handles virtual group phys_group_id_129551 + i times num_groups_101887. The trip count comes from sdiv_up32 (presumably a rounding-up division - confirm against its definition). */ + for (int32_t i_129552 = 0; i_129552 < + sdiv_up32(sext_i64_i32(virt_num_groups_129535) - phys_group_id_129551, + sext_i64_i32(num_groups_101887)); i_129552++) { + int32_t virt_group_id_129553 = phys_group_id_129551 + i_129552 * + sext_i64_i32(num_groups_101887); + int32_t flat_segment_id_129554 = squot32(virt_group_id_129553, + sext_i64_i32(groups_per_segment_129533)); + int64_t global_tid_129555 = srem64(sext_i32_i64(virt_group_id_129553) * + segred_group_sizze_101886 + + sext_i32_i64(local_tid_129543), + segred_group_sizze_101886 * + groups_per_segment_129533); + int64_t gtid_101857 = sext_i32_i64(flat_segment_id_129554); + int64_t gtid_101865; + double x_acc_129556; + int64_t chunk_sizze_129557; + + /* chunk_sizze_129557 bounds the per-thread accumulation loop below; consecutive elements of one thread are threads_per_segment_129537 apart. */ + chunk_sizze_129557 = smin64(elements_per_thread_129534, + sdiv_up64(defunc_2_reduce_comm_res_74867 - + global_tid_129555, + threads_per_segment_129537)); + + double x_101890; + double x_101891; + + // 
neutral-initialise the accumulators + { + x_acc_129556 = 0.0; + } + for (int64_t i_129561 = 0; i_129561 < chunk_sizze_129557; i_129561++) { + gtid_101865 = global_tid_129555 + threads_per_segment_129537 * + i_129561; + // apply map function + { + int64_t x_101895 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_101857]; + bool cond_101897 = slt64(gtid_101865, x_101895); + double defunc_0_f_res_101898; + + if (cond_101897) { + int64_t x_101894 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_101857]; + int64_t x_101899 = add64(gtid_101865, x_101894); + int64_t x_101900 = sub64(x_101899, x_101895); + int64_t i_101901 = add64((int64_t) 1, x_101900); + bool x_101902 = sle64((int64_t) 0, i_101901); + bool y_101903 = slt64(i_101901, N_73007); + bool bounds_check_101904 = x_101902 && y_101903; + bool index_certs_101905; + + /* Bounds failure: the thread whose compare-exchange replaces -1 with 636 in global_failure also records i_101901 and N_73007; every failing thread sets local_failure and jumps to error_0. */ + if (!bounds_check_101904) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 636) == -1) { + global_failure_args[0] = i_101901; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_101906 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101857 * + N_73007 + + i_101901]; + + defunc_0_f_res_101898 = defunc_0_f_res_t_res_101906; + } else { + defunc_0_f_res_101898 = 0.0; + } + // save map-out results + { } + // load accumulator + { + x_101890 = x_acc_129556; + } + // load new values + { + x_101891 = defunc_0_f_res_101898; + } + // apply reduction operator + { + double defunc_1_op_res_101892 = x_101890 + x_101891; + + // store in accumulator + { + x_acc_129556 = defunc_1_op_res_101892; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_101890 = x_acc_129556; + ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_101890; + } + + /* error_0 is reached both by falling through and by the goto above; after the barrier every thread that sees the __local flag local_failure set returns. */ + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129562; + int32_t skip_waves_129563; 
+ + skip_waves_129563 = 1; + + double x_129558; + double x_129559; + + offset_129562 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129543, + sext_i64_i32(segred_group_sizze_101886))) { + x_129558 = ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129562)]; + } + } + /* Stage 1: reduction among threads of one wave (wave_sizze_129545 = LOCKSTEP_WIDTH), doubling the offset each step. It uses volatile local accesses and has no barrier inside the loop; NOTE(review): presumably relies on lockstep execution within a wave - confirm. */ + offset_129562 = 1; + while (slt32(offset_129562, wave_sizze_129545)) { + if (slt32(local_tid_129543 + offset_129562, + sext_i64_i32(segred_group_sizze_101886)) && + ((local_tid_129543 - squot32(local_tid_129543, + wave_sizze_129545) * + wave_sizze_129545) & (2 * offset_129562 - 1)) == 0) { + // read array element + { + x_129559 = ((volatile __local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129562)]; + } + // apply reduction operation + { + double defunc_1_op_res_129560 = x_129558 + x_129559; + + x_129558 = defunc_1_op_res_129560; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_129558; + } + } + offset_129562 *= 2; + } + /* Stage 2: the first thread of each wave combines per-wave results across waves; a barrier precedes every step. */ + while (slt32(skip_waves_129563, + squot32(sext_i64_i32(segred_group_sizze_101886) + + wave_sizze_129545 - 1, wave_sizze_129545))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129562 = skip_waves_129563 * wave_sizze_129545; + if (slt32(local_tid_129543 + offset_129562, + sext_i64_i32(segred_group_sizze_101886)) && + ((local_tid_129543 - squot32(local_tid_129543, + wave_sizze_129545) * + wave_sizze_129545) == 0 && (squot32(local_tid_129543, + wave_sizze_129545) & (2 * + skip_waves_129563 - + 1)) == + 0)) { + // read array element + { + x_129559 = ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129562)]; + } + // apply reduction operation + { + double defunc_1_op_res_129560 = x_129558 + x_129559; + + x_129558 = defunc_1_op_res_129560; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_129558; + } + } + 
skip_waves_129563 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129543) == (int64_t) 0) { + x_acc_129556 = x_129558; + } + } + /* With exactly one group per segment the group sum is written straight to mem_124969; otherwise the partial sum is published and combined by whichever group finishes last. */ + if (groups_per_segment_129533 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129543 == 0) { + ((__global double *) mem_124969)[gtid_101857] = + x_acc_129556; + } + } + } else { + int32_t old_counter_129564; + + // first thread in group saves group result to global memory + { + if (local_tid_129543 == 0) { + ((__global + double *) group_res_arr_mem_129538)[sext_i32_i64(virt_group_id_129553) * + segred_group_sizze_101886] = + x_acc_129556; + mem_fence_global(); + old_counter_129564 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129540)[sext_i32_i64(srem32(flat_segment_id_129554, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129549)[(int64_t) 0] = + old_counter_129564 == groups_per_segment_129533 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129565; + + is_last_group_129565 = ((__local + bool *) sync_arr_mem_129549)[(int64_t) 0]; + /* The flag is true when the counter held groups_per_segment_129533 - 1 before this group's increment. The last group adds -groups_per_segment_129533 to the counter, cancelling the increments. NOTE(review): the counter slot is the segment id modulo 10240; presumably the counter buffer has 10240 entries - confirm. */ + if (is_last_group_129565) { + if (local_tid_129543 == 0) { + old_counter_129564 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129540)[sext_i32_i64(srem32(flat_segment_id_129554, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129533)); + } + // read in the per-group-results + { + int64_t read_per_thread_129566 = + sdiv_up64(groups_per_segment_129533, + segred_group_sizze_101886); + + x_101890 = 0.0; + for (int64_t i_129567 = 0; i_129567 < + read_per_thread_129566; i_129567++) { + int64_t group_res_id_129568 = + sext_i32_i64(local_tid_129543) * + read_per_thread_129566 + i_129567; + int64_t index_of_group_res_129569 = + sext_i32_i64(flat_segment_id_129554) * + groups_per_segment_129533 + group_res_id_129568; + + if (slt64(group_res_id_129568, + 
groups_per_segment_129533)) { + x_101891 = ((__global + double *) group_res_arr_mem_129538)[index_of_group_res_129569 * + segred_group_sizze_101886]; + + double defunc_1_op_res_101892; + + defunc_1_op_res_101892 = x_101890 + x_101891; + x_101890 = defunc_1_op_res_101892; + } + } + } + /* Each thread stores the sum of the group results it read, then the same two-stage local reduction is run again over red_arr_mem_129547. */ + ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_101890; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129570; + int32_t skip_waves_129571; + + skip_waves_129571 = 1; + + double x_129558; + double x_129559; + + offset_129570 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129543, + sext_i64_i32(segred_group_sizze_101886))) { + x_129558 = ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129570)]; + } + } + offset_129570 = 1; + while (slt32(offset_129570, wave_sizze_129545)) { + if (slt32(local_tid_129543 + offset_129570, + sext_i64_i32(segred_group_sizze_101886)) && + ((local_tid_129543 - squot32(local_tid_129543, + wave_sizze_129545) * + wave_sizze_129545) & (2 * offset_129570 - 1)) == + 0) { + // read array element + { + x_129559 = ((volatile __local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129570)]; + } + // apply reduction operation + { + double defunc_1_op_res_129560 = x_129558 + + x_129559; + + x_129558 = defunc_1_op_res_129560; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_129558; + } + } + offset_129570 *= 2; + } + while (slt32(skip_waves_129571, + squot32(sext_i64_i32(segred_group_sizze_101886) + + wave_sizze_129545 - 1, + wave_sizze_129545))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129570 = skip_waves_129571 * wave_sizze_129545; + if (slt32(local_tid_129543 + offset_129570, + sext_i64_i32(segred_group_sizze_101886)) && + ((local_tid_129543 - squot32(local_tid_129543, + wave_sizze_129545) * + wave_sizze_129545) == 0 && + 
(squot32(local_tid_129543, wave_sizze_129545) & + (2 * skip_waves_129571 - 1)) == 0)) { + // read array element + { + x_129559 = ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543 + + offset_129570)]; + } + // apply reduction operation + { + double defunc_1_op_res_129560 = x_129558 + + x_129559; + + x_129558 = defunc_1_op_res_129560; + } + // write result of operation + { + ((__local + double *) red_arr_mem_129547)[sext_i32_i64(local_tid_129543)] = + x_129558; + } + } + skip_waves_129571 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129543 == 0) { + ((__global double *) mem_124969)[gtid_101857] = + x_129558; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + /* No goto within this kernel targets error_1; the failure path above uses error_0. */ + error_1: + return; + #undef segred_group_sizze_101886 +} +__kernel void mainMagnitudezisegred_large_102410(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_129750_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129748_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129746_backing_aligned_2, + __local volatile + int64_t *red_arr_mem_129744_backing_aligned_3, + int64_t iota_arg_74896, + int64_t num_groups_102593, + int64_t groups_per_segment_129726, + int64_t elements_per_thread_129727, + int64_t virt_num_groups_129728, + __global + unsigned char *mem_124973, + __global + unsigned char *mem_125026, + __global + unsigned char *mem_125028, + __global + unsigned char *mem_125032, + __global + unsigned char *mem_125035, + __global + unsigned char *mem_125037, + __global + unsigned char *mem_125039, + __global + unsigned char *group_res_arr_mem_129731, + __global + unsigned char *group_res_arr_mem_129733, + __global + unsigned char *group_res_arr_mem_129735, + __global + unsigned char *mainMagnitudezicounter_mem_129737) +{ + #define segred_group_sizze_102592 (mainMagnitudezisegred_group_sizze_102404) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + 
__local volatile char *restrict sync_arr_mem_129750_backing_3 = + (__local volatile + char *) sync_arr_mem_129750_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129748_backing_2 = + (__local volatile + char *) red_arr_mem_129748_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129746_backing_1 = + (__local volatile + char *) red_arr_mem_129746_backing_aligned_2; + __local volatile char *restrict red_arr_mem_129744_backing_0 = + (__local volatile + char *) red_arr_mem_129744_backing_aligned_3; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129739; + int32_t local_tid_129740; + int64_t group_sizze_129743; + int32_t wave_sizze_129742; + int32_t group_tid_129741; + + global_tid_129739 = get_global_id(0); + local_tid_129740 = get_local_id(0); + group_sizze_129743 = get_local_size(0); + wave_sizze_129742 = LOCKSTEP_WIDTH; + group_tid_129741 = get_group_id(0); + + int32_t phys_tid_102410; + + phys_tid_102410 = global_tid_129739; + + __local char *red_arr_mem_129744; + + red_arr_mem_129744 = (__local char *) red_arr_mem_129744_backing_0; + + __local char *red_arr_mem_129746; + + red_arr_mem_129746 = (__local char *) red_arr_mem_129746_backing_1; + + __local char *red_arr_mem_129748; + + red_arr_mem_129748 = (__local char *) red_arr_mem_129748_backing_2; + + __local char *sync_arr_mem_129750; + + sync_arr_mem_129750 = (__local char *) sync_arr_mem_129750_backing_3; + + int32_t phys_group_id_129752; + + phys_group_id_129752 = get_group_id(0); + for (int32_t i_129753 = 0; i_129753 < + sdiv_up32(sext_i64_i32(virt_num_groups_129728) - phys_group_id_129752, + sext_i64_i32(num_groups_102593)); i_129753++) { + int32_t virt_group_id_129754 = phys_group_id_129752 + i_129753 * + sext_i64_i32(num_groups_102593); + int32_t flat_segment_id_129755 = squot32(virt_group_id_129754, + sext_i64_i32(groups_per_segment_129726)); + int64_t global_tid_129756 = srem64(sext_i32_i64(virt_group_id_129754) * + segred_group_sizze_102592 + + 
sext_i32_i64(local_tid_129740), + segred_group_sizze_102592 * + groups_per_segment_129726); + int64_t gtid_102401 = sext_i32_i64(flat_segment_id_129755); + int64_t gtid_102409; + bool x_acc_129757; + int64_t x_acc_129758; + double x_acc_129759; + int64_t chunk_sizze_129760; + int64_t starting_point_129761; + + starting_point_129761 = global_tid_129756 * elements_per_thread_129727; + + int64_t remaining_elements_129762; + + remaining_elements_129762 = iota_arg_74896 - starting_point_129761; + if (sle64(remaining_elements_129762, (int64_t) 0) || + sle64(iota_arg_74896, starting_point_129761)) { + chunk_sizze_129760 = (int64_t) 0; + } else { + if (slt64(iota_arg_74896, (global_tid_129756 + (int64_t) 1) * + elements_per_thread_129727)) { + chunk_sizze_129760 = iota_arg_74896 - global_tid_129756 * + elements_per_thread_129727; + } else { + chunk_sizze_129760 = elements_per_thread_129727; + } + } + + bool x_102598; + int64_t x_102599; + double x_102600; + bool x_102601; + int64_t x_102602; + double x_102603; + + // neutral-initialise the accumulators + { + x_acc_129757 = 0; + x_acc_129758 = (int64_t) -1; + x_acc_129759 = 0.0; + } + for (int64_t i_129777 = 0; i_129777 < elements_per_thread_129727; + i_129777++) { + gtid_102409 = sext_i32_i64(local_tid_129740) + + (squot64(global_tid_129756, segred_group_sizze_102592) * + elements_per_thread_129727 + i_129777) * + segred_group_sizze_102592; + if (slt64(gtid_102409, iota_arg_74896)) { + // apply map function + { + int64_t y_102612 = ((__global + int64_t *) mem_125028)[gtid_102401]; + double y_102613 = ((__global + double *) mem_125026)[gtid_102401]; + double x_102617 = ((__global + double *) mem_125032)[gtid_102401 * + iota_arg_74896 + + gtid_102409]; + double x_102618 = ((__global + double *) mem_124973)[gtid_102409]; + double defunc_0_f_res_102621 = x_102617 / y_102613; + bool cond_102622 = slt64(gtid_102409, y_102612); + bool isnan_res_102623; + + isnan_res_102623 = futrts_isnan64(defunc_0_f_res_102621); + + bool 
cond_t_res_102624 = !isnan_res_102623; + bool x_102625 = cond_102622 && cond_t_res_102624; + double abs_res_102626 = fabs(defunc_0_f_res_102621); + bool defunc_2_f_res_t_res_102627 = x_102618 < + abs_res_102626; + bool x_102628 = x_102625 && defunc_2_f_res_t_res_102627; + double defunc_1_f_res_102629; + + if (cond_102622) { + defunc_1_f_res_102629 = defunc_0_f_res_102621; + } else { + defunc_1_f_res_102629 = 0.0; + } + // save map-out results + { } + // load accumulator + { + x_102598 = x_acc_129757; + x_102599 = x_acc_129758; + x_102600 = x_acc_129759; + } + // load new values + { + x_102601 = x_102628; + x_102602 = gtid_102409; + x_102603 = defunc_1_f_res_102629; + } + // apply reduction operator + { + bool defunc_1_op_res_102604; + int64_t defunc_1_op_res_102605; + + if (x_102598) { + defunc_1_op_res_102604 = x_102598; + defunc_1_op_res_102605 = x_102599; + } else { + bool x_102606 = x_102601 && x_102601; + bool x_102607 = !x_102601; + bool y_102608 = x_102598 && x_102607; + bool defunc_1_op_res_f_res_102609 = x_102606 || + y_102608; + int64_t defunc_1_op_res_f_res_102610; + + if (x_102601) { + defunc_1_op_res_f_res_102610 = x_102602; + } else { + defunc_1_op_res_f_res_102610 = x_102599; + } + defunc_1_op_res_102604 = + defunc_1_op_res_f_res_102609; + defunc_1_op_res_102605 = + defunc_1_op_res_f_res_102610; + } + + double defunc_1_op_res_102611 = x_102600 + x_102603; + + // store in accumulator + { + x_acc_129757 = defunc_1_op_res_102604; + x_acc_129758 = defunc_1_op_res_102605; + x_acc_129759 = defunc_1_op_res_102611; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_102598 = x_acc_129757; + x_102599 = x_acc_129758; + x_102600 = x_acc_129759; + ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_102598; + ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_102599; + ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_102600; + } + 
barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129778; + int32_t skip_waves_129779; + + skip_waves_129779 = 1; + + bool x_129763; + int64_t x_129764; + double x_129765; + bool x_129766; + int64_t x_129767; + double x_129768; + + offset_129778 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129740, + sext_i64_i32(segred_group_sizze_102592))) { + x_129763 = ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129764 = ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129765 = ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + } + } + offset_129778 = 1; + while (slt32(offset_129778, wave_sizze_129742)) { + if (slt32(local_tid_129740 + offset_129778, + sext_i64_i32(segred_group_sizze_102592)) && + ((local_tid_129740 - squot32(local_tid_129740, + wave_sizze_129742) * + wave_sizze_129742) & (2 * offset_129778 - 1)) == 0) { + // read array element + { + x_129766 = ((volatile __local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129767 = ((volatile __local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129768 = ((volatile __local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129769; + int64_t defunc_1_op_res_129770; + + if (x_129763) { + defunc_1_op_res_129769 = x_129763; + defunc_1_op_res_129770 = x_129764; + } else { + bool x_129771 = x_129766 && x_129766; + bool x_129772 = !x_129766; + bool y_129773 = x_129763 && x_129772; + bool defunc_1_op_res_f_res_129774 = x_129771 || + y_129773; + int64_t defunc_1_op_res_f_res_129775; + + if (x_129766) { + defunc_1_op_res_f_res_129775 = x_129767; + } else { + defunc_1_op_res_f_res_129775 = x_129764; + } + defunc_1_op_res_129769 = + defunc_1_op_res_f_res_129774; + defunc_1_op_res_129770 = 
+ defunc_1_op_res_f_res_129775; + } + + double defunc_1_op_res_129776 = x_129765 + x_129768; + + x_129763 = defunc_1_op_res_129769; + x_129764 = defunc_1_op_res_129770; + x_129765 = defunc_1_op_res_129776; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_129763; + ((volatile __local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_129764; + ((volatile __local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_129765; + } + } + offset_129778 *= 2; + } + while (slt32(skip_waves_129779, + squot32(sext_i64_i32(segred_group_sizze_102592) + + wave_sizze_129742 - 1, wave_sizze_129742))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129778 = skip_waves_129779 * wave_sizze_129742; + if (slt32(local_tid_129740 + offset_129778, + sext_i64_i32(segred_group_sizze_102592)) && + ((local_tid_129740 - squot32(local_tid_129740, + wave_sizze_129742) * + wave_sizze_129742) == 0 && (squot32(local_tid_129740, + wave_sizze_129742) & + (2 * skip_waves_129779 - + 1)) == 0)) { + // read array element + { + x_129766 = ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129767 = ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + x_129768 = ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129778)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129769; + int64_t defunc_1_op_res_129770; + + if (x_129763) { + defunc_1_op_res_129769 = x_129763; + defunc_1_op_res_129770 = x_129764; + } else { + bool x_129771 = x_129766 && x_129766; + bool x_129772 = !x_129766; + bool y_129773 = x_129763 && x_129772; + bool defunc_1_op_res_f_res_129774 = x_129771 || + y_129773; + int64_t defunc_1_op_res_f_res_129775; + + if (x_129766) { + defunc_1_op_res_f_res_129775 = x_129767; + } else { + defunc_1_op_res_f_res_129775 = x_129764; + } + defunc_1_op_res_129769 = + 
defunc_1_op_res_f_res_129774; + defunc_1_op_res_129770 = + defunc_1_op_res_f_res_129775; + } + + double defunc_1_op_res_129776 = x_129765 + x_129768; + + x_129763 = defunc_1_op_res_129769; + x_129764 = defunc_1_op_res_129770; + x_129765 = defunc_1_op_res_129776; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_129763; + ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_129764; + ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_129765; + } + } + skip_waves_129779 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129740) == (int64_t) 0) { + x_acc_129757 = x_129763; + x_acc_129758 = x_129764; + x_acc_129759 = x_129765; + } + } + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_129740) == (int64_t) 0)) { + x_acc_129757 = 0; + x_acc_129758 = (int64_t) -1; + x_acc_129759 = 0.0; + } + } + } + x_102598 = x_acc_129757; + x_102599 = x_acc_129758; + x_102600 = x_acc_129759; + if (groups_per_segment_129726 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_129740 == 0) { + ((__global bool *) mem_125035)[gtid_102401] = x_acc_129757; + ((__global int64_t *) mem_125037)[gtid_102401] = + x_acc_129758; + ((__global double *) mem_125039)[gtid_102401] = + x_acc_129759; + } + } + } else { + int32_t old_counter_129780; + + // first thread in group saves group result to global memory + { + if (local_tid_129740 == 0) { + ((__global + bool *) group_res_arr_mem_129731)[sext_i32_i64(virt_group_id_129754) * + segred_group_sizze_102592] = + x_acc_129757; + ((__global + int64_t *) group_res_arr_mem_129733)[sext_i32_i64(virt_group_id_129754) * + segred_group_sizze_102592] = + x_acc_129758; + ((__global + double *) group_res_arr_mem_129735)[sext_i32_i64(virt_group_id_129754) * + 
segred_group_sizze_102592] = + x_acc_129759; + mem_fence_global(); + old_counter_129780 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129737)[sext_i32_i64(srem32(flat_segment_id_129755, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_129750)[(int64_t) 0] = + old_counter_129780 == groups_per_segment_129726 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129781; + + is_last_group_129781 = ((__local + bool *) sync_arr_mem_129750)[(int64_t) 0]; + if (is_last_group_129781) { + if (local_tid_129740 == 0) { + old_counter_129780 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129737)[sext_i32_i64(srem32(flat_segment_id_129755, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_129726)); + } + // read in the per-group-results + { + int64_t read_per_thread_129782 = + sdiv_up64(groups_per_segment_129726, + segred_group_sizze_102592); + + x_102598 = 0; + x_102599 = (int64_t) -1; + x_102600 = 0.0; + for (int64_t i_129783 = 0; i_129783 < + read_per_thread_129782; i_129783++) { + int64_t group_res_id_129784 = + sext_i32_i64(local_tid_129740) * + read_per_thread_129782 + i_129783; + int64_t index_of_group_res_129785 = + sext_i32_i64(flat_segment_id_129755) * + groups_per_segment_129726 + group_res_id_129784; + + if (slt64(group_res_id_129784, + groups_per_segment_129726)) { + x_102601 = ((__global + bool *) group_res_arr_mem_129731)[index_of_group_res_129785 * + segred_group_sizze_102592]; + x_102602 = ((__global + int64_t *) group_res_arr_mem_129733)[index_of_group_res_129785 * + segred_group_sizze_102592]; + x_102603 = ((__global + double *) group_res_arr_mem_129735)[index_of_group_res_129785 * + segred_group_sizze_102592]; + + bool defunc_1_op_res_102604; + int64_t defunc_1_op_res_102605; + + if (x_102598) { + defunc_1_op_res_102604 = x_102598; + defunc_1_op_res_102605 = x_102599; + } else { + bool x_102606 = x_102601 && x_102601; + bool 
x_102607 = !x_102601; + bool y_102608 = x_102598 && x_102607; + bool defunc_1_op_res_f_res_102609 = x_102606 || + y_102608; + int64_t defunc_1_op_res_f_res_102610; + + if (x_102601) { + defunc_1_op_res_f_res_102610 = x_102602; + } else { + defunc_1_op_res_f_res_102610 = x_102599; + } + defunc_1_op_res_102604 = + defunc_1_op_res_f_res_102609; + defunc_1_op_res_102605 = + defunc_1_op_res_f_res_102610; + } + + double defunc_1_op_res_102611 = x_102600 + x_102603; + + x_102598 = defunc_1_op_res_102604; + x_102599 = defunc_1_op_res_102605; + x_102600 = defunc_1_op_res_102611; + } + } + } + ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_102598; + ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_102599; + ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_102600; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129786; + int32_t skip_waves_129787; + + skip_waves_129787 = 1; + + bool x_129763; + int64_t x_129764; + double x_129765; + bool x_129766; + int64_t x_129767; + double x_129768; + + offset_129786 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129740, + sext_i64_i32(segred_group_sizze_102592))) { + x_129763 = ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129764 = ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129765 = ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + } + } + offset_129786 = 1; + while (slt32(offset_129786, wave_sizze_129742)) { + if (slt32(local_tid_129740 + offset_129786, + sext_i64_i32(segred_group_sizze_102592)) && + ((local_tid_129740 - squot32(local_tid_129740, + wave_sizze_129742) * + wave_sizze_129742) & (2 * offset_129786 - 1)) == + 0) { + // read array element + { + x_129766 = ((volatile __local + bool *) 
red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129767 = ((volatile __local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129768 = ((volatile __local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129769; + int64_t defunc_1_op_res_129770; + + if (x_129763) { + defunc_1_op_res_129769 = x_129763; + defunc_1_op_res_129770 = x_129764; + } else { + bool x_129771 = x_129766 && x_129766; + bool x_129772 = !x_129766; + bool y_129773 = x_129763 && x_129772; + bool defunc_1_op_res_f_res_129774 = + x_129771 || y_129773; + int64_t defunc_1_op_res_f_res_129775; + + if (x_129766) { + defunc_1_op_res_f_res_129775 = x_129767; + } else { + defunc_1_op_res_f_res_129775 = x_129764; + } + defunc_1_op_res_129769 = + defunc_1_op_res_f_res_129774; + defunc_1_op_res_129770 = + defunc_1_op_res_f_res_129775; + } + + double defunc_1_op_res_129776 = x_129765 + + x_129768; + + x_129763 = defunc_1_op_res_129769; + x_129764 = defunc_1_op_res_129770; + x_129765 = defunc_1_op_res_129776; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_129763; + ((volatile __local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_129764; + ((volatile __local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_129765; + } + } + offset_129786 *= 2; + } + while (slt32(skip_waves_129787, + squot32(sext_i64_i32(segred_group_sizze_102592) + + wave_sizze_129742 - 1, + wave_sizze_129742))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129786 = skip_waves_129787 * wave_sizze_129742; + if (slt32(local_tid_129740 + offset_129786, + sext_i64_i32(segred_group_sizze_102592)) && + ((local_tid_129740 - squot32(local_tid_129740, + wave_sizze_129742) * + wave_sizze_129742) == 0 && + (squot32(local_tid_129740, wave_sizze_129742) & + (2 * skip_waves_129787 
- 1)) == 0)) { + // read array element + { + x_129766 = ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129767 = ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + x_129768 = ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740 + + offset_129786)]; + } + // apply reduction operation + { + bool defunc_1_op_res_129769; + int64_t defunc_1_op_res_129770; + + if (x_129763) { + defunc_1_op_res_129769 = x_129763; + defunc_1_op_res_129770 = x_129764; + } else { + bool x_129771 = x_129766 && x_129766; + bool x_129772 = !x_129766; + bool y_129773 = x_129763 && x_129772; + bool defunc_1_op_res_f_res_129774 = + x_129771 || y_129773; + int64_t defunc_1_op_res_f_res_129775; + + if (x_129766) { + defunc_1_op_res_f_res_129775 = x_129767; + } else { + defunc_1_op_res_f_res_129775 = x_129764; + } + defunc_1_op_res_129769 = + defunc_1_op_res_f_res_129774; + defunc_1_op_res_129770 = + defunc_1_op_res_f_res_129775; + } + + double defunc_1_op_res_129776 = x_129765 + + x_129768; + + x_129763 = defunc_1_op_res_129769; + x_129764 = defunc_1_op_res_129770; + x_129765 = defunc_1_op_res_129776; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_129744)[sext_i32_i64(local_tid_129740)] = + x_129763; + ((__local + int64_t *) red_arr_mem_129746)[sext_i32_i64(local_tid_129740)] = + x_129764; + ((__local + double *) red_arr_mem_129748)[sext_i32_i64(local_tid_129740)] = + x_129765; + } + } + skip_waves_129787 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129740 == 0) { + ((__global bool *) mem_125035)[gtid_102401] = + x_129763; + ((__global int64_t *) mem_125037)[gtid_102401] = + x_129764; + ((__global double *) mem_125039)[gtid_102401] = + x_129765; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_102592 +} +__kernel void 
mainMagnitudezisegred_large_92263(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127110_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127108_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_94094, + int64_t groups_per_segment_127094, + int64_t elements_per_thread_127095, + int64_t virt_num_groups_127096, + int64_t threads_per_segment_127098, + __global + unsigned char *mem_121831, + __global + unsigned char *mem_121835, + __global + unsigned char *mem_121840, + __global + unsigned char *group_res_arr_mem_127099, + __global + unsigned char *mainMagnitudezicounter_mem_127101) +{ + #define segred_group_sizze_94093 (mainMagnitudezisegred_group_sizze_92257) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127110_backing_1 = + (__local volatile + char *) sync_arr_mem_127110_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127108_backing_0 = + (__local volatile + char *) red_arr_mem_127108_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127103; + int32_t local_tid_127104; + int64_t group_sizze_127107; + int32_t wave_sizze_127106; + int32_t group_tid_127105; + + global_tid_127103 = get_global_id(0); + local_tid_127104 = get_local_id(0); + group_sizze_127107 = get_local_size(0); + wave_sizze_127106 = LOCKSTEP_WIDTH; + group_tid_127105 = get_group_id(0); + + int32_t phys_tid_92263; + + phys_tid_92263 = global_tid_127103; + + __local char *red_arr_mem_127108; + + red_arr_mem_127108 = (__local char *) red_arr_mem_127108_backing_0; + + __local char *sync_arr_mem_127110; + + sync_arr_mem_127110 = (__local char *) sync_arr_mem_127110_backing_1; + + int32_t phys_group_id_127112; + + phys_group_id_127112 = get_group_id(0); + for (int32_t i_127113 = 0; i_127113 < + sdiv_up32(sext_i64_i32(virt_num_groups_127096) - phys_group_id_127112, + sext_i64_i32(num_groups_94094)); 
i_127113++) { + int32_t virt_group_id_127114 = phys_group_id_127112 + i_127113 * + sext_i64_i32(num_groups_94094); + int32_t flat_segment_id_127115 = squot32(virt_group_id_127114, + sext_i64_i32(groups_per_segment_127094)); + int64_t global_tid_127116 = srem64(sext_i32_i64(virt_group_id_127114) * + segred_group_sizze_94093 + + sext_i32_i64(local_tid_127104), + segred_group_sizze_94093 * + groups_per_segment_127094); + int64_t gtid_92250 = squot64(sext_i32_i64(flat_segment_id_127115), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_92251 = squot64(sext_i32_i64(flat_segment_id_127115) - + squot64(sext_i32_i64(flat_segment_id_127115), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_92252 = sext_i32_i64(flat_segment_id_127115) - + squot64(sext_i32_i64(flat_segment_id_127115), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(flat_segment_id_127115) - + squot64(sext_i32_i64(flat_segment_id_127115), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_92262; + double x_acc_127117; + int64_t chunk_sizze_127118; + + chunk_sizze_127118 = smin64(elements_per_thread_127095, + sdiv_up64(k2p2zq_73023 - global_tid_127116, + threads_per_segment_127098)); + + double x_94097; + double x_94098; + + // neutral-initialise the accumulators + { + x_acc_127117 = 0.0; + } + for (int64_t i_127122 = 0; i_127122 < chunk_sizze_127118; i_127122++) { + gtid_92262 = global_tid_127116 + threads_per_segment_127098 * + i_127122; + // apply map function + { + double x_94103 = ((__global double *) mem_121831)[gtid_92251 * + (k2p2zq_73023 * + m_73008) + + gtid_92250 * + k2p2zq_73023 + + gtid_92262]; + double x_94104 = ((__global double *) mem_121835)[gtid_92250 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_92252 * + k2p2zq_73023 + + gtid_92262]; + double defunc_1_f_res_94105 = x_94103 * x_94104; + + // save map-out results + { } + // load accumulator + { + 
x_94097 = x_acc_127117; + } + // load new values + { + x_94098 = defunc_1_f_res_94105; + } + // apply reduction operator + { + double defunc_1_op_res_94099 = x_94097 + x_94098; + + // store in accumulator + { + x_acc_127117 = defunc_1_op_res_94099; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_94097 = x_acc_127117; + ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_94097; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127123; + int32_t skip_waves_127124; + + skip_waves_127124 = 1; + + double x_127119; + double x_127120; + + offset_127123 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127104, + sext_i64_i32(segred_group_sizze_94093))) { + x_127119 = ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127123)]; + } + } + offset_127123 = 1; + while (slt32(offset_127123, wave_sizze_127106)) { + if (slt32(local_tid_127104 + offset_127123, + sext_i64_i32(segred_group_sizze_94093)) && + ((local_tid_127104 - squot32(local_tid_127104, + wave_sizze_127106) * + wave_sizze_127106) & (2 * offset_127123 - 1)) == 0) { + // read array element + { + x_127120 = ((volatile __local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127123)]; + } + // apply reduction operation + { + double defunc_1_op_res_127121 = x_127119 + x_127120; + + x_127119 = defunc_1_op_res_127121; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_127119; + } + } + offset_127123 *= 2; + } + while (slt32(skip_waves_127124, + squot32(sext_i64_i32(segred_group_sizze_94093) + + wave_sizze_127106 - 1, wave_sizze_127106))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127123 = skip_waves_127124 * wave_sizze_127106; + if (slt32(local_tid_127104 + offset_127123, + sext_i64_i32(segred_group_sizze_94093)) && + ((local_tid_127104 - squot32(local_tid_127104, + wave_sizze_127106) * 
+ wave_sizze_127106) == 0 && (squot32(local_tid_127104, + wave_sizze_127106) & (2 * + skip_waves_127124 - + 1)) == + 0)) { + // read array element + { + x_127120 = ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127123)]; + } + // apply reduction operation + { + double defunc_1_op_res_127121 = x_127119 + x_127120; + + x_127119 = defunc_1_op_res_127121; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_127119; + } + } + skip_waves_127124 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127104) == (int64_t) 0) { + x_acc_127117 = x_127119; + } + } + if (groups_per_segment_127094 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127104 == 0) { + ((__global double *) mem_121840)[gtid_92250 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_92251 * k2p2zq_73023 + + gtid_92252] = x_acc_127117; + } + } + } else { + int32_t old_counter_127125; + + // first thread in group saves group result to global memory + { + if (local_tid_127104 == 0) { + ((__global + double *) group_res_arr_mem_127099)[sext_i32_i64(virt_group_id_127114) * + segred_group_sizze_94093] = + x_acc_127117; + mem_fence_global(); + old_counter_127125 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127101)[sext_i32_i64(srem32(flat_segment_id_127115, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127110)[(int64_t) 0] = + old_counter_127125 == groups_per_segment_127094 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127126; + + is_last_group_127126 = ((__local + bool *) sync_arr_mem_127110)[(int64_t) 0]; + if (is_last_group_127126) { + if (local_tid_127104 == 0) { + old_counter_127125 = + atomic_add_i32_global(&((volatile __global + int *) 
mainMagnitudezicounter_mem_127101)[sext_i32_i64(srem32(flat_segment_id_127115, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127094)); + } + // read in the per-group-results + { + int64_t read_per_thread_127127 = + sdiv_up64(groups_per_segment_127094, + segred_group_sizze_94093); + + x_94097 = 0.0; + for (int64_t i_127128 = 0; i_127128 < + read_per_thread_127127; i_127128++) { + int64_t group_res_id_127129 = + sext_i32_i64(local_tid_127104) * + read_per_thread_127127 + i_127128; + int64_t index_of_group_res_127130 = + sext_i32_i64(flat_segment_id_127115) * + groups_per_segment_127094 + group_res_id_127129; + + if (slt64(group_res_id_127129, + groups_per_segment_127094)) { + x_94098 = ((__global + double *) group_res_arr_mem_127099)[index_of_group_res_127130 * + segred_group_sizze_94093]; + + double defunc_1_op_res_94099; + + defunc_1_op_res_94099 = x_94097 + x_94098; + x_94097 = defunc_1_op_res_94099; + } + } + } + ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_94097; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127131; + int32_t skip_waves_127132; + + skip_waves_127132 = 1; + + double x_127119; + double x_127120; + + offset_127131 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127104, + sext_i64_i32(segred_group_sizze_94093))) { + x_127119 = ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127131)]; + } + } + offset_127131 = 1; + while (slt32(offset_127131, wave_sizze_127106)) { + if (slt32(local_tid_127104 + offset_127131, + sext_i64_i32(segred_group_sizze_94093)) && + ((local_tid_127104 - squot32(local_tid_127104, + wave_sizze_127106) * + wave_sizze_127106) & (2 * offset_127131 - 1)) == + 0) { + // read array element + { + x_127120 = ((volatile __local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127131)]; + } + // apply reduction operation + { + double defunc_1_op_res_127121 = 
x_127119 + + x_127120; + + x_127119 = defunc_1_op_res_127121; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_127119; + } + } + offset_127131 *= 2; + } + while (slt32(skip_waves_127132, + squot32(sext_i64_i32(segred_group_sizze_94093) + + wave_sizze_127106 - 1, + wave_sizze_127106))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127131 = skip_waves_127132 * wave_sizze_127106; + if (slt32(local_tid_127104 + offset_127131, + sext_i64_i32(segred_group_sizze_94093)) && + ((local_tid_127104 - squot32(local_tid_127104, + wave_sizze_127106) * + wave_sizze_127106) == 0 && + (squot32(local_tid_127104, wave_sizze_127106) & + (2 * skip_waves_127132 - 1)) == 0)) { + // read array element + { + x_127120 = ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104 + + offset_127131)]; + } + // apply reduction operation + { + double defunc_1_op_res_127121 = x_127119 + + x_127120; + + x_127119 = defunc_1_op_res_127121; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127108)[sext_i32_i64(local_tid_127104)] = + x_127119; + } + } + skip_waves_127132 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127104 == 0) { + ((__global double *) mem_121840)[gtid_92250 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_92251 * + k2p2zq_73023 + + gtid_92252] = + x_127119; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_94093 +} +__kernel void mainMagnitudezisegred_large_92541(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126966_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126964_backing_aligned_1, + int64_t k2p2zq_73023, + int64_t x_93925, + int64_t i_93926, + int64_t j_m_i_93930, + int64_t num_groups_94012, + int64_t binop_x_120251, + int64_t groups_per_segment_126950, + int64_t elements_per_thread_126951, + int64_t 
virt_num_groups_126952, + int64_t threads_per_segment_126954, + __global + unsigned char *mem_121351, + __global + unsigned char *mem_param_121469, + __global + unsigned char *mem_121555, + __global + unsigned char *group_res_arr_mem_126955, + __global + unsigned char *mainMagnitudezicounter_mem_126957) +{ + #define segred_group_sizze_94011 (mainMagnitudezisegred_group_sizze_92535) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_126966_backing_1 = + (__local volatile + char *) sync_arr_mem_126966_backing_aligned_0; + __local volatile char *restrict red_arr_mem_126964_backing_0 = + (__local volatile + char *) red_arr_mem_126964_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126959; + int32_t local_tid_126960; + int64_t group_sizze_126963; + int32_t wave_sizze_126962; + int32_t group_tid_126961; + + global_tid_126959 = get_global_id(0); + local_tid_126960 = get_local_id(0); + group_sizze_126963 = get_local_size(0); + wave_sizze_126962 = LOCKSTEP_WIDTH; + group_tid_126961 = get_group_id(0); + + int32_t phys_tid_92541; + + phys_tid_92541 = global_tid_126959; + + __local char *red_arr_mem_126964; + + red_arr_mem_126964 = (__local char *) red_arr_mem_126964_backing_0; + + __local char *sync_arr_mem_126966; + + sync_arr_mem_126966 = (__local char *) sync_arr_mem_126966_backing_1; + + int32_t phys_group_id_126968; + + phys_group_id_126968 = get_group_id(0); + for (int32_t i_126969 = 0; i_126969 < + sdiv_up32(sext_i64_i32(virt_num_groups_126952) - phys_group_id_126968, + sext_i64_i32(num_groups_94012)); i_126969++) { + int32_t virt_group_id_126970 = phys_group_id_126968 + i_126969 * + sext_i64_i32(num_groups_94012); + int32_t flat_segment_id_126971 = squot32(virt_group_id_126970, + sext_i64_i32(groups_per_segment_126950)); + int64_t global_tid_126972 = srem64(sext_i32_i64(virt_group_id_126970) * + segred_group_sizze_94011 + + 
sext_i32_i64(local_tid_126960), + segred_group_sizze_94011 * + groups_per_segment_126950); + int64_t gtid_92530 = squot64(sext_i32_i64(flat_segment_id_126971), + k2p2zq_73023); + int64_t gtid_92531 = sext_i32_i64(flat_segment_id_126971) - + squot64(sext_i32_i64(flat_segment_id_126971), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_92540; + double x_acc_126973; + int64_t chunk_sizze_126974; + + chunk_sizze_126974 = smin64(elements_per_thread_126951, + sdiv_up64(j_m_i_93930 - global_tid_126972, + threads_per_segment_126954)); + + double x_94015; + double x_94016; + + // neutral-initialise the accumulators + { + x_acc_126973 = 0.0; + } + for (int64_t i_126978 = 0; i_126978 < chunk_sizze_126974; i_126978++) { + gtid_92540 = global_tid_126972 + threads_per_segment_126954 * + i_126978; + // apply map function + { + int64_t slice_115048 = gtid_92540 + x_93925; + double x_94022 = ((__global double *) mem_121351)[gtid_92530 * + (k2p2zq_73023 * + k2p2zq_73023) + + slice_115048 * + k2p2zq_73023 + + i_93926]; + bool isnan_res_94023; + + isnan_res_94023 = futrts_isnan64(x_94022); + + double defunc_1_f_res_94024; + + if (isnan_res_94023) { + defunc_1_f_res_94024 = 0.0; + } else { + double x_94021 = ((__global + double *) mem_param_121469)[gtid_92530 * + binop_x_120251 + + gtid_92531 * + k2p2zq_73023 + + slice_115048]; + double defunc_1_f_res_f_res_94025 = x_94021 * x_94022; + + defunc_1_f_res_94024 = defunc_1_f_res_f_res_94025; + } + // save map-out results + { } + // load accumulator + { + x_94015 = x_acc_126973; + } + // load new values + { + x_94016 = defunc_1_f_res_94024; + } + // apply reduction operator + { + double defunc_1_op_res_94017 = x_94015 + x_94016; + + // store in accumulator + { + x_acc_126973 = defunc_1_op_res_94017; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_94015 = x_acc_126973; + ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_94015; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t 
offset_126979; + int32_t skip_waves_126980; + + skip_waves_126980 = 1; + + double x_126975; + double x_126976; + + offset_126979 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126960, + sext_i64_i32(segred_group_sizze_94011))) { + x_126975 = ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126979)]; + } + } + offset_126979 = 1; + while (slt32(offset_126979, wave_sizze_126962)) { + if (slt32(local_tid_126960 + offset_126979, + sext_i64_i32(segred_group_sizze_94011)) && + ((local_tid_126960 - squot32(local_tid_126960, + wave_sizze_126962) * + wave_sizze_126962) & (2 * offset_126979 - 1)) == 0) { + // read array element + { + x_126976 = ((volatile __local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126979)]; + } + // apply reduction operation + { + double defunc_1_op_res_126977 = x_126975 + x_126976; + + x_126975 = defunc_1_op_res_126977; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_126975; + } + } + offset_126979 *= 2; + } + while (slt32(skip_waves_126980, + squot32(sext_i64_i32(segred_group_sizze_94011) + + wave_sizze_126962 - 1, wave_sizze_126962))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126979 = skip_waves_126980 * wave_sizze_126962; + if (slt32(local_tid_126960 + offset_126979, + sext_i64_i32(segred_group_sizze_94011)) && + ((local_tid_126960 - squot32(local_tid_126960, + wave_sizze_126962) * + wave_sizze_126962) == 0 && (squot32(local_tid_126960, + wave_sizze_126962) & (2 * + skip_waves_126980 - + 1)) == + 0)) { + // read array element + { + x_126976 = ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126979)]; + } + // apply reduction operation + { + double defunc_1_op_res_126977 = x_126975 + x_126976; + + x_126975 = defunc_1_op_res_126977; + } + // write result of operation + { + ((__local + double *) 
red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_126975; + } + } + skip_waves_126980 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126960) == (int64_t) 0) { + x_acc_126973 = x_126975; + } + } + if (groups_per_segment_126950 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_126960 == 0) { + ((__global double *) mem_121555)[gtid_92530 * k2p2zq_73023 + + gtid_92531] = x_acc_126973; + } + } + } else { + int32_t old_counter_126981; + + // first thread in group saves group result to global memory + { + if (local_tid_126960 == 0) { + ((__global + double *) group_res_arr_mem_126955)[sext_i32_i64(virt_group_id_126970) * + segred_group_sizze_94011] = + x_acc_126973; + mem_fence_global(); + old_counter_126981 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126957)[sext_i32_i64(srem32(flat_segment_id_126971, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_126966)[(int64_t) 0] = + old_counter_126981 == groups_per_segment_126950 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126982; + + is_last_group_126982 = ((__local + bool *) sync_arr_mem_126966)[(int64_t) 0]; + if (is_last_group_126982) { + if (local_tid_126960 == 0) { + old_counter_126981 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126957)[sext_i32_i64(srem32(flat_segment_id_126971, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_126950)); + } + // read in the per-group-results + { + int64_t read_per_thread_126983 = + sdiv_up64(groups_per_segment_126950, + segred_group_sizze_94011); + + x_94015 = 0.0; + for (int64_t i_126984 = 0; i_126984 < + read_per_thread_126983; i_126984++) { + int64_t group_res_id_126985 = + sext_i32_i64(local_tid_126960) * + read_per_thread_126983 + i_126984; + int64_t index_of_group_res_126986 = + 
sext_i32_i64(flat_segment_id_126971) * + groups_per_segment_126950 + group_res_id_126985; + + if (slt64(group_res_id_126985, + groups_per_segment_126950)) { + x_94016 = ((__global + double *) group_res_arr_mem_126955)[index_of_group_res_126986 * + segred_group_sizze_94011]; + + double defunc_1_op_res_94017; + + defunc_1_op_res_94017 = x_94015 + x_94016; + x_94015 = defunc_1_op_res_94017; + } + } + } + ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_94015; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126987; + int32_t skip_waves_126988; + + skip_waves_126988 = 1; + + double x_126975; + double x_126976; + + offset_126987 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126960, + sext_i64_i32(segred_group_sizze_94011))) { + x_126975 = ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126987)]; + } + } + offset_126987 = 1; + while (slt32(offset_126987, wave_sizze_126962)) { + if (slt32(local_tid_126960 + offset_126987, + sext_i64_i32(segred_group_sizze_94011)) && + ((local_tid_126960 - squot32(local_tid_126960, + wave_sizze_126962) * + wave_sizze_126962) & (2 * offset_126987 - 1)) == + 0) { + // read array element + { + x_126976 = ((volatile __local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126987)]; + } + // apply reduction operation + { + double defunc_1_op_res_126977 = x_126975 + + x_126976; + + x_126975 = defunc_1_op_res_126977; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_126975; + } + } + offset_126987 *= 2; + } + while (slt32(skip_waves_126988, + squot32(sext_i64_i32(segred_group_sizze_94011) + + wave_sizze_126962 - 1, + wave_sizze_126962))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126987 = skip_waves_126988 * wave_sizze_126962; + if (slt32(local_tid_126960 + offset_126987, + 
sext_i64_i32(segred_group_sizze_94011)) && + ((local_tid_126960 - squot32(local_tid_126960, + wave_sizze_126962) * + wave_sizze_126962) == 0 && + (squot32(local_tid_126960, wave_sizze_126962) & + (2 * skip_waves_126988 - 1)) == 0)) { + // read array element + { + x_126976 = ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960 + + offset_126987)]; + } + // apply reduction operation + { + double defunc_1_op_res_126977 = x_126975 + + x_126976; + + x_126975 = defunc_1_op_res_126977; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126964)[sext_i32_i64(local_tid_126960)] = + x_126975; + } + } + skip_waves_126988 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126960 == 0) { + ((__global double *) mem_121555)[gtid_92530 * + k2p2zq_73023 + + gtid_92531] = + x_126975; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_94011 +} +__kernel void mainMagnitudezisegred_large_93298(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_126741_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_126739_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t j_93466, + int64_t num_groups_93499, + int64_t groups_per_segment_126725, + int64_t elements_per_thread_126726, + int64_t virt_num_groups_126727, + int64_t threads_per_segment_126729, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_120938, + __global + unsigned char *group_res_arr_mem_126730, + __global + unsigned char *mainMagnitudezicounter_mem_126732) +{ + #define segred_group_sizze_93498 (mainMagnitudezisegred_group_sizze_93292) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_126741_backing_1 = + (__local volatile + char *) sync_arr_mem_126741_backing_aligned_0; + __local volatile char 
*restrict red_arr_mem_126739_backing_0 = + (__local volatile + char *) red_arr_mem_126739_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126734; + int32_t local_tid_126735; + int64_t group_sizze_126738; + int32_t wave_sizze_126737; + int32_t group_tid_126736; + + global_tid_126734 = get_global_id(0); + local_tid_126735 = get_local_id(0); + group_sizze_126738 = get_local_size(0); + wave_sizze_126737 = LOCKSTEP_WIDTH; + group_tid_126736 = get_group_id(0); + + int32_t phys_tid_93298; + + phys_tid_93298 = global_tid_126734; + + __local char *red_arr_mem_126739; + + red_arr_mem_126739 = (__local char *) red_arr_mem_126739_backing_0; + + __local char *sync_arr_mem_126741; + + sync_arr_mem_126741 = (__local char *) sync_arr_mem_126741_backing_1; + + int32_t phys_group_id_126743; + + phys_group_id_126743 = get_group_id(0); + for (int32_t i_126744 = 0; i_126744 < + sdiv_up32(sext_i64_i32(virt_num_groups_126727) - phys_group_id_126743, + sext_i64_i32(num_groups_93499)); i_126744++) { + int32_t virt_group_id_126745 = phys_group_id_126743 + i_126744 * + sext_i64_i32(num_groups_93499); + int32_t flat_segment_id_126746 = squot32(virt_group_id_126745, + sext_i64_i32(groups_per_segment_126725)); + int64_t global_tid_126747 = srem64(sext_i32_i64(virt_group_id_126745) * + segred_group_sizze_93498 + + sext_i32_i64(local_tid_126735), + segred_group_sizze_93498 * + groups_per_segment_126725); + int64_t gtid_93289 = sext_i32_i64(flat_segment_id_126746); + int64_t gtid_93297; + double x_acc_126748; + int64_t chunk_sizze_126749; + + chunk_sizze_126749 = smin64(elements_per_thread_126726, + sdiv_up64(k2p2zq_73023 - global_tid_126747, + threads_per_segment_126729)); + + double x_93502; + double x_93503; + + // neutral-initialise the accumulators + { + x_acc_126748 = 0.0; + } + for (int64_t i_126753 = 0; i_126753 < chunk_sizze_126749; i_126753++) { + gtid_93297 = global_tid_126747 + threads_per_segment_126729 * + i_126753; + // apply map function + { + 
double x_93506 = ((__global double *) mem_120246)[j_93466 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_93289 * + defunc_2_reduce_res_73132 + + gtid_93297]; + double defunc_1_f_res_93507 = x_93506 * x_93506; + + // save map-out results + { } + // load accumulator + { + x_93502 = x_acc_126748; + } + // load new values + { + x_93503 = defunc_1_f_res_93507; + } + // apply reduction operator + { + double defunc_1_op_res_93504 = x_93502 + x_93503; + + // store in accumulator + { + x_acc_126748 = defunc_1_op_res_93504; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_93502 = x_acc_126748; + ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_93502; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126754; + int32_t skip_waves_126755; + + skip_waves_126755 = 1; + + double x_126750; + double x_126751; + + offset_126754 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126735, + sext_i64_i32(segred_group_sizze_93498))) { + x_126750 = ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126754)]; + } + } + offset_126754 = 1; + while (slt32(offset_126754, wave_sizze_126737)) { + if (slt32(local_tid_126735 + offset_126754, + sext_i64_i32(segred_group_sizze_93498)) && + ((local_tid_126735 - squot32(local_tid_126735, + wave_sizze_126737) * + wave_sizze_126737) & (2 * offset_126754 - 1)) == 0) { + // read array element + { + x_126751 = ((volatile __local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126754)]; + } + // apply reduction operation + { + double defunc_1_op_res_126752 = x_126750 + x_126751; + + x_126750 = defunc_1_op_res_126752; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_126750; + } + } + offset_126754 *= 2; + } + while (slt32(skip_waves_126755, + squot32(sext_i64_i32(segred_group_sizze_93498) + + wave_sizze_126737 - 
1, wave_sizze_126737))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126754 = skip_waves_126755 * wave_sizze_126737; + if (slt32(local_tid_126735 + offset_126754, + sext_i64_i32(segred_group_sizze_93498)) && + ((local_tid_126735 - squot32(local_tid_126735, + wave_sizze_126737) * + wave_sizze_126737) == 0 && (squot32(local_tid_126735, + wave_sizze_126737) & (2 * + skip_waves_126755 - + 1)) == + 0)) { + // read array element + { + x_126751 = ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126754)]; + } + // apply reduction operation + { + double defunc_1_op_res_126752 = x_126750 + x_126751; + + x_126750 = defunc_1_op_res_126752; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_126750; + } + } + skip_waves_126755 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126735) == (int64_t) 0) { + x_acc_126748 = x_126750; + } + } + if (groups_per_segment_126725 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_126735 == 0) { + ((__global double *) mem_120938)[gtid_93289] = x_acc_126748; + } + } + } else { + int32_t old_counter_126756; + + // first thread in group saves group result to global memory + { + if (local_tid_126735 == 0) { + ((__global + double *) group_res_arr_mem_126730)[sext_i32_i64(virt_group_id_126745) * + segred_group_sizze_93498] = + x_acc_126748; + mem_fence_global(); + old_counter_126756 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126732)[sext_i32_i64(srem32(flat_segment_id_126746, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_126741)[(int64_t) 0] = + old_counter_126756 == groups_per_segment_126725 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126757; + + is_last_group_126757 = ((__local + bool *) sync_arr_mem_126741)[(int64_t) 0]; 
+ if (is_last_group_126757) { + if (local_tid_126735 == 0) { + old_counter_126756 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126732)[sext_i32_i64(srem32(flat_segment_id_126746, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_126725)); + } + // read in the per-group-results + { + int64_t read_per_thread_126758 = + sdiv_up64(groups_per_segment_126725, + segred_group_sizze_93498); + + x_93502 = 0.0; + for (int64_t i_126759 = 0; i_126759 < + read_per_thread_126758; i_126759++) { + int64_t group_res_id_126760 = + sext_i32_i64(local_tid_126735) * + read_per_thread_126758 + i_126759; + int64_t index_of_group_res_126761 = + sext_i32_i64(flat_segment_id_126746) * + groups_per_segment_126725 + group_res_id_126760; + + if (slt64(group_res_id_126760, + groups_per_segment_126725)) { + x_93503 = ((__global + double *) group_res_arr_mem_126730)[index_of_group_res_126761 * + segred_group_sizze_93498]; + + double defunc_1_op_res_93504; + + defunc_1_op_res_93504 = x_93502 + x_93503; + x_93502 = defunc_1_op_res_93504; + } + } + } + ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_93502; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126762; + int32_t skip_waves_126763; + + skip_waves_126763 = 1; + + double x_126750; + double x_126751; + + offset_126762 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126735, + sext_i64_i32(segred_group_sizze_93498))) { + x_126750 = ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126762)]; + } + } + offset_126762 = 1; + while (slt32(offset_126762, wave_sizze_126737)) { + if (slt32(local_tid_126735 + offset_126762, + sext_i64_i32(segred_group_sizze_93498)) && + ((local_tid_126735 - squot32(local_tid_126735, + wave_sizze_126737) * + wave_sizze_126737) & (2 * offset_126762 - 1)) == + 0) { + // read array element + { + x_126751 = ((volatile __local + double *) 
red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126762)]; + } + // apply reduction operation + { + double defunc_1_op_res_126752 = x_126750 + + x_126751; + + x_126750 = defunc_1_op_res_126752; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_126750; + } + } + offset_126762 *= 2; + } + while (slt32(skip_waves_126763, + squot32(sext_i64_i32(segred_group_sizze_93498) + + wave_sizze_126737 - 1, + wave_sizze_126737))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126762 = skip_waves_126763 * wave_sizze_126737; + if (slt32(local_tid_126735 + offset_126762, + sext_i64_i32(segred_group_sizze_93498)) && + ((local_tid_126735 - squot32(local_tid_126735, + wave_sizze_126737) * + wave_sizze_126737) == 0 && + (squot32(local_tid_126735, wave_sizze_126737) & + (2 * skip_waves_126763 - 1)) == 0)) { + // read array element + { + x_126751 = ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735 + + offset_126762)]; + } + // apply reduction operation + { + double defunc_1_op_res_126752 = x_126750 + + x_126751; + + x_126750 = defunc_1_op_res_126752; + } + // write result of operation + { + ((__local + double *) red_arr_mem_126739)[sext_i32_i64(local_tid_126735)] = + x_126750; + } + } + skip_waves_126763 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126735 == 0) { + ((__global double *) mem_120938)[gtid_93289] = + x_126750; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_93498 +} +__kernel void mainMagnitudezisegred_large_96013(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127894_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127892_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_97994, + int64_t groups_per_segment_127878, + int64_t elements_per_thread_127879, + int64_t virt_num_groups_127880, + 
int64_t threads_per_segment_127882, + __global + unsigned char *mem_123614, + __global + unsigned char *mem_123618, + __global + unsigned char *mem_123623, + __global + unsigned char *group_res_arr_mem_127883, + __global + unsigned char *mainMagnitudezicounter_mem_127885) +{ + #define segred_group_sizze_97993 (mainMagnitudezisegred_group_sizze_96007) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127894_backing_1 = + (__local volatile + char *) sync_arr_mem_127894_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127892_backing_0 = + (__local volatile + char *) red_arr_mem_127892_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127887; + int32_t local_tid_127888; + int64_t group_sizze_127891; + int32_t wave_sizze_127890; + int32_t group_tid_127889; + + global_tid_127887 = get_global_id(0); + local_tid_127888 = get_local_id(0); + group_sizze_127891 = get_local_size(0); + wave_sizze_127890 = LOCKSTEP_WIDTH; + group_tid_127889 = get_group_id(0); + + int32_t phys_tid_96013; + + phys_tid_96013 = global_tid_127887; + + __local char *red_arr_mem_127892; + + red_arr_mem_127892 = (__local char *) red_arr_mem_127892_backing_0; + + __local char *sync_arr_mem_127894; + + sync_arr_mem_127894 = (__local char *) sync_arr_mem_127894_backing_1; + + int32_t phys_group_id_127896; + + phys_group_id_127896 = get_group_id(0); + for (int32_t i_127897 = 0; i_127897 < + sdiv_up32(sext_i64_i32(virt_num_groups_127880) - phys_group_id_127896, + sext_i64_i32(num_groups_97994)); i_127897++) { + int32_t virt_group_id_127898 = phys_group_id_127896 + i_127897 * + sext_i64_i32(num_groups_97994); + int32_t flat_segment_id_127899 = squot32(virt_group_id_127898, + sext_i64_i32(groups_per_segment_127878)); + int64_t global_tid_127900 = srem64(sext_i32_i64(virt_group_id_127898) * + segred_group_sizze_97993 + + sext_i32_i64(local_tid_127888), + segred_group_sizze_97993 * + 
groups_per_segment_127878); + int64_t gtid_96000 = squot64(sext_i32_i64(flat_segment_id_127899), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_96001 = squot64(sext_i32_i64(flat_segment_id_127899) - + squot64(sext_i32_i64(flat_segment_id_127899), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_96002 = sext_i32_i64(flat_segment_id_127899) - + squot64(sext_i32_i64(flat_segment_id_127899), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(flat_segment_id_127899) - + squot64(sext_i32_i64(flat_segment_id_127899), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_96012; + double x_acc_127901; + int64_t chunk_sizze_127902; + + chunk_sizze_127902 = smin64(elements_per_thread_127879, + sdiv_up64(k2p2zq_73023 - global_tid_127900, + threads_per_segment_127882)); + + double x_97997; + double x_97998; + + // neutral-initialise the accumulators + { + x_acc_127901 = 0.0; + } + for (int64_t i_127906 = 0; i_127906 < chunk_sizze_127902; i_127906++) { + gtid_96012 = global_tid_127900 + threads_per_segment_127882 * + i_127906; + // apply map function + { + double x_98003 = ((__global double *) mem_123614)[gtid_96001 * + (k2p2zq_73023 * + m_73008) + + gtid_96000 * + k2p2zq_73023 + + gtid_96012]; + double x_98004 = ((__global double *) mem_123618)[gtid_96000 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_96002 * + k2p2zq_73023 + + gtid_96012]; + double defunc_1_f_res_98005 = x_98003 * x_98004; + + // save map-out results + { } + // load accumulator + { + x_97997 = x_acc_127901; + } + // load new values + { + x_97998 = defunc_1_f_res_98005; + } + // apply reduction operator + { + double defunc_1_op_res_97999 = x_97997 + x_97998; + + // store in accumulator + { + x_acc_127901 = defunc_1_op_res_97999; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_97997 = x_acc_127901; + ((__local + double *) 
red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_97997; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127907; + int32_t skip_waves_127908; + + skip_waves_127908 = 1; + + double x_127903; + double x_127904; + + offset_127907 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127888, + sext_i64_i32(segred_group_sizze_97993))) { + x_127903 = ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127907)]; + } + } + offset_127907 = 1; + while (slt32(offset_127907, wave_sizze_127890)) { + if (slt32(local_tid_127888 + offset_127907, + sext_i64_i32(segred_group_sizze_97993)) && + ((local_tid_127888 - squot32(local_tid_127888, + wave_sizze_127890) * + wave_sizze_127890) & (2 * offset_127907 - 1)) == 0) { + // read array element + { + x_127904 = ((volatile __local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127907)]; + } + // apply reduction operation + { + double defunc_1_op_res_127905 = x_127903 + x_127904; + + x_127903 = defunc_1_op_res_127905; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_127903; + } + } + offset_127907 *= 2; + } + while (slt32(skip_waves_127908, + squot32(sext_i64_i32(segred_group_sizze_97993) + + wave_sizze_127890 - 1, wave_sizze_127890))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127907 = skip_waves_127908 * wave_sizze_127890; + if (slt32(local_tid_127888 + offset_127907, + sext_i64_i32(segred_group_sizze_97993)) && + ((local_tid_127888 - squot32(local_tid_127888, + wave_sizze_127890) * + wave_sizze_127890) == 0 && (squot32(local_tid_127888, + wave_sizze_127890) & (2 * + skip_waves_127908 - + 1)) == + 0)) { + // read array element + { + x_127904 = ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127907)]; + } + // apply reduction operation + { + double defunc_1_op_res_127905 = x_127903 + x_127904; + + x_127903 = 
defunc_1_op_res_127905; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_127903; + } + } + skip_waves_127908 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127888) == (int64_t) 0) { + x_acc_127901 = x_127903; + } + } + if (groups_per_segment_127878 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127888 == 0) { + ((__global double *) mem_123623)[gtid_96000 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_96001 * k2p2zq_73023 + + gtid_96002] = x_acc_127901; + } + } + } else { + int32_t old_counter_127909; + + // first thread in group saves group result to global memory + { + if (local_tid_127888 == 0) { + ((__global + double *) group_res_arr_mem_127883)[sext_i32_i64(virt_group_id_127898) * + segred_group_sizze_97993] = + x_acc_127901; + mem_fence_global(); + old_counter_127909 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127885)[sext_i32_i64(srem32(flat_segment_id_127899, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127894)[(int64_t) 0] = + old_counter_127909 == groups_per_segment_127878 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127910; + + is_last_group_127910 = ((__local + bool *) sync_arr_mem_127894)[(int64_t) 0]; + if (is_last_group_127910) { + if (local_tid_127888 == 0) { + old_counter_127909 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127885)[sext_i32_i64(srem32(flat_segment_id_127899, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127878)); + } + // read in the per-group-results + { + int64_t read_per_thread_127911 = + sdiv_up64(groups_per_segment_127878, + segred_group_sizze_97993); + + x_97997 = 0.0; + for (int64_t i_127912 = 0; i_127912 < + read_per_thread_127911; i_127912++) { + int64_t group_res_id_127913 = + 
sext_i32_i64(local_tid_127888) * + read_per_thread_127911 + i_127912; + int64_t index_of_group_res_127914 = + sext_i32_i64(flat_segment_id_127899) * + groups_per_segment_127878 + group_res_id_127913; + + if (slt64(group_res_id_127913, + groups_per_segment_127878)) { + x_97998 = ((__global + double *) group_res_arr_mem_127883)[index_of_group_res_127914 * + segred_group_sizze_97993]; + + double defunc_1_op_res_97999; + + defunc_1_op_res_97999 = x_97997 + x_97998; + x_97997 = defunc_1_op_res_97999; + } + } + } + ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_97997; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127915; + int32_t skip_waves_127916; + + skip_waves_127916 = 1; + + double x_127903; + double x_127904; + + offset_127915 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127888, + sext_i64_i32(segred_group_sizze_97993))) { + x_127903 = ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127915)]; + } + } + offset_127915 = 1; + while (slt32(offset_127915, wave_sizze_127890)) { + if (slt32(local_tid_127888 + offset_127915, + sext_i64_i32(segred_group_sizze_97993)) && + ((local_tid_127888 - squot32(local_tid_127888, + wave_sizze_127890) * + wave_sizze_127890) & (2 * offset_127915 - 1)) == + 0) { + // read array element + { + x_127904 = ((volatile __local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127915)]; + } + // apply reduction operation + { + double defunc_1_op_res_127905 = x_127903 + + x_127904; + + x_127903 = defunc_1_op_res_127905; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_127903; + } + } + offset_127915 *= 2; + } + while (slt32(skip_waves_127916, + squot32(sext_i64_i32(segred_group_sizze_97993) + + wave_sizze_127890 - 1, + wave_sizze_127890))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127915 = 
skip_waves_127916 * wave_sizze_127890; + if (slt32(local_tid_127888 + offset_127915, + sext_i64_i32(segred_group_sizze_97993)) && + ((local_tid_127888 - squot32(local_tid_127888, + wave_sizze_127890) * + wave_sizze_127890) == 0 && + (squot32(local_tid_127888, wave_sizze_127890) & + (2 * skip_waves_127916 - 1)) == 0)) { + // read array element + { + x_127904 = ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888 + + offset_127915)]; + } + // apply reduction operation + { + double defunc_1_op_res_127905 = x_127903 + + x_127904; + + x_127903 = defunc_1_op_res_127905; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127892)[sext_i32_i64(local_tid_127888)] = + x_127903; + } + } + skip_waves_127916 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127888 == 0) { + ((__global double *) mem_123623)[gtid_96000 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_96001 * + k2p2zq_73023 + + gtid_96002] = + x_127903; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97993 +} +__kernel void mainMagnitudezisegred_large_96291(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127750_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127748_backing_aligned_1, + int64_t k2p2zq_73023, + int64_t x_97825, + int64_t i_97826, + int64_t j_m_i_97830, + int64_t num_groups_97912, + int64_t binop_x_120251, + int64_t groups_per_segment_127734, + int64_t elements_per_thread_127735, + int64_t virt_num_groups_127736, + int64_t threads_per_segment_127738, + __global + unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global + unsigned char *mem_123338, + __global + unsigned char *group_res_arr_mem_127739, + __global + unsigned char *mainMagnitudezicounter_mem_127741) +{ + #define segred_group_sizze_97911 (mainMagnitudezisegred_group_sizze_96285) + + const int block_dim0 = 0; + const int block_dim1 
= 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127750_backing_1 = + (__local volatile + char *) sync_arr_mem_127750_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127748_backing_0 = + (__local volatile + char *) red_arr_mem_127748_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127743; + int32_t local_tid_127744; + int64_t group_sizze_127747; + int32_t wave_sizze_127746; + int32_t group_tid_127745; + + global_tid_127743 = get_global_id(0); + local_tid_127744 = get_local_id(0); + group_sizze_127747 = get_local_size(0); + wave_sizze_127746 = LOCKSTEP_WIDTH; + group_tid_127745 = get_group_id(0); + + int32_t phys_tid_96291; + + phys_tid_96291 = global_tid_127743; + + __local char *red_arr_mem_127748; + + red_arr_mem_127748 = (__local char *) red_arr_mem_127748_backing_0; + + __local char *sync_arr_mem_127750; + + sync_arr_mem_127750 = (__local char *) sync_arr_mem_127750_backing_1; + + int32_t phys_group_id_127752; + + phys_group_id_127752 = get_group_id(0); + for (int32_t i_127753 = 0; i_127753 < + sdiv_up32(sext_i64_i32(virt_num_groups_127736) - phys_group_id_127752, + sext_i64_i32(num_groups_97912)); i_127753++) { + int32_t virt_group_id_127754 = phys_group_id_127752 + i_127753 * + sext_i64_i32(num_groups_97912); + int32_t flat_segment_id_127755 = squot32(virt_group_id_127754, + sext_i64_i32(groups_per_segment_127734)); + int64_t global_tid_127756 = srem64(sext_i32_i64(virt_group_id_127754) * + segred_group_sizze_97911 + + sext_i32_i64(local_tid_127744), + segred_group_sizze_97911 * + groups_per_segment_127734); + int64_t gtid_96280 = squot64(sext_i32_i64(flat_segment_id_127755), + k2p2zq_73023); + int64_t gtid_96281 = sext_i32_i64(flat_segment_id_127755) - + squot64(sext_i32_i64(flat_segment_id_127755), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_96290; + double x_acc_127757; + int64_t chunk_sizze_127758; + + chunk_sizze_127758 = smin64(elements_per_thread_127735, + 
sdiv_up64(j_m_i_97830 - global_tid_127756, + threads_per_segment_127738)); + + double x_97915; + double x_97916; + + // neutral-initialise the accumulators + { + x_acc_127757 = 0.0; + } + for (int64_t i_127762 = 0; i_127762 < chunk_sizze_127758; i_127762++) { + gtid_96290 = global_tid_127756 + threads_per_segment_127738 * + i_127762; + // apply map function + { + int64_t slice_115165 = gtid_96290 + x_97825; + double x_97922 = ((__global double *) mem_123143)[gtid_96280 * + (k2p2zq_73023 * + k2p2zq_73023) + + slice_115165 * + k2p2zq_73023 + + i_97826]; + bool isnan_res_97923; + + isnan_res_97923 = futrts_isnan64(x_97922); + + double defunc_1_f_res_97924; + + if (isnan_res_97923) { + defunc_1_f_res_97924 = 0.0; + } else { + double x_97921 = ((__global + double *) mem_param_123252)[gtid_96280 * + binop_x_120251 + + gtid_96281 * + k2p2zq_73023 + + slice_115165]; + double defunc_1_f_res_f_res_97925 = x_97921 * x_97922; + + defunc_1_f_res_97924 = defunc_1_f_res_f_res_97925; + } + // save map-out results + { } + // load accumulator + { + x_97915 = x_acc_127757; + } + // load new values + { + x_97916 = defunc_1_f_res_97924; + } + // apply reduction operator + { + double defunc_1_op_res_97917 = x_97915 + x_97916; + + // store in accumulator + { + x_acc_127757 = defunc_1_op_res_97917; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_97915 = x_acc_127757; + ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_97915; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127763; + int32_t skip_waves_127764; + + skip_waves_127764 = 1; + + double x_127759; + double x_127760; + + offset_127763 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127744, + sext_i64_i32(segred_group_sizze_97911))) { + x_127759 = ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127763)]; + } + } + offset_127763 = 1; + while (slt32(offset_127763, wave_sizze_127746)) { + if 
(slt32(local_tid_127744 + offset_127763, + sext_i64_i32(segred_group_sizze_97911)) && + ((local_tid_127744 - squot32(local_tid_127744, + wave_sizze_127746) * + wave_sizze_127746) & (2 * offset_127763 - 1)) == 0) { + // read array element + { + x_127760 = ((volatile __local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127763)]; + } + // apply reduction operation + { + double defunc_1_op_res_127761 = x_127759 + x_127760; + + x_127759 = defunc_1_op_res_127761; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_127759; + } + } + offset_127763 *= 2; + } + while (slt32(skip_waves_127764, + squot32(sext_i64_i32(segred_group_sizze_97911) + + wave_sizze_127746 - 1, wave_sizze_127746))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127763 = skip_waves_127764 * wave_sizze_127746; + if (slt32(local_tid_127744 + offset_127763, + sext_i64_i32(segred_group_sizze_97911)) && + ((local_tid_127744 - squot32(local_tid_127744, + wave_sizze_127746) * + wave_sizze_127746) == 0 && (squot32(local_tid_127744, + wave_sizze_127746) & (2 * + skip_waves_127764 - + 1)) == + 0)) { + // read array element + { + x_127760 = ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127763)]; + } + // apply reduction operation + { + double defunc_1_op_res_127761 = x_127759 + x_127760; + + x_127759 = defunc_1_op_res_127761; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_127759; + } + } + skip_waves_127764 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127744) == (int64_t) 0) { + x_acc_127757 = x_127759; + } + } + if (groups_per_segment_127734 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127744 == 0) { + ((__global double *) mem_123338)[gtid_96280 * k2p2zq_73023 + + gtid_96281] = 
x_acc_127757; + } + } + } else { + int32_t old_counter_127765; + + // first thread in group saves group result to global memory + { + if (local_tid_127744 == 0) { + ((__global + double *) group_res_arr_mem_127739)[sext_i32_i64(virt_group_id_127754) * + segred_group_sizze_97911] = + x_acc_127757; + mem_fence_global(); + old_counter_127765 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127741)[sext_i32_i64(srem32(flat_segment_id_127755, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127750)[(int64_t) 0] = + old_counter_127765 == groups_per_segment_127734 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127766; + + is_last_group_127766 = ((__local + bool *) sync_arr_mem_127750)[(int64_t) 0]; + if (is_last_group_127766) { + if (local_tid_127744 == 0) { + old_counter_127765 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127741)[sext_i32_i64(srem32(flat_segment_id_127755, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127734)); + } + // read in the per-group-results + { + int64_t read_per_thread_127767 = + sdiv_up64(groups_per_segment_127734, + segred_group_sizze_97911); + + x_97915 = 0.0; + for (int64_t i_127768 = 0; i_127768 < + read_per_thread_127767; i_127768++) { + int64_t group_res_id_127769 = + sext_i32_i64(local_tid_127744) * + read_per_thread_127767 + i_127768; + int64_t index_of_group_res_127770 = + sext_i32_i64(flat_segment_id_127755) * + groups_per_segment_127734 + group_res_id_127769; + + if (slt64(group_res_id_127769, + groups_per_segment_127734)) { + x_97916 = ((__global + double *) group_res_arr_mem_127739)[index_of_group_res_127770 * + segred_group_sizze_97911]; + + double defunc_1_op_res_97917; + + defunc_1_op_res_97917 = x_97915 + x_97916; + x_97915 = defunc_1_op_res_97917; + } + } + } + ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_97915; + barrier(CLK_LOCAL_MEM_FENCE); + // 
reduce the per-group results + { + int32_t offset_127771; + int32_t skip_waves_127772; + + skip_waves_127772 = 1; + + double x_127759; + double x_127760; + + offset_127771 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127744, + sext_i64_i32(segred_group_sizze_97911))) { + x_127759 = ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127771)]; + } + } + offset_127771 = 1; + while (slt32(offset_127771, wave_sizze_127746)) { + if (slt32(local_tid_127744 + offset_127771, + sext_i64_i32(segred_group_sizze_97911)) && + ((local_tid_127744 - squot32(local_tid_127744, + wave_sizze_127746) * + wave_sizze_127746) & (2 * offset_127771 - 1)) == + 0) { + // read array element + { + x_127760 = ((volatile __local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127771)]; + } + // apply reduction operation + { + double defunc_1_op_res_127761 = x_127759 + + x_127760; + + x_127759 = defunc_1_op_res_127761; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_127759; + } + } + offset_127771 *= 2; + } + while (slt32(skip_waves_127772, + squot32(sext_i64_i32(segred_group_sizze_97911) + + wave_sizze_127746 - 1, + wave_sizze_127746))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127771 = skip_waves_127772 * wave_sizze_127746; + if (slt32(local_tid_127744 + offset_127771, + sext_i64_i32(segred_group_sizze_97911)) && + ((local_tid_127744 - squot32(local_tid_127744, + wave_sizze_127746) * + wave_sizze_127746) == 0 && + (squot32(local_tid_127744, wave_sizze_127746) & + (2 * skip_waves_127772 - 1)) == 0)) { + // read array element + { + x_127760 = ((__local + double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744 + + offset_127771)]; + } + // apply reduction operation + { + double defunc_1_op_res_127761 = x_127759 + + x_127760; + + x_127759 = defunc_1_op_res_127761; + } + // write result of operation + { + ((__local + 
double *) red_arr_mem_127748)[sext_i32_i64(local_tid_127744)] = + x_127759; + } + } + skip_waves_127772 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127744 == 0) { + ((__global double *) mem_123338)[gtid_96280 * + k2p2zq_73023 + + gtid_96281] = + x_127759; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97911 +} +__kernel void mainMagnitudezisegred_large_97064(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127525_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127523_backing_aligned_1, + int64_t m_73008, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, + int64_t j_97356, + int64_t num_groups_97389, + int64_t groups_per_segment_127509, + int64_t elements_per_thread_127510, + int64_t virt_num_groups_127511, + int64_t threads_per_segment_127513, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122730, + __global + unsigned char *group_res_arr_mem_127514, + __global + unsigned char *mainMagnitudezicounter_mem_127516) +{ + #define segred_group_sizze_97388 (mainMagnitudezisegred_group_sizze_97058) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127525_backing_1 = + (__local volatile + char *) sync_arr_mem_127525_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127523_backing_0 = + (__local volatile + char *) red_arr_mem_127523_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127518; + int32_t local_tid_127519; + int64_t group_sizze_127522; + int32_t wave_sizze_127521; + int32_t group_tid_127520; + + global_tid_127518 = get_global_id(0); + local_tid_127519 = get_local_id(0); + group_sizze_127522 = get_local_size(0); + wave_sizze_127521 = LOCKSTEP_WIDTH; + group_tid_127520 = get_group_id(0); + + int32_t phys_tid_97064; + + phys_tid_97064 = global_tid_127518; + 
+ __local char *red_arr_mem_127523; + + red_arr_mem_127523 = (__local char *) red_arr_mem_127523_backing_0; + + __local char *sync_arr_mem_127525; + + sync_arr_mem_127525 = (__local char *) sync_arr_mem_127525_backing_1; + + int32_t phys_group_id_127527; + + phys_group_id_127527 = get_group_id(0); + for (int32_t i_127528 = 0; i_127528 < + sdiv_up32(sext_i64_i32(virt_num_groups_127511) - phys_group_id_127527, + sext_i64_i32(num_groups_97389)); i_127528++) { + int32_t virt_group_id_127529 = phys_group_id_127527 + i_127528 * + sext_i64_i32(num_groups_97389); + int32_t flat_segment_id_127530 = squot32(virt_group_id_127529, + sext_i64_i32(groups_per_segment_127509)); + int64_t global_tid_127531 = srem64(sext_i32_i64(virt_group_id_127529) * + segred_group_sizze_97388 + + sext_i32_i64(local_tid_127519), + segred_group_sizze_97388 * + groups_per_segment_127509); + int64_t gtid_97055 = sext_i32_i64(flat_segment_id_127530); + int64_t gtid_97063; + double x_acc_127532; + int64_t chunk_sizze_127533; + + chunk_sizze_127533 = smin64(elements_per_thread_127510, + sdiv_up64(rp1_73709 - global_tid_127531, + threads_per_segment_127513)); + + double x_97392; + double x_97393; + + // neutral-initialise the accumulators + { + x_acc_127532 = 0.0; + } + for (int64_t i_127537 = 0; i_127537 < chunk_sizze_127533; i_127537++) { + gtid_97063 = global_tid_127531 + threads_per_segment_127513 * + i_127537; + // apply map function + { + double x_97396 = ((__global double *) mem_120246)[j_97356 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97055 * + defunc_2_reduce_res_73132 + + gtid_97063]; + double defunc_1_f_res_97397 = x_97396 * x_97396; + + // save map-out results + { } + // load accumulator + { + x_97392 = x_acc_127532; + } + // load new values + { + x_97393 = defunc_1_f_res_97397; + } + // apply reduction operator + { + double defunc_1_op_res_97394 = x_97392 + x_97393; + + // store in accumulator + { + x_acc_127532 = defunc_1_op_res_97394; + } + } + } + } + // to reduce current 
chunk, first store our result in memory + { + x_97392 = x_acc_127532; + ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_97392; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127538; + int32_t skip_waves_127539; + + skip_waves_127539 = 1; + + double x_127534; + double x_127535; + + offset_127538 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127519, + sext_i64_i32(segred_group_sizze_97388))) { + x_127534 = ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127538)]; + } + } + offset_127538 = 1; + while (slt32(offset_127538, wave_sizze_127521)) { + if (slt32(local_tid_127519 + offset_127538, + sext_i64_i32(segred_group_sizze_97388)) && + ((local_tid_127519 - squot32(local_tid_127519, + wave_sizze_127521) * + wave_sizze_127521) & (2 * offset_127538 - 1)) == 0) { + // read array element + { + x_127535 = ((volatile __local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127538)]; + } + // apply reduction operation + { + double defunc_1_op_res_127536 = x_127534 + x_127535; + + x_127534 = defunc_1_op_res_127536; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_127534; + } + } + offset_127538 *= 2; + } + while (slt32(skip_waves_127539, + squot32(sext_i64_i32(segred_group_sizze_97388) + + wave_sizze_127521 - 1, wave_sizze_127521))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127538 = skip_waves_127539 * wave_sizze_127521; + if (slt32(local_tid_127519 + offset_127538, + sext_i64_i32(segred_group_sizze_97388)) && + ((local_tid_127519 - squot32(local_tid_127519, + wave_sizze_127521) * + wave_sizze_127521) == 0 && (squot32(local_tid_127519, + wave_sizze_127521) & (2 * + skip_waves_127539 - + 1)) == + 0)) { + // read array element + { + x_127535 = ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127538)]; + } + // apply reduction 
operation + { + double defunc_1_op_res_127536 = x_127534 + x_127535; + + x_127534 = defunc_1_op_res_127536; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_127534; + } + } + skip_waves_127539 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127519) == (int64_t) 0) { + x_acc_127532 = x_127534; + } + } + if (groups_per_segment_127509 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127519 == 0) { + ((__global double *) mem_122730)[gtid_97055] = x_acc_127532; + } + } + } else { + int32_t old_counter_127540; + + // first thread in group saves group result to global memory + { + if (local_tid_127519 == 0) { + ((__global + double *) group_res_arr_mem_127514)[sext_i32_i64(virt_group_id_127529) * + segred_group_sizze_97388] = + x_acc_127532; + mem_fence_global(); + old_counter_127540 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127516)[sext_i32_i64(srem32(flat_segment_id_127530, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127525)[(int64_t) 0] = + old_counter_127540 == groups_per_segment_127509 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127541; + + is_last_group_127541 = ((__local + bool *) sync_arr_mem_127525)[(int64_t) 0]; + if (is_last_group_127541) { + if (local_tid_127519 == 0) { + old_counter_127540 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127516)[sext_i32_i64(srem32(flat_segment_id_127530, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127509)); + } + // read in the per-group-results + { + int64_t read_per_thread_127542 = + sdiv_up64(groups_per_segment_127509, + segred_group_sizze_97388); + + x_97392 = 0.0; + for (int64_t i_127543 = 0; i_127543 < + read_per_thread_127542; i_127543++) { + int64_t group_res_id_127544 = 
+ sext_i32_i64(local_tid_127519) * + read_per_thread_127542 + i_127543; + int64_t index_of_group_res_127545 = + sext_i32_i64(flat_segment_id_127530) * + groups_per_segment_127509 + group_res_id_127544; + + if (slt64(group_res_id_127544, + groups_per_segment_127509)) { + x_97393 = ((__global + double *) group_res_arr_mem_127514)[index_of_group_res_127545 * + segred_group_sizze_97388]; + + double defunc_1_op_res_97394; + + defunc_1_op_res_97394 = x_97392 + x_97393; + x_97392 = defunc_1_op_res_97394; + } + } + } + ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_97392; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127546; + int32_t skip_waves_127547; + + skip_waves_127547 = 1; + + double x_127534; + double x_127535; + + offset_127546 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127519, + sext_i64_i32(segred_group_sizze_97388))) { + x_127534 = ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127546)]; + } + } + offset_127546 = 1; + while (slt32(offset_127546, wave_sizze_127521)) { + if (slt32(local_tid_127519 + offset_127546, + sext_i64_i32(segred_group_sizze_97388)) && + ((local_tid_127519 - squot32(local_tid_127519, + wave_sizze_127521) * + wave_sizze_127521) & (2 * offset_127546 - 1)) == + 0) { + // read array element + { + x_127535 = ((volatile __local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127546)]; + } + // apply reduction operation + { + double defunc_1_op_res_127536 = x_127534 + + x_127535; + + x_127534 = defunc_1_op_res_127536; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_127534; + } + } + offset_127546 *= 2; + } + while (slt32(skip_waves_127547, + squot32(sext_i64_i32(segred_group_sizze_97388) + + wave_sizze_127521 - 1, + wave_sizze_127521))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127546 = 
skip_waves_127547 * wave_sizze_127521; + if (slt32(local_tid_127519 + offset_127546, + sext_i64_i32(segred_group_sizze_97388)) && + ((local_tid_127519 - squot32(local_tid_127519, + wave_sizze_127521) * + wave_sizze_127521) == 0 && + (squot32(local_tid_127519, wave_sizze_127521) & + (2 * skip_waves_127547 - 1)) == 0)) { + // read array element + { + x_127535 = ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519 + + offset_127546)]; + } + // apply reduction operation + { + double defunc_1_op_res_127536 = x_127534 + + x_127535; + + x_127534 = defunc_1_op_res_127536; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127523)[sext_i32_i64(local_tid_127519)] = + x_127534; + } + } + skip_waves_127547 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127519 == 0) { + ((__global double *) mem_122730)[gtid_97055] = + x_127534; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97388 +} +__kernel void mainMagnitudezisegred_large_97217(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127439_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_127437_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t r_73698, + int64_t num_groups_97296, + int64_t groups_per_segment_127423, + int64_t elements_per_thread_127424, + int64_t virt_num_groups_127425, + int64_t threads_per_segment_127427, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global + unsigned char *mem_122677, + __global + unsigned char *group_res_arr_mem_127428, + __global + unsigned char *mainMagnitudezicounter_mem_127430) +{ + #define segred_group_sizze_97295 (mainMagnitudezisegred_group_sizze_97211) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict 
sync_arr_mem_127439_backing_1 = + (__local volatile + char *) sync_arr_mem_127439_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127437_backing_0 = + (__local volatile + char *) red_arr_mem_127437_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127432; + int32_t local_tid_127433; + int64_t group_sizze_127436; + int32_t wave_sizze_127435; + int32_t group_tid_127434; + + global_tid_127432 = get_global_id(0); + local_tid_127433 = get_local_id(0); + group_sizze_127436 = get_local_size(0); + wave_sizze_127435 = LOCKSTEP_WIDTH; + group_tid_127434 = get_group_id(0); + + int32_t phys_tid_97217; + + phys_tid_97217 = global_tid_127432; + + __local char *red_arr_mem_127437; + + red_arr_mem_127437 = (__local char *) red_arr_mem_127437_backing_0; + + __local char *sync_arr_mem_127439; + + sync_arr_mem_127439 = (__local char *) sync_arr_mem_127439_backing_1; + + int32_t phys_group_id_127441; + + phys_group_id_127441 = get_group_id(0); + for (int32_t i_127442 = 0; i_127442 < + sdiv_up32(sext_i64_i32(virt_num_groups_127425) - phys_group_id_127441, + sext_i64_i32(num_groups_97296)); i_127442++) { + int32_t virt_group_id_127443 = phys_group_id_127441 + i_127442 * + sext_i64_i32(num_groups_97296); + int32_t flat_segment_id_127444 = squot32(virt_group_id_127443, + sext_i64_i32(groups_per_segment_127423)); + int64_t global_tid_127445 = srem64(sext_i32_i64(virt_group_id_127443) * + segred_group_sizze_97295 + + sext_i32_i64(local_tid_127433), + segred_group_sizze_97295 * + groups_per_segment_127423); + int64_t gtid_97208 = sext_i32_i64(flat_segment_id_127444); + int64_t gtid_97216; + double x_acc_127446; + int64_t chunk_sizze_127447; + + chunk_sizze_127447 = smin64(elements_per_thread_127424, + sdiv_up64(k2p2zq_73023 - global_tid_127445, + threads_per_segment_127427)); + + double x_97299; + double x_97300; + + // neutral-initialise the accumulators + { + x_acc_127446 = 0.0; + } + for (int64_t i_127451 = 0; i_127451 < chunk_sizze_127447; 
i_127451++) { + gtid_97216 = global_tid_127445 + threads_per_segment_127427 * + i_127451; + // apply map function + { + double x_97304 = ((__global double *) mem_120246)[gtid_97216 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97208 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_97305 = ((__global + double *) mem_param_121967)[gtid_97208 * + k2p2zq_73023 + + gtid_97216]; + double defunc_1_f_res_97306 = x_97304 * x_97305; + + // save map-out results + { } + // load accumulator + { + x_97299 = x_acc_127446; + } + // load new values + { + x_97300 = defunc_1_f_res_97306; + } + // apply reduction operator + { + double defunc_1_op_res_97301 = x_97299 + x_97300; + + // store in accumulator + { + x_acc_127446 = defunc_1_op_res_97301; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_97299 = x_acc_127446; + ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_97299; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127452; + int32_t skip_waves_127453; + + skip_waves_127453 = 1; + + double x_127448; + double x_127449; + + offset_127452 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127433, + sext_i64_i32(segred_group_sizze_97295))) { + x_127448 = ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127452)]; + } + } + offset_127452 = 1; + while (slt32(offset_127452, wave_sizze_127435)) { + if (slt32(local_tid_127433 + offset_127452, + sext_i64_i32(segred_group_sizze_97295)) && + ((local_tid_127433 - squot32(local_tid_127433, + wave_sizze_127435) * + wave_sizze_127435) & (2 * offset_127452 - 1)) == 0) { + // read array element + { + x_127449 = ((volatile __local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127452)]; + } + // apply reduction operation + { + double defunc_1_op_res_127450 = x_127448 + x_127449; + + x_127448 = defunc_1_op_res_127450; + } + // write result of operation + { + ((volatile 
__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_127448; + } + } + offset_127452 *= 2; + } + while (slt32(skip_waves_127453, + squot32(sext_i64_i32(segred_group_sizze_97295) + + wave_sizze_127435 - 1, wave_sizze_127435))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127452 = skip_waves_127453 * wave_sizze_127435; + if (slt32(local_tid_127433 + offset_127452, + sext_i64_i32(segred_group_sizze_97295)) && + ((local_tid_127433 - squot32(local_tid_127433, + wave_sizze_127435) * + wave_sizze_127435) == 0 && (squot32(local_tid_127433, + wave_sizze_127435) & (2 * + skip_waves_127453 - + 1)) == + 0)) { + // read array element + { + x_127449 = ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127452)]; + } + // apply reduction operation + { + double defunc_1_op_res_127450 = x_127448 + x_127449; + + x_127448 = defunc_1_op_res_127450; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_127448; + } + } + skip_waves_127453 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127433) == (int64_t) 0) { + x_acc_127446 = x_127448; + } + } + if (groups_per_segment_127423 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127433 == 0) { + ((__global double *) mem_122677)[gtid_97208] = x_acc_127446; + } + } + } else { + int32_t old_counter_127454; + + // first thread in group saves group result to global memory + { + if (local_tid_127433 == 0) { + ((__global + double *) group_res_arr_mem_127428)[sext_i32_i64(virt_group_id_127443) * + segred_group_sizze_97295] = + x_acc_127446; + mem_fence_global(); + old_counter_127454 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127430)[sext_i32_i64(srem32(flat_segment_id_127444, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127439)[(int64_t) 0] = + 
old_counter_127454 == groups_per_segment_127423 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127455; + + is_last_group_127455 = ((__local + bool *) sync_arr_mem_127439)[(int64_t) 0]; + if (is_last_group_127455) { + if (local_tid_127433 == 0) { + old_counter_127454 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127430)[sext_i32_i64(srem32(flat_segment_id_127444, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_127423)); + } + // read in the per-group-results + { + int64_t read_per_thread_127456 = + sdiv_up64(groups_per_segment_127423, + segred_group_sizze_97295); + + x_97299 = 0.0; + for (int64_t i_127457 = 0; i_127457 < + read_per_thread_127456; i_127457++) { + int64_t group_res_id_127458 = + sext_i32_i64(local_tid_127433) * + read_per_thread_127456 + i_127457; + int64_t index_of_group_res_127459 = + sext_i32_i64(flat_segment_id_127444) * + groups_per_segment_127423 + group_res_id_127458; + + if (slt64(group_res_id_127458, + groups_per_segment_127423)) { + x_97300 = ((__global + double *) group_res_arr_mem_127428)[index_of_group_res_127459 * + segred_group_sizze_97295]; + + double defunc_1_op_res_97301; + + defunc_1_op_res_97301 = x_97299 + x_97300; + x_97299 = defunc_1_op_res_97301; + } + } + } + ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_97299; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127460; + int32_t skip_waves_127461; + + skip_waves_127461 = 1; + + double x_127448; + double x_127449; + + offset_127460 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127433, + sext_i64_i32(segred_group_sizze_97295))) { + x_127448 = ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127460)]; + } + } + offset_127460 = 1; + while (slt32(offset_127460, wave_sizze_127435)) { + if (slt32(local_tid_127433 + offset_127460, + 
sext_i64_i32(segred_group_sizze_97295)) && + ((local_tid_127433 - squot32(local_tid_127433, + wave_sizze_127435) * + wave_sizze_127435) & (2 * offset_127460 - 1)) == + 0) { + // read array element + { + x_127449 = ((volatile __local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127460)]; + } + // apply reduction operation + { + double defunc_1_op_res_127450 = x_127448 + + x_127449; + + x_127448 = defunc_1_op_res_127450; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_127448; + } + } + offset_127460 *= 2; + } + while (slt32(skip_waves_127461, + squot32(sext_i64_i32(segred_group_sizze_97295) + + wave_sizze_127435 - 1, + wave_sizze_127435))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127460 = skip_waves_127461 * wave_sizze_127435; + if (slt32(local_tid_127433 + offset_127460, + sext_i64_i32(segred_group_sizze_97295)) && + ((local_tid_127433 - squot32(local_tid_127433, + wave_sizze_127435) * + wave_sizze_127435) == 0 && + (squot32(local_tid_127433, wave_sizze_127435) & + (2 * skip_waves_127461 - 1)) == 0)) { + // read array element + { + x_127449 = ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433 + + offset_127460)]; + } + // apply reduction operation + { + double defunc_1_op_res_127450 = x_127448 + + x_127449; + + x_127448 = defunc_1_op_res_127450; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127437)[sext_i32_i64(local_tid_127433)] = + x_127448; + } + } + skip_waves_127461 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127433 == 0) { + ((__global double *) mem_122677)[gtid_97208] = + x_127448; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97295 +} +__kernel void mainMagnitudezisegred_large_97246(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_127373_backing_aligned_0, + 
__local volatile + int64_t *red_arr_mem_127371_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t r_73698, + int64_t num_groups_97265, + int64_t groups_per_segment_127357, + int64_t elements_per_thread_127358, + int64_t virt_num_groups_127359, + int64_t threads_per_segment_127361, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122665, + __global + unsigned char *mem_122668, + __global + unsigned char *mem_122671, + __global + unsigned char *group_res_arr_mem_127362, + __global + unsigned char *mainMagnitudezicounter_mem_127364) +{ + #define segred_group_sizze_97264 (mainMagnitudezisegred_group_sizze_97240) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_127373_backing_1 = + (__local volatile + char *) sync_arr_mem_127373_backing_aligned_0; + __local volatile char *restrict red_arr_mem_127371_backing_0 = + (__local volatile + char *) red_arr_mem_127371_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127366; + int32_t local_tid_127367; + int64_t group_sizze_127370; + int32_t wave_sizze_127369; + int32_t group_tid_127368; + + global_tid_127366 = get_global_id(0); + local_tid_127367 = get_local_id(0); + group_sizze_127370 = get_local_size(0); + wave_sizze_127369 = LOCKSTEP_WIDTH; + group_tid_127368 = get_group_id(0); + + int32_t phys_tid_97246; + + phys_tid_97246 = global_tid_127366; + + __local char *red_arr_mem_127371; + + red_arr_mem_127371 = (__local char *) red_arr_mem_127371_backing_0; + + __local char *sync_arr_mem_127373; + + sync_arr_mem_127373 = (__local char *) sync_arr_mem_127373_backing_1; + + int32_t phys_group_id_127375; + + phys_group_id_127375 = get_group_id(0); + for (int32_t i_127376 = 0; i_127376 < + sdiv_up32(sext_i64_i32(virt_num_groups_127359) - phys_group_id_127375, + sext_i64_i32(num_groups_97265)); i_127376++) { + int32_t virt_group_id_127377 = 
phys_group_id_127375 + i_127376 * + sext_i64_i32(num_groups_97265); + int32_t flat_segment_id_127378 = squot32(virt_group_id_127377, + sext_i64_i32(groups_per_segment_127357)); + int64_t global_tid_127379 = srem64(sext_i32_i64(virt_group_id_127377) * + segred_group_sizze_97264 + + sext_i32_i64(local_tid_127367), + segred_group_sizze_97264 * + groups_per_segment_127357); + int64_t gtid_97237 = sext_i32_i64(flat_segment_id_127378); + int64_t gtid_97245; + double x_acc_127380; + int64_t chunk_sizze_127381; + + chunk_sizze_127381 = smin64(elements_per_thread_127358, + sdiv_up64(k2p2zq_73023 - global_tid_127379, + threads_per_segment_127361)); + + double x_97269; + double x_97270; + + // neutral-initialise the accumulators + { + x_acc_127380 = 0.0; + } + for (int64_t i_127385 = 0; i_127385 < chunk_sizze_127381; i_127385++) { + gtid_97245 = global_tid_127379 + threads_per_segment_127361 * + i_127385; + // apply map function + { + double x_97275 = ((__global double *) mem_120246)[gtid_97245 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97237 * + defunc_2_reduce_res_73132 + + r_73698]; + double defunc_0_f_res_97276; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_73023; + i_119830++) { + double x_97280 = ((__global double *) mem_120246)[i_119830 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97237 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_97281 = ((__global double *) mem_122665)[i_119830 * + (k2p2zq_73023 * + m_73008) + + gtid_97237 * + k2p2zq_73023 + + gtid_97245]; + double defunc_1_f_res_97282 = x_97280 * x_97281; + double defunc_1_op_res_97279 = defunc_1_f_res_97282 + + redout_119829; + double redout_tmp_127386 = defunc_1_op_res_97279; + + redout_119829 = redout_tmp_127386; + } + defunc_0_f_res_97276 = redout_119829; + + double defunc_1_f_res_97283 = x_97275 * defunc_0_f_res_97276; + + // save map-out results + { + ((__global double *) mem_122671)[gtid_97237 * k2p2zq_73023 + + gtid_97245] = + defunc_0_f_res_97276; 
+ } + // load accumulator + { + x_97269 = x_acc_127380; + } + // load new values + { + x_97270 = defunc_1_f_res_97283; + } + // apply reduction operator + { + double defunc_1_op_res_97271 = x_97269 + x_97270; + + // store in accumulator + { + x_acc_127380 = defunc_1_op_res_97271; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_97269 = x_acc_127380; + ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_97269; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127387; + int32_t skip_waves_127388; + + skip_waves_127388 = 1; + + double x_127382; + double x_127383; + + offset_127387 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127367, + sext_i64_i32(segred_group_sizze_97264))) { + x_127382 = ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127387)]; + } + } + offset_127387 = 1; + while (slt32(offset_127387, wave_sizze_127369)) { + if (slt32(local_tid_127367 + offset_127387, + sext_i64_i32(segred_group_sizze_97264)) && + ((local_tid_127367 - squot32(local_tid_127367, + wave_sizze_127369) * + wave_sizze_127369) & (2 * offset_127387 - 1)) == 0) { + // read array element + { + x_127383 = ((volatile __local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127387)]; + } + // apply reduction operation + { + double defunc_1_op_res_127384 = x_127382 + x_127383; + + x_127382 = defunc_1_op_res_127384; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_127382; + } + } + offset_127387 *= 2; + } + while (slt32(skip_waves_127388, + squot32(sext_i64_i32(segred_group_sizze_97264) + + wave_sizze_127369 - 1, wave_sizze_127369))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127387 = skip_waves_127388 * wave_sizze_127369; + if (slt32(local_tid_127367 + offset_127387, + sext_i64_i32(segred_group_sizze_97264)) && + ((local_tid_127367 - 
squot32(local_tid_127367, + wave_sizze_127369) * + wave_sizze_127369) == 0 && (squot32(local_tid_127367, + wave_sizze_127369) & (2 * + skip_waves_127388 - + 1)) == + 0)) { + // read array element + { + x_127383 = ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127387)]; + } + // apply reduction operation + { + double defunc_1_op_res_127384 = x_127382 + x_127383; + + x_127382 = defunc_1_op_res_127384; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_127382; + } + } + skip_waves_127388 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127367) == (int64_t) 0) { + x_acc_127380 = x_127382; + } + } + if (groups_per_segment_127357 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_127367 == 0) { + ((__global double *) mem_122668)[gtid_97237] = x_acc_127380; + } + } + } else { + int32_t old_counter_127389; + + // first thread in group saves group result to global memory + { + if (local_tid_127367 == 0) { + ((__global + double *) group_res_arr_mem_127362)[sext_i32_i64(virt_group_id_127377) * + segred_group_sizze_97264] = + x_acc_127380; + mem_fence_global(); + old_counter_127389 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127364)[sext_i32_i64(srem32(flat_segment_id_127378, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_127373)[(int64_t) 0] = + old_counter_127389 == groups_per_segment_127357 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127390; + + is_last_group_127390 = ((__local + bool *) sync_arr_mem_127373)[(int64_t) 0]; + if (is_last_group_127390) { + if (local_tid_127367 == 0) { + old_counter_127389 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127364)[sext_i32_i64(srem32(flat_segment_id_127378, + 10240))], 
+ (int) ((int64_t) 0 - + groups_per_segment_127357)); + } + // read in the per-group-results + { + int64_t read_per_thread_127391 = + sdiv_up64(groups_per_segment_127357, + segred_group_sizze_97264); + + x_97269 = 0.0; + for (int64_t i_127392 = 0; i_127392 < + read_per_thread_127391; i_127392++) { + int64_t group_res_id_127393 = + sext_i32_i64(local_tid_127367) * + read_per_thread_127391 + i_127392; + int64_t index_of_group_res_127394 = + sext_i32_i64(flat_segment_id_127378) * + groups_per_segment_127357 + group_res_id_127393; + + if (slt64(group_res_id_127393, + groups_per_segment_127357)) { + x_97270 = ((__global + double *) group_res_arr_mem_127362)[index_of_group_res_127394 * + segred_group_sizze_97264]; + + double defunc_1_op_res_97271; + + defunc_1_op_res_97271 = x_97269 + x_97270; + x_97269 = defunc_1_op_res_97271; + } + } + } + ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_97269; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_127395; + int32_t skip_waves_127396; + + skip_waves_127396 = 1; + + double x_127382; + double x_127383; + + offset_127395 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127367, + sext_i64_i32(segred_group_sizze_97264))) { + x_127382 = ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127395)]; + } + } + offset_127395 = 1; + while (slt32(offset_127395, wave_sizze_127369)) { + if (slt32(local_tid_127367 + offset_127395, + sext_i64_i32(segred_group_sizze_97264)) && + ((local_tid_127367 - squot32(local_tid_127367, + wave_sizze_127369) * + wave_sizze_127369) & (2 * offset_127395 - 1)) == + 0) { + // read array element + { + x_127383 = ((volatile __local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127395)]; + } + // apply reduction operation + { + double defunc_1_op_res_127384 = x_127382 + + x_127383; + + x_127382 = defunc_1_op_res_127384; + } + // write result of 
operation + { + ((volatile __local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_127382; + } + } + offset_127395 *= 2; + } + while (slt32(skip_waves_127396, + squot32(sext_i64_i32(segred_group_sizze_97264) + + wave_sizze_127369 - 1, + wave_sizze_127369))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127395 = skip_waves_127396 * wave_sizze_127369; + if (slt32(local_tid_127367 + offset_127395, + sext_i64_i32(segred_group_sizze_97264)) && + ((local_tid_127367 - squot32(local_tid_127367, + wave_sizze_127369) * + wave_sizze_127369) == 0 && + (squot32(local_tid_127367, wave_sizze_127369) & + (2 * skip_waves_127396 - 1)) == 0)) { + // read array element + { + x_127383 = ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367 + + offset_127395)]; + } + // apply reduction operation + { + double defunc_1_op_res_127384 = x_127382 + + x_127383; + + x_127382 = defunc_1_op_res_127384; + } + // write result of operation + { + ((__local + double *) red_arr_mem_127371)[sext_i32_i64(local_tid_127367)] = + x_127382; + } + } + skip_waves_127396 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127367 == 0) { + ((__global double *) mem_122668)[gtid_97237] = + x_127382; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97264 +} +/* mainMagnitudezisegred_large_98543 (compiler-generated): segmented sum producing one double per segment. flat_segment_id is split by k2p2zq_73023 into the pair (gtid_98532, gtid_98533). For every element gtid_98542 a thread handles, it computes d = (mem_param_123778 entry) minus (mem_123907 entry for gtid_98533) times (mem_123907 entry for gtid_98542) divided by mem_123910[gtid_98532], stores d in mem_123948, and adds (mem_120246 entry) times d to its accumulator. Threads of a group are combined through red_arr_mem; when a segment spans several groups the per-group sums pass through group_res_arr_mem and the last group to finish writes the total to mem_123944 at gtid_98532 times k2p2zq_73023 plus gtid_98533. NOTE(review): no guard against a zero mem_123910[gtid_98532] is visible in this kernel -- confirm the producer guarantees a non-zero divisor. NOTE(review): generated output; prefer regenerating over hand edits. */__kernel void mainMagnitudezisegred_large_98543(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128248_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128246_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98751, + int64_t binop_x_120251, + int64_t groups_per_segment_128232, + int64_t elements_per_thread_128233, + int64_t virt_num_groups_128234, + int64_t threads_per_segment_128236, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global +
unsigned char *mem_123907, + __global + unsigned char *mem_123910, + __global + unsigned char *mem_123944, + __global + unsigned char *mem_123948, + __global + unsigned char *group_res_arr_mem_128237, + __global + unsigned char *mainMagnitudezicounter_mem_128239) +{ + #define segred_group_sizze_98750 (mainMagnitudezisegred_group_sizze_98537) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128248_backing_1 = + (__local volatile + char *) sync_arr_mem_128248_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128246_backing_0 = + (__local volatile + char *) red_arr_mem_128246_backing_aligned_1; + + /* Do no work when *global_failure is already non-negative. NOTE(review): presumably a failure code recorded by an earlier kernel -- confirm against the host code. */ if (*global_failure >= 0) + return; + + int32_t global_tid_128241; + int32_t local_tid_128242; + int64_t group_sizze_128245; + int32_t wave_sizze_128244; + int32_t group_tid_128243; + + global_tid_128241 = get_global_id(0); + local_tid_128242 = get_local_id(0); + group_sizze_128245 = get_local_size(0); + wave_sizze_128244 = LOCKSTEP_WIDTH; + group_tid_128243 = get_group_id(0); + + int32_t phys_tid_98543; + + phys_tid_98543 = global_tid_128241; + + __local char *red_arr_mem_128246; + + red_arr_mem_128246 = (__local char *) red_arr_mem_128246_backing_0; + + __local char *sync_arr_mem_128248; + + sync_arr_mem_128248 = (__local char *) sync_arr_mem_128248_backing_1; + + int32_t phys_group_id_128250; + + phys_group_id_128250 = get_group_id(0); + /* Each physical group walks the virtual group ids phys_group_id, phys_group_id + num_groups, ... while they stay below virt_num_groups (sdiv_up32 presumably rounds the quotient up). */ for (int32_t i_128251 = 0; i_128251 < + sdiv_up32(sext_i64_i32(virt_num_groups_128234) - phys_group_id_128250, + sext_i64_i32(num_groups_98751)); i_128251++) { + int32_t virt_group_id_128252 = phys_group_id_128250 + i_128251 * + sext_i64_i32(num_groups_98751); + int32_t flat_segment_id_128253 = squot32(virt_group_id_128252, + sext_i64_i32(groups_per_segment_128232)); + int64_t global_tid_128254 = srem64(sext_i32_i64(virt_group_id_128252) * + segred_group_sizze_98750 + + sext_i32_i64(local_tid_128242), + segred_group_sizze_98750 * +
groups_per_segment_128232); + int64_t gtid_98532 = squot64(sext_i32_i64(flat_segment_id_128253), + k2p2zq_73023); + int64_t gtid_98533 = sext_i32_i64(flat_segment_id_128253) - + squot64(sext_i32_i64(flat_segment_id_128253), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_98542; + double x_acc_128255; + int64_t chunk_sizze_128256; + + /* Number of elements this thread visits: elements_per_thread, capped by how many of the k2p2zq_73023 segment elements remain at this thread's offset and stride. */ chunk_sizze_128256 = smin64(elements_per_thread_128233, + sdiv_up64(k2p2zq_73023 - global_tid_128254, + threads_per_segment_128236)); + + double x_98755; + double x_98756; + + // neutral-initialise the accumulators + { + x_acc_128255 = 0.0; + } + for (int64_t i_128260 = 0; i_128260 < chunk_sizze_128256; i_128260++) { + gtid_98542 = global_tid_128254 + threads_per_segment_128236 * + i_128260; + // apply map function + { + double fr_98761 = ((__global double *) mem_123910)[gtid_98532]; + double x_98762 = ((__global double *) mem_123907)[gtid_98532 * + k2p2zq_73023 + + gtid_98533]; + double x_98764 = ((__global double *) mem_123907)[gtid_98532 * + k2p2zq_73023 + + gtid_98542]; + double x_98765 = ((__global + double *) mem_param_123778)[gtid_98532 * + binop_x_120251 + + gtid_98533 * + k2p2zq_73023 + + gtid_98542]; + double x_98766 = ((__global double *) mem_120246)[gtid_98542 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98532 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98767 = x_98762 * x_98764; + double y_98768 = x_98767 / fr_98761; + double defunc_1_f_res_98769 = x_98765 - y_98768; + double defunc_1_f_res_98770 = x_98766 * defunc_1_f_res_98769; + + // save map-out results + { + ((__global double *) mem_123948)[gtid_98532 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_98533 * k2p2zq_73023 + + gtid_98542] = + defunc_1_f_res_98769; + } + // load accumulator + { + x_98755 = x_acc_128255; + } + // load new values + { + x_98756 = defunc_1_f_res_98770; + } + // apply reduction operator + { + double defunc_1_op_res_98757 = x_98755 + x_98756; + + // store in accumulator + { + x_acc_128255 = defunc_1_op_res_98757; + } +
} + } + } + // to reduce current chunk, first store our result in memory + { + x_98755 = x_acc_128255; + ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_98755; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128261; + int32_t skip_waves_128262; + + skip_waves_128262 = 1; + + double x_128257; + double x_128258; + + offset_128261 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128242, + sext_i64_i32(segred_group_sizze_98750))) { + x_128257 = ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + + offset_128261)]; + } + } + offset_128261 = 1; + /* Stage 1: combine values inside each wave of wave_sizze threads; this loop issues no barrier and goes through volatile local-memory accesses. */ while (slt32(offset_128261, wave_sizze_128244)) { + if (slt32(local_tid_128242 + offset_128261, + sext_i64_i32(segred_group_sizze_98750)) && + ((local_tid_128242 - squot32(local_tid_128242, + wave_sizze_128244) * + wave_sizze_128244) & (2 * offset_128261 - 1)) == 0) { + // read array element + { + x_128258 = ((volatile __local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + + offset_128261)]; + } + // apply reduction operation + { + double defunc_1_op_res_128259 = x_128257 + x_128258; + + x_128257 = defunc_1_op_res_128259; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_128257; + } + } + offset_128261 *= 2; + } + /* Stage 2: combine the per-wave values held by the first thread of each wave, with a barrier before every step. */ while (slt32(skip_waves_128262, + squot32(sext_i64_i32(segred_group_sizze_98750) + + wave_sizze_128244 - 1, wave_sizze_128244))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128261 = skip_waves_128262 * wave_sizze_128244; + if (slt32(local_tid_128242 + offset_128261, + sext_i64_i32(segred_group_sizze_98750)) && + ((local_tid_128242 - squot32(local_tid_128242, + wave_sizze_128244) * + wave_sizze_128244) == 0 && (squot32(local_tid_128242, + wave_sizze_128244) & (2 * + skip_waves_128262 - + 1)) == + 0)) { + // read array element + { + x_128258 = ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + +
offset_128261)]; + } + // apply reduction operation + { + double defunc_1_op_res_128259 = x_128257 + x_128258; + + x_128257 = defunc_1_op_res_128259; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_128257; + } + } + skip_waves_128262 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128242) == (int64_t) 0) { + x_acc_128255 = x_128257; + } + } + /* With one group per segment the group sum is already the segment total. */ if (groups_per_segment_128232 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128242 == 0) { + ((__global double *) mem_123944)[gtid_98532 * k2p2zq_73023 + + gtid_98533] = x_acc_128255; + } + } + } else { /* Several groups per segment: publish this group's partial sum, then bump the counter slot flat_segment_id modulo 10240; the group whose atomic add yields groups_per_segment - 1 flags itself as last via sync_arr_mem. NOTE(review): atomic_add_i32_global presumably returns the previous value, and 10240 presumably matches the counter buffer length allocated by the host -- confirm both. */ + int32_t old_counter_128263; + + // first thread in group saves group result to global memory + { + if (local_tid_128242 == 0) { + ((__global + double *) group_res_arr_mem_128237)[sext_i32_i64(virt_group_id_128252) * + segred_group_sizze_98750] = + x_acc_128255; + mem_fence_global(); + old_counter_128263 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128239)[sext_i32_i64(srem32(flat_segment_id_128253, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128248)[(int64_t) 0] = + old_counter_128263 == groups_per_segment_128232 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128264; + + is_last_group_128264 = ((__local + bool *) sync_arr_mem_128248)[(int64_t) 0]; + if (is_last_group_128264) { + /* Adding -groups_per_segment cancels the groups_per_segment increments made for this segment on the same counter slot. */ if (local_tid_128242 == 0) { + old_counter_128263 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128239)[sext_i32_i64(srem32(flat_segment_id_128253, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128232)); + } + // read in the per-group-results + { + int64_t read_per_thread_128265 = + sdiv_up64(groups_per_segment_128232, + segred_group_sizze_98750); + + x_98755 = 0.0; + for (int64_t i_128266 = 0; i_128266 <
+ read_per_thread_128265; i_128266++) { + int64_t group_res_id_128267 = + sext_i32_i64(local_tid_128242) * + read_per_thread_128265 + i_128266; + int64_t index_of_group_res_128268 = + sext_i32_i64(flat_segment_id_128253) * + groups_per_segment_128232 + group_res_id_128267; + + if (slt64(group_res_id_128267, + groups_per_segment_128232)) { + x_98756 = ((__global + double *) group_res_arr_mem_128237)[index_of_group_res_128268 * + segred_group_sizze_98750]; + + double defunc_1_op_res_98757; + + defunc_1_op_res_98757 = x_98755 + x_98756; + x_98755 = defunc_1_op_res_98757; + } + } + } + ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_98755; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128269; + int32_t skip_waves_128270; + + skip_waves_128270 = 1; + + double x_128257; + double x_128258; + + offset_128269 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128242, + sext_i64_i32(segred_group_sizze_98750))) { + x_128257 = ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + + offset_128269)]; + } + } + offset_128269 = 1; + while (slt32(offset_128269, wave_sizze_128244)) { + if (slt32(local_tid_128242 + offset_128269, + sext_i64_i32(segred_group_sizze_98750)) && + ((local_tid_128242 - squot32(local_tid_128242, + wave_sizze_128244) * + wave_sizze_128244) & (2 * offset_128269 - 1)) == + 0) { + // read array element + { + x_128258 = ((volatile __local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + + offset_128269)]; + } + // apply reduction operation + { + double defunc_1_op_res_128259 = x_128257 + + x_128258; + + x_128257 = defunc_1_op_res_128259; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_128257; + } + } + offset_128269 *= 2; + } + while (slt32(skip_waves_128270, + squot32(sext_i64_i32(segred_group_sizze_98750) + + wave_sizze_128244 - 1, +
wave_sizze_128244))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128269 = skip_waves_128270 * wave_sizze_128244; + if (slt32(local_tid_128242 + offset_128269, + sext_i64_i32(segred_group_sizze_98750)) && + ((local_tid_128242 - squot32(local_tid_128242, + wave_sizze_128244) * + wave_sizze_128244) == 0 && + (squot32(local_tid_128242, wave_sizze_128244) & + (2 * skip_waves_128270 - 1)) == 0)) { + // read array element + { + x_128258 = ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242 + + offset_128269)]; + } + // apply reduction operation + { + double defunc_1_op_res_128259 = x_128257 + + x_128258; + + x_128257 = defunc_1_op_res_128259; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128246)[sext_i32_i64(local_tid_128242)] = + x_128257; + } + } + skip_waves_128270 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128242 == 0) { + ((__global double *) mem_123944)[gtid_98532 * + k2p2zq_73023 + + gtid_98533] = + x_128257; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_98750 +} +/* mainMagnitudezisegred_large_98618 (compiler-generated): segmented sum producing one double per segment, where the segment index gtid_98609 is flat_segment_id itself. For every element gtid_98617 a thread handles, it multiplies a mem_120246 entry (indexed by gtid_98617, gtid_98609 and index_primexp_74309) with mem_param_123786 at gtid_98609 times k2p2zq_73023 plus gtid_98617 and adds the product to its accumulator; nothing is stored per element. Threads of a group are combined through red_arr_mem; when a segment spans several groups the per-group sums pass through group_res_arr_mem and the last group to finish writes the total to mem_123913[gtid_98609]. NOTE(review): generated output; prefer regenerating over hand edits. */__kernel void mainMagnitudezisegred_large_98618(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128172_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128170_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98692, + int64_t groups_per_segment_128156, + int64_t elements_per_thread_128157, + int64_t virt_num_groups_128158, + int64_t threads_per_segment_128160, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123786, + __global + unsigned char *mem_123913, + __global + unsigned char *group_res_arr_mem_128161, + __global + unsigned char *mainMagnitudezicounter_mem_128163) +{ + #define segred_group_sizze_98691 (mainMagnitudezisegred_group_sizze_98612) + + const int block_dim0 = 0; +
const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128172_backing_1 = + (__local volatile + char *) sync_arr_mem_128172_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128170_backing_0 = + (__local volatile + char *) red_arr_mem_128170_backing_aligned_1; + + /* Do no work when *global_failure is already non-negative. NOTE(review): presumably a failure code recorded by an earlier kernel -- confirm against the host code. */ if (*global_failure >= 0) + return; + + int32_t global_tid_128165; + int32_t local_tid_128166; + int64_t group_sizze_128169; + int32_t wave_sizze_128168; + int32_t group_tid_128167; + + global_tid_128165 = get_global_id(0); + local_tid_128166 = get_local_id(0); + group_sizze_128169 = get_local_size(0); + wave_sizze_128168 = LOCKSTEP_WIDTH; + group_tid_128167 = get_group_id(0); + + int32_t phys_tid_98618; + + phys_tid_98618 = global_tid_128165; + + __local char *red_arr_mem_128170; + + red_arr_mem_128170 = (__local char *) red_arr_mem_128170_backing_0; + + __local char *sync_arr_mem_128172; + + sync_arr_mem_128172 = (__local char *) sync_arr_mem_128172_backing_1; + + int32_t phys_group_id_128174; + + phys_group_id_128174 = get_group_id(0); + /* Each physical group walks the virtual group ids phys_group_id, phys_group_id + num_groups, ... while they stay below virt_num_groups (sdiv_up32 presumably rounds the quotient up). */ for (int32_t i_128175 = 0; i_128175 < + sdiv_up32(sext_i64_i32(virt_num_groups_128158) - phys_group_id_128174, + sext_i64_i32(num_groups_98692)); i_128175++) { + int32_t virt_group_id_128176 = phys_group_id_128174 + i_128175 * + sext_i64_i32(num_groups_98692); + int32_t flat_segment_id_128177 = squot32(virt_group_id_128176, + sext_i64_i32(groups_per_segment_128156)); + int64_t global_tid_128178 = srem64(sext_i32_i64(virt_group_id_128176) * + segred_group_sizze_98691 + + sext_i32_i64(local_tid_128166), + segred_group_sizze_98691 * + groups_per_segment_128156); + int64_t gtid_98609 = sext_i32_i64(flat_segment_id_128177); + int64_t gtid_98617; + double x_acc_128179; + int64_t chunk_sizze_128180; + + /* Number of elements this thread visits: elements_per_thread, capped by how many of the k2p2zq_73023 segment elements remain at this thread's offset and stride. */ chunk_sizze_128180 = smin64(elements_per_thread_128157, + sdiv_up64(k2p2zq_73023 - global_tid_128178, + threads_per_segment_128160)); + + double x_98695; + double x_98696; + + // neutral-initialise the accumulators +
{ + x_acc_128179 = 0.0; + } + for (int64_t i_128184 = 0; i_128184 < chunk_sizze_128180; i_128184++) { + gtid_98617 = global_tid_128178 + threads_per_segment_128160 * + i_128184; + // apply map function + { + double x_98700 = ((__global double *) mem_120246)[gtid_98617 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98609 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98701 = ((__global + double *) mem_param_123786)[gtid_98609 * + k2p2zq_73023 + + gtid_98617]; + double defunc_1_f_res_98702 = x_98700 * x_98701; + + // save map-out results + { } + // load accumulator + { + x_98695 = x_acc_128179; + } + // load new values + { + x_98696 = defunc_1_f_res_98702; + } + // apply reduction operator + { + double defunc_1_op_res_98697 = x_98695 + x_98696; + + // store in accumulator + { + x_acc_128179 = defunc_1_op_res_98697; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_98695 = x_acc_128179; + ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_98695; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128185; + int32_t skip_waves_128186; + + skip_waves_128186 = 1; + + double x_128181; + double x_128182; + + offset_128185 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128166, + sext_i64_i32(segred_group_sizze_98691))) { + x_128181 = ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + + offset_128185)]; + } + } + offset_128185 = 1; + /* Stage 1: combine values inside each wave of wave_sizze threads; this loop issues no barrier and goes through volatile local-memory accesses. */ while (slt32(offset_128185, wave_sizze_128168)) { + if (slt32(local_tid_128166 + offset_128185, + sext_i64_i32(segred_group_sizze_98691)) && + ((local_tid_128166 - squot32(local_tid_128166, + wave_sizze_128168) * + wave_sizze_128168) & (2 * offset_128185 - 1)) == 0) { + // read array element + { + x_128182 = ((volatile __local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + + offset_128185)]; + } + // apply reduction operation + { + double defunc_1_op_res_128183 = x_128181 +
x_128182; + + x_128181 = defunc_1_op_res_128183; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_128181; + } + } + offset_128185 *= 2; + } + /* Stage 2: combine the per-wave values held by the first thread of each wave, with a barrier before every step. */ while (slt32(skip_waves_128186, + squot32(sext_i64_i32(segred_group_sizze_98691) + + wave_sizze_128168 - 1, wave_sizze_128168))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128185 = skip_waves_128186 * wave_sizze_128168; + if (slt32(local_tid_128166 + offset_128185, + sext_i64_i32(segred_group_sizze_98691)) && + ((local_tid_128166 - squot32(local_tid_128166, + wave_sizze_128168) * + wave_sizze_128168) == 0 && (squot32(local_tid_128166, + wave_sizze_128168) & (2 * + skip_waves_128186 - + 1)) == + 0)) { + // read array element + { + x_128182 = ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + + offset_128185)]; + } + // apply reduction operation + { + double defunc_1_op_res_128183 = x_128181 + x_128182; + + x_128181 = defunc_1_op_res_128183; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_128181; + } + } + skip_waves_128186 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128166) == (int64_t) 0) { + x_acc_128179 = x_128181; + } + } + /* With one group per segment the group sum is already the segment total. */ if (groups_per_segment_128156 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128166 == 0) { + ((__global double *) mem_123913)[gtid_98609] = x_acc_128179; + } + } + } else { /* Several groups per segment: publish this group's partial sum, then bump the counter slot flat_segment_id modulo 10240; the group whose atomic add yields groups_per_segment - 1 flags itself as last via sync_arr_mem. NOTE(review): atomic_add_i32_global presumably returns the previous value, and 10240 presumably matches the counter buffer length allocated by the host -- confirm both. */ + int32_t old_counter_128187; + + // first thread in group saves group result to global memory + { + if (local_tid_128166 == 0) { + ((__global + double *) group_res_arr_mem_128161)[sext_i32_i64(virt_group_id_128176) * + segred_group_sizze_98691] = + x_acc_128179; + mem_fence_global(); + old_counter_128187 = + atomic_add_i32_global(&((volatile __global + int *)
mainMagnitudezicounter_mem_128163)[sext_i32_i64(srem32(flat_segment_id_128177, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128172)[(int64_t) 0] = + old_counter_128187 == groups_per_segment_128156 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128188; + + is_last_group_128188 = ((__local + bool *) sync_arr_mem_128172)[(int64_t) 0]; + if (is_last_group_128188) { + /* Adding -groups_per_segment cancels the groups_per_segment increments made for this segment on the same counter slot. */ if (local_tid_128166 == 0) { + old_counter_128187 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128163)[sext_i32_i64(srem32(flat_segment_id_128177, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128156)); + } + // read in the per-group-results + { + int64_t read_per_thread_128189 = + sdiv_up64(groups_per_segment_128156, + segred_group_sizze_98691); + + x_98695 = 0.0; + for (int64_t i_128190 = 0; i_128190 < + read_per_thread_128189; i_128190++) { + int64_t group_res_id_128191 = + sext_i32_i64(local_tid_128166) * + read_per_thread_128189 + i_128190; + int64_t index_of_group_res_128192 = + sext_i32_i64(flat_segment_id_128177) * + groups_per_segment_128156 + group_res_id_128191; + + if (slt64(group_res_id_128191, + groups_per_segment_128156)) { + x_98696 = ((__global + double *) group_res_arr_mem_128161)[index_of_group_res_128192 * + segred_group_sizze_98691]; + + double defunc_1_op_res_98697; + + defunc_1_op_res_98697 = x_98695 + x_98696; + x_98695 = defunc_1_op_res_98697; + } + } + } + ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_98695; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128193; + int32_t skip_waves_128194; + + skip_waves_128194 = 1; + + double x_128181; + double x_128182; + + offset_128193 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128166, + sext_i64_i32(segred_group_sizze_98691))) { + x_128181 = ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + +
offset_128193)]; + } + } + offset_128193 = 1; + while (slt32(offset_128193, wave_sizze_128168)) { + if (slt32(local_tid_128166 + offset_128193, + sext_i64_i32(segred_group_sizze_98691)) && + ((local_tid_128166 - squot32(local_tid_128166, + wave_sizze_128168) * + wave_sizze_128168) & (2 * offset_128193 - 1)) == + 0) { + // read array element + { + x_128182 = ((volatile __local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + + offset_128193)]; + } + // apply reduction operation + { + double defunc_1_op_res_128183 = x_128181 + + x_128182; + + x_128181 = defunc_1_op_res_128183; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_128181; + } + } + offset_128193 *= 2; + } + while (slt32(skip_waves_128194, + squot32(sext_i64_i32(segred_group_sizze_98691) + + wave_sizze_128168 - 1, + wave_sizze_128168))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128193 = skip_waves_128194 * wave_sizze_128168; + if (slt32(local_tid_128166 + offset_128193, + sext_i64_i32(segred_group_sizze_98691)) && + ((local_tid_128166 - squot32(local_tid_128166, + wave_sizze_128168) * + wave_sizze_128168) == 0 && + (squot32(local_tid_128166, wave_sizze_128168) & + (2 * skip_waves_128194 - 1)) == 0)) { + // read array element + { + x_128182 = ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166 + + offset_128193)]; + } + // apply reduction operation + { + double defunc_1_op_res_128183 = x_128181 + + x_128182; + + x_128181 = defunc_1_op_res_128183; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128170)[sext_i32_i64(local_tid_128166)] = + x_128181; + } + } + skip_waves_128194 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128166 == 0) { + ((__global double *) mem_123913)[gtid_98609] = + x_128181; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_98691 +}
+/* mainMagnitudezisegred_large_98645 (compiler-generated): segmented sum producing one double per segment, where the segment index gtid_98636 is flat_segment_id itself. For every element gtid_98644 a thread handles, it first runs a sequential loop over i_119890 from 0 up to k2p2zq_73023 that sums products of a mem_120246 entry and a mem_123901 entry, stores that inner sum in mem_123907 at gtid_98636 times k2p2zq_73023 plus gtid_98644, and then adds (mem_120246 entry for gtid_98644) times (inner sum) to its accumulator. Threads of a group are combined through red_arr_mem; when a segment spans several groups the per-group sums pass through group_res_arr_mem and the last group to finish writes the total to mem_123904[gtid_98636]. NOTE(review): generated output; prefer regenerating over hand edits. */__kernel void mainMagnitudezisegred_large_98645(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128106_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128104_backing_aligned_1, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98664, + int64_t groups_per_segment_128090, + int64_t elements_per_thread_128091, + int64_t virt_num_groups_128092, + int64_t threads_per_segment_128094, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_123901, + __global + unsigned char *mem_123904, + __global + unsigned char *mem_123907, + __global + unsigned char *group_res_arr_mem_128095, + __global + unsigned char *mainMagnitudezicounter_mem_128097) +{ + #define segred_group_sizze_98663 (mainMagnitudezisegred_group_sizze_98639) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128106_backing_1 = + (__local volatile + char *) sync_arr_mem_128106_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128104_backing_0 = + (__local volatile + char *) red_arr_mem_128104_backing_aligned_1; + + /* Do no work when *global_failure is already non-negative. NOTE(review): presumably a failure code recorded by an earlier kernel -- confirm against the host code. */ if (*global_failure >= 0) + return; + + int32_t global_tid_128099; + int32_t local_tid_128100; + int64_t group_sizze_128103; + int32_t wave_sizze_128102; + int32_t group_tid_128101; + + global_tid_128099 = get_global_id(0); + local_tid_128100 = get_local_id(0); + group_sizze_128103 = get_local_size(0); + wave_sizze_128102 = LOCKSTEP_WIDTH; + group_tid_128101 = get_group_id(0); + + int32_t phys_tid_98645; + + phys_tid_98645 = global_tid_128099; + + __local char *red_arr_mem_128104; + + red_arr_mem_128104 = (__local char *) red_arr_mem_128104_backing_0; + + __local char *sync_arr_mem_128106; + + sync_arr_mem_128106 = (__local char *) sync_arr_mem_128106_backing_1; + + int32_t phys_group_id_128108; + + phys_group_id_128108 = get_group_id(0); + /* Each physical group walks the virtual group ids phys_group_id, phys_group_id + num_groups, ... while they stay below virt_num_groups (sdiv_up32 presumably rounds the quotient up). */ for (int32_t i_128109 = 0;
i_128109 < + sdiv_up32(sext_i64_i32(virt_num_groups_128092) - phys_group_id_128108, + sext_i64_i32(num_groups_98664)); i_128109++) { + int32_t virt_group_id_128110 = phys_group_id_128108 + i_128109 * + sext_i64_i32(num_groups_98664); + int32_t flat_segment_id_128111 = squot32(virt_group_id_128110, + sext_i64_i32(groups_per_segment_128090)); + int64_t global_tid_128112 = srem64(sext_i32_i64(virt_group_id_128110) * + segred_group_sizze_98663 + + sext_i32_i64(local_tid_128100), + segred_group_sizze_98663 * + groups_per_segment_128090); + int64_t gtid_98636 = sext_i32_i64(flat_segment_id_128111); + int64_t gtid_98644; + double x_acc_128113; + int64_t chunk_sizze_128114; + + /* Number of elements this thread visits: elements_per_thread, capped by how many of the k2p2zq_73023 segment elements remain at this thread's offset and stride. */ chunk_sizze_128114 = smin64(elements_per_thread_128091, + sdiv_up64(k2p2zq_73023 - global_tid_128112, + threads_per_segment_128094)); + + double x_98668; + double x_98669; + + // neutral-initialise the accumulators + { + x_acc_128113 = 0.0; + } + for (int64_t i_128118 = 0; i_128118 < chunk_sizze_128114; i_128118++) { + gtid_98644 = global_tid_128112 + threads_per_segment_128094 * + i_128118; + // apply map function + { + double x_98674 = ((__global double *) mem_120246)[gtid_98644 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98636 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double defunc_0_f_res_98675; + double redout_119889 = 0.0; + + /* Sequential inner sum over all k2p2zq_73023 values of i_119890, executed once per element by the owning thread. */ for (int64_t i_119890 = 0; i_119890 < k2p2zq_73023; + i_119890++) { + double x_98679 = ((__global double *) mem_120246)[i_119890 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98636 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98680 = ((__global double *) mem_123901)[i_119890 * + (k2p2zq_73023 * + m_73008) + + gtid_98636 * + k2p2zq_73023 + + gtid_98644]; + double defunc_1_f_res_98681 = x_98679 * x_98680; + double defunc_1_op_res_98678 = defunc_1_f_res_98681 + + redout_119889; + double redout_tmp_128119 = defunc_1_op_res_98678; + + redout_119889 = redout_tmp_128119; + } + defunc_0_f_res_98675 = redout_119889; + + double
defunc_1_f_res_98682 = x_98674 * defunc_0_f_res_98675; + + // save map-out results + { + ((__global double *) mem_123907)[gtid_98636 * k2p2zq_73023 + + gtid_98644] = + defunc_0_f_res_98675; + } + // load accumulator + { + x_98668 = x_acc_128113; + } + // load new values + { + x_98669 = defunc_1_f_res_98682; + } + // apply reduction operator + { + double defunc_1_op_res_98670 = x_98668 + x_98669; + + // store in accumulator + { + x_acc_128113 = defunc_1_op_res_98670; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_98668 = x_acc_128113; + ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_98668; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128120; + int32_t skip_waves_128121; + + skip_waves_128121 = 1; + + double x_128115; + double x_128116; + + offset_128120 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128100, + sext_i64_i32(segred_group_sizze_98663))) { + x_128115 = ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128120)]; + } + } + offset_128120 = 1; + /* Stage 1: combine values inside each wave of wave_sizze threads; this loop issues no barrier and goes through volatile local-memory accesses. */ while (slt32(offset_128120, wave_sizze_128102)) { + if (slt32(local_tid_128100 + offset_128120, + sext_i64_i32(segred_group_sizze_98663)) && + ((local_tid_128100 - squot32(local_tid_128100, + wave_sizze_128102) * + wave_sizze_128102) & (2 * offset_128120 - 1)) == 0) { + // read array element + { + x_128116 = ((volatile __local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128120)]; + } + // apply reduction operation + { + double defunc_1_op_res_128117 = x_128115 + x_128116; + + x_128115 = defunc_1_op_res_128117; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_128115; + } + } + offset_128120 *= 2; + } + /* Stage 2: combine the per-wave values held by the first thread of each wave, with a barrier before every step. */ while (slt32(skip_waves_128121, + squot32(sext_i64_i32(segred_group_sizze_98663) + + wave_sizze_128102 - 1, wave_sizze_128102))) { +
barrier(CLK_LOCAL_MEM_FENCE); + offset_128120 = skip_waves_128121 * wave_sizze_128102; + if (slt32(local_tid_128100 + offset_128120, + sext_i64_i32(segred_group_sizze_98663)) && + ((local_tid_128100 - squot32(local_tid_128100, + wave_sizze_128102) * + wave_sizze_128102) == 0 && (squot32(local_tid_128100, + wave_sizze_128102) & (2 * + skip_waves_128121 - + 1)) == + 0)) { + // read array element + { + x_128116 = ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128120)]; + } + // apply reduction operation + { + double defunc_1_op_res_128117 = x_128115 + x_128116; + + x_128115 = defunc_1_op_res_128117; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_128115; + } + } + skip_waves_128121 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128100) == (int64_t) 0) { + x_acc_128113 = x_128115; + } + } + /* With one group per segment the group sum is already the segment total. */ if (groups_per_segment_128090 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128100 == 0) { + ((__global double *) mem_123904)[gtid_98636] = x_acc_128113; + } + } + } else { /* Several groups per segment: publish this group's partial sum, then bump the counter slot flat_segment_id modulo 10240; the group whose atomic add yields groups_per_segment - 1 flags itself as last via sync_arr_mem. NOTE(review): atomic_add_i32_global presumably returns the previous value, and 10240 presumably matches the counter buffer length allocated by the host -- confirm both. */ + int32_t old_counter_128122; + + // first thread in group saves group result to global memory + { + if (local_tid_128100 == 0) { + ((__global + double *) group_res_arr_mem_128095)[sext_i32_i64(virt_group_id_128110) * + segred_group_sizze_98663] = + x_acc_128113; + mem_fence_global(); + old_counter_128122 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128097)[sext_i32_i64(srem32(flat_segment_id_128111, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128106)[(int64_t) 0] = + old_counter_128122 == groups_per_segment_128090 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128123; + + is_last_group_128123 = ((__local + bool *) sync_arr_mem_128106)[(int64_t) 0]; + if (is_last_group_128123)
{ + /* Adding -groups_per_segment cancels the groups_per_segment increments made for this segment on the same counter slot. */ if (local_tid_128100 == 0) { + old_counter_128122 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128097)[sext_i32_i64(srem32(flat_segment_id_128111, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128090)); + } + // read in the per-group-results + { + int64_t read_per_thread_128124 = + sdiv_up64(groups_per_segment_128090, + segred_group_sizze_98663); + + x_98668 = 0.0; + for (int64_t i_128125 = 0; i_128125 < + read_per_thread_128124; i_128125++) { + int64_t group_res_id_128126 = + sext_i32_i64(local_tid_128100) * + read_per_thread_128124 + i_128125; + int64_t index_of_group_res_128127 = + sext_i32_i64(flat_segment_id_128111) * + groups_per_segment_128090 + group_res_id_128126; + + if (slt64(group_res_id_128126, + groups_per_segment_128090)) { + x_98669 = ((__global + double *) group_res_arr_mem_128095)[index_of_group_res_128127 * + segred_group_sizze_98663]; + + double defunc_1_op_res_98670; + + defunc_1_op_res_98670 = x_98668 + x_98669; + x_98668 = defunc_1_op_res_98670; + } + } + } + ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_98668; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128128; + int32_t skip_waves_128129; + + skip_waves_128129 = 1; + + double x_128115; + double x_128116; + + offset_128128 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128100, + sext_i64_i32(segred_group_sizze_98663))) { + x_128115 = ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128128)]; + } + } + offset_128128 = 1; + while (slt32(offset_128128, wave_sizze_128102)) { + if (slt32(local_tid_128100 + offset_128128, + sext_i64_i32(segred_group_sizze_98663)) && + ((local_tid_128100 - squot32(local_tid_128100, + wave_sizze_128102) * + wave_sizze_128102) & (2 * offset_128128 - 1)) == + 0) { + // read array element + { + x_128116 = ((volatile __local + double *)
red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128128)]; + } + // apply reduction operation + { + double defunc_1_op_res_128117 = x_128115 + + x_128116; + + x_128115 = defunc_1_op_res_128117; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_128115; + } + } + offset_128128 *= 2; + } + while (slt32(skip_waves_128129, + squot32(sext_i64_i32(segred_group_sizze_98663) + + wave_sizze_128102 - 1, + wave_sizze_128102))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128128 = skip_waves_128129 * wave_sizze_128102; + if (slt32(local_tid_128100 + offset_128128, + sext_i64_i32(segred_group_sizze_98663)) && + ((local_tid_128100 - squot32(local_tid_128100, + wave_sizze_128102) * + wave_sizze_128102) == 0 && + (squot32(local_tid_128100, wave_sizze_128102) & + (2 * skip_waves_128129 - 1)) == 0)) { + // read array element + { + x_128116 = ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100 + + offset_128128)]; + } + // apply reduction operation + { + double defunc_1_op_res_128117 = x_128115 + + x_128116; + + x_128115 = defunc_1_op_res_128117; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128104)[sext_i32_i64(local_tid_128100)] = + x_128115; + } + } + skip_waves_128129 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128100 == 0) { + ((__global double *) mem_123904)[gtid_98636] = + x_128115; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_98663 +} +__kernel void mainMagnitudezisegred_large_99002(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128440_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128438_backing_aligned_1, + int64_t num_recresids_padded_73681, + int64_t num_groups_99084, + int64_t groups_per_segment_128424, + int64_t elements_per_thread_128425, + int64_t virt_num_groups_128426, +
int64_t threads_per_segment_128428, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124051, + __global + unsigned char *mem_124054, + __global + unsigned char *group_res_arr_mem_128429, + __global + unsigned char *mainMagnitudezicounter_mem_128431) +{ + #define segred_group_sizze_99083 (mainMagnitudezisegred_group_sizze_98996) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128440_backing_1 = + (__local volatile + char *) sync_arr_mem_128440_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128438_backing_0 = + (__local volatile + char *) red_arr_mem_128438_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128433; + int32_t local_tid_128434; + int64_t group_sizze_128437; + int32_t wave_sizze_128436; + int32_t group_tid_128435; + + global_tid_128433 = get_global_id(0); + local_tid_128434 = get_local_id(0); + group_sizze_128437 = get_local_size(0); + wave_sizze_128436 = LOCKSTEP_WIDTH; + group_tid_128435 = get_group_id(0); + + int32_t phys_tid_99002; + + phys_tid_99002 = global_tid_128433; + + __local char *red_arr_mem_128438; + + red_arr_mem_128438 = (__local char *) red_arr_mem_128438_backing_0; + + __local char *sync_arr_mem_128440; + + sync_arr_mem_128440 = (__local char *) sync_arr_mem_128440_backing_1; + + int32_t phys_group_id_128442; + + phys_group_id_128442 = get_group_id(0); + for (int32_t i_128443 = 0; i_128443 < + sdiv_up32(sext_i64_i32(virt_num_groups_128426) - phys_group_id_128442, + sext_i64_i32(num_groups_99084)); i_128443++) { + int32_t virt_group_id_128444 = phys_group_id_128442 + i_128443 * + sext_i64_i32(num_groups_99084); + int32_t flat_segment_id_128445 = squot32(virt_group_id_128444, + sext_i64_i32(groups_per_segment_128424)); + int64_t global_tid_128446 = srem64(sext_i32_i64(virt_group_id_128444) * + segred_group_sizze_99083 + + sext_i32_i64(local_tid_128434), + segred_group_sizze_99083 * + 
groups_per_segment_128424); + int64_t gtid_98993 = sext_i32_i64(flat_segment_id_128445); + int64_t gtid_99001; + double x_acc_128447; + int64_t chunk_sizze_128448; + + chunk_sizze_128448 = smin64(elements_per_thread_128425, + sdiv_up64(num_recresids_padded_73681 - + global_tid_128446, + threads_per_segment_128428)); + + double x_99087; + double x_99088; + + // neutral-initialise the accumulators + { + x_acc_128447 = 0.0; + } + for (int64_t i_128452 = 0; i_128452 < chunk_sizze_128448; i_128452++) { + gtid_99001 = global_tid_128446 + threads_per_segment_128428 * + i_128452; + // apply map function + { + double x_99092 = ((__global double *) mem_124045)[gtid_98993 * + num_recresids_padded_73681 + + gtid_99001]; + bool isnan_res_99093; + + isnan_res_99093 = futrts_isnan64(x_99092); + + double defunc_0_f_res_99094; + + if (isnan_res_99093) { + defunc_0_f_res_99094 = 0.0; + } else { + double x_mean_99091 = ((__global + double *) mem_124051)[gtid_98993]; + double x_99095 = x_99092 - x_mean_99091; + double defunc_0_f_res_f_res_99096 = fpow64(x_99095, 2.0); + + defunc_0_f_res_99094 = defunc_0_f_res_f_res_99096; + } + // save map-out results + { } + // load accumulator + { + x_99087 = x_acc_128447; + } + // load new values + { + x_99088 = defunc_0_f_res_99094; + } + // apply reduction operator + { + double defunc_1_op_res_99089 = x_99087 + x_99088; + + // store in accumulator + { + x_acc_128447 = defunc_1_op_res_99089; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_99087 = x_acc_128447; + ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_99087; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128453; + int32_t skip_waves_128454; + + skip_waves_128454 = 1; + + double x_128449; + double x_128450; + + offset_128453 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128434, + sext_i64_i32(segred_group_sizze_99083))) { + x_128449 = ((__local + double *) 
red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128453)]; + } + } + offset_128453 = 1; + while (slt32(offset_128453, wave_sizze_128436)) { + if (slt32(local_tid_128434 + offset_128453, + sext_i64_i32(segred_group_sizze_99083)) && + ((local_tid_128434 - squot32(local_tid_128434, + wave_sizze_128436) * + wave_sizze_128436) & (2 * offset_128453 - 1)) == 0) { + // read array element + { + x_128450 = ((volatile __local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128453)]; + } + // apply reduction operation + { + double defunc_1_op_res_128451 = x_128449 + x_128450; + + x_128449 = defunc_1_op_res_128451; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_128449; + } + } + offset_128453 *= 2; + } + while (slt32(skip_waves_128454, + squot32(sext_i64_i32(segred_group_sizze_99083) + + wave_sizze_128436 - 1, wave_sizze_128436))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128453 = skip_waves_128454 * wave_sizze_128436; + if (slt32(local_tid_128434 + offset_128453, + sext_i64_i32(segred_group_sizze_99083)) && + ((local_tid_128434 - squot32(local_tid_128434, + wave_sizze_128436) * + wave_sizze_128436) == 0 && (squot32(local_tid_128434, + wave_sizze_128436) & (2 * + skip_waves_128454 - + 1)) == + 0)) { + // read array element + { + x_128450 = ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128453)]; + } + // apply reduction operation + { + double defunc_1_op_res_128451 = x_128449 + x_128450; + + x_128449 = defunc_1_op_res_128451; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_128449; + } + } + skip_waves_128454 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128434) == (int64_t) 0) { + x_acc_128447 = x_128449; + } + } + if (groups_per_segment_128424 == (int64_t) 1) { + // first 
thread in group saves final result to memory + { + if (local_tid_128434 == 0) { + ((__global double *) mem_124054)[gtid_98993] = x_acc_128447; + } + } + } else { + int32_t old_counter_128455; + + // first thread in group saves group result to global memory + { + if (local_tid_128434 == 0) { + ((__global + double *) group_res_arr_mem_128429)[sext_i32_i64(virt_group_id_128444) * + segred_group_sizze_99083] = + x_acc_128447; + mem_fence_global(); + old_counter_128455 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128431)[sext_i32_i64(srem32(flat_segment_id_128445, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128440)[(int64_t) 0] = + old_counter_128455 == groups_per_segment_128424 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128456; + + is_last_group_128456 = ((__local + bool *) sync_arr_mem_128440)[(int64_t) 0]; + if (is_last_group_128456) { + if (local_tid_128434 == 0) { + old_counter_128455 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128431)[sext_i32_i64(srem32(flat_segment_id_128445, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128424)); + } + // read in the per-group-results + { + int64_t read_per_thread_128457 = + sdiv_up64(groups_per_segment_128424, + segred_group_sizze_99083); + + x_99087 = 0.0; + for (int64_t i_128458 = 0; i_128458 < + read_per_thread_128457; i_128458++) { + int64_t group_res_id_128459 = + sext_i32_i64(local_tid_128434) * + read_per_thread_128457 + i_128458; + int64_t index_of_group_res_128460 = + sext_i32_i64(flat_segment_id_128445) * + groups_per_segment_128424 + group_res_id_128459; + + if (slt64(group_res_id_128459, + groups_per_segment_128424)) { + x_99088 = ((__global + double *) group_res_arr_mem_128429)[index_of_group_res_128460 * + segred_group_sizze_99083]; + + double defunc_1_op_res_99089; + + defunc_1_op_res_99089 = x_99087 + x_99088; + x_99087 = defunc_1_op_res_99089; + } + } + 
} + ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_99087; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128461; + int32_t skip_waves_128462; + + skip_waves_128462 = 1; + + double x_128449; + double x_128450; + + offset_128461 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128434, + sext_i64_i32(segred_group_sizze_99083))) { + x_128449 = ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128461)]; + } + } + offset_128461 = 1; + while (slt32(offset_128461, wave_sizze_128436)) { + if (slt32(local_tid_128434 + offset_128461, + sext_i64_i32(segred_group_sizze_99083)) && + ((local_tid_128434 - squot32(local_tid_128434, + wave_sizze_128436) * + wave_sizze_128436) & (2 * offset_128461 - 1)) == + 0) { + // read array element + { + x_128450 = ((volatile __local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128461)]; + } + // apply reduction operation + { + double defunc_1_op_res_128451 = x_128449 + + x_128450; + + x_128449 = defunc_1_op_res_128451; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_128449; + } + } + offset_128461 *= 2; + } + while (slt32(skip_waves_128462, + squot32(sext_i64_i32(segred_group_sizze_99083) + + wave_sizze_128436 - 1, + wave_sizze_128436))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128461 = skip_waves_128462 * wave_sizze_128436; + if (slt32(local_tid_128434 + offset_128461, + sext_i64_i32(segred_group_sizze_99083)) && + ((local_tid_128434 - squot32(local_tid_128434, + wave_sizze_128436) * + wave_sizze_128436) == 0 && + (squot32(local_tid_128434, wave_sizze_128436) & + (2 * skip_waves_128462 - 1)) == 0)) { + // read array element + { + x_128450 = ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434 + + offset_128461)]; + } + // apply reduction operation + { + double 
defunc_1_op_res_128451 = x_128449 + + x_128450; + + x_128449 = defunc_1_op_res_128451; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128438)[sext_i32_i64(local_tid_128434)] = + x_128449; + } + } + skip_waves_128462 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128434 == 0) { + ((__global double *) mem_124054)[gtid_98993] = + x_128449; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99083 +} +__kernel void mainMagnitudezisegred_large_99032(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128369_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128367_backing_aligned_1, + int64_t num_recresids_padded_73681, + int64_t num_groups_99062, + int64_t groups_per_segment_128353, + int64_t elements_per_thread_128354, + int64_t virt_num_groups_128355, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124048, + __global + unsigned char *group_res_arr_mem_128358, + __global + unsigned char *mainMagnitudezicounter_mem_128360) +{ + #define segred_group_sizze_99061 (mainMagnitudezisegred_group_sizze_99026) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128369_backing_1 = + (__local volatile + char *) sync_arr_mem_128369_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128367_backing_0 = + (__local volatile + char *) red_arr_mem_128367_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128362; + int32_t local_tid_128363; + int64_t group_sizze_128366; + int32_t wave_sizze_128365; + int32_t group_tid_128364; + + global_tid_128362 = get_global_id(0); + local_tid_128363 = get_local_id(0); + group_sizze_128366 = get_local_size(0); + wave_sizze_128365 = LOCKSTEP_WIDTH; + group_tid_128364 = get_group_id(0); + + int32_t phys_tid_99032; + + phys_tid_99032 = 
global_tid_128362; + + __local char *red_arr_mem_128367; + + red_arr_mem_128367 = (__local char *) red_arr_mem_128367_backing_0; + + __local char *sync_arr_mem_128369; + + sync_arr_mem_128369 = (__local char *) sync_arr_mem_128369_backing_1; + + int32_t phys_group_id_128371; + + phys_group_id_128371 = get_group_id(0); + for (int32_t i_128372 = 0; i_128372 < + sdiv_up32(sext_i64_i32(virt_num_groups_128355) - phys_group_id_128371, + sext_i64_i32(num_groups_99062)); i_128372++) { + int32_t virt_group_id_128373 = phys_group_id_128371 + i_128372 * + sext_i64_i32(num_groups_99062); + int32_t flat_segment_id_128374 = squot32(virt_group_id_128373, + sext_i64_i32(groups_per_segment_128353)); + int64_t global_tid_128375 = srem64(sext_i32_i64(virt_group_id_128373) * + segred_group_sizze_99061 + + sext_i32_i64(local_tid_128363), + segred_group_sizze_99061 * + groups_per_segment_128353); + int64_t gtid_99023 = sext_i32_i64(flat_segment_id_128374); + int64_t gtid_99031; + double x_acc_128376; + int64_t chunk_sizze_128377; + int64_t starting_point_128378; + + starting_point_128378 = global_tid_128375 * elements_per_thread_128354; + + int64_t remaining_elements_128379; + + remaining_elements_128379 = num_recresids_padded_73681 - + starting_point_128378; + if (sle64(remaining_elements_128379, (int64_t) 0) || + sle64(num_recresids_padded_73681, starting_point_128378)) { + chunk_sizze_128377 = (int64_t) 0; + } else { + if (slt64(num_recresids_padded_73681, (global_tid_128375 + + (int64_t) 1) * + elements_per_thread_128354)) { + chunk_sizze_128377 = num_recresids_padded_73681 - + global_tid_128375 * elements_per_thread_128354; + } else { + chunk_sizze_128377 = elements_per_thread_128354; + } + } + + double x_99065; + double x_99066; + + // neutral-initialise the accumulators + { + x_acc_128376 = 0.0; + } + for (int64_t i_128387 = 0; i_128387 < elements_per_thread_128354; + i_128387++) { + gtid_99031 = sext_i32_i64(local_tid_128363) + + (squot64(global_tid_128375, 
segred_group_sizze_99061) * + elements_per_thread_128354 + i_128387) * + segred_group_sizze_99061; + if (slt64(gtid_99031, num_recresids_padded_73681)) { + // apply map function + { + double x_99073 = ((__global + double *) mem_124045)[gtid_99023 * + num_recresids_padded_73681 + + gtid_99031]; + + // save map-out results + { } + // load accumulator + { + x_99065 = x_acc_128376; + } + // load new values + { + x_99066 = x_99073; + } + // apply reduction operator + { + bool isnan_res_99067; + + isnan_res_99067 = futrts_isnan64(x_99065); + + double defunc_1_op_res_99068; + + if (isnan_res_99067) { + defunc_1_op_res_99068 = x_99066; + } else { + bool isnan_res_99069; + + isnan_res_99069 = futrts_isnan64(x_99066); + + double defunc_1_op_res_f_res_99070; + + if (isnan_res_99069) { + defunc_1_op_res_f_res_99070 = x_99065; + } else { + double defunc_1_op_res_f_res_f_res_99071 = + x_99065 + x_99066; + + defunc_1_op_res_f_res_99070 = + defunc_1_op_res_f_res_f_res_99071; + } + defunc_1_op_res_99068 = defunc_1_op_res_f_res_99070; + } + // store in accumulator + { + x_acc_128376 = defunc_1_op_res_99068; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_99065 = x_acc_128376; + ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_99065; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128388; + int32_t skip_waves_128389; + + skip_waves_128389 = 1; + + double x_128380; + double x_128381; + + offset_128388 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128363, + sext_i64_i32(segred_group_sizze_99061))) { + x_128380 = ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128388)]; + } + } + offset_128388 = 1; + while (slt32(offset_128388, wave_sizze_128365)) { + if (slt32(local_tid_128363 + offset_128388, + sext_i64_i32(segred_group_sizze_99061)) && + ((local_tid_128363 - squot32(local_tid_128363, + wave_sizze_128365) * + wave_sizze_128365) & (2 * 
offset_128388 - 1)) == 0) { + // read array element + { + x_128381 = ((volatile __local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128388)]; + } + // apply reduction operation + { + bool isnan_res_128382; + + isnan_res_128382 = futrts_isnan64(x_128380); + + double defunc_1_op_res_128383; + + if (isnan_res_128382) { + defunc_1_op_res_128383 = x_128381; + } else { + bool isnan_res_128384; + + isnan_res_128384 = futrts_isnan64(x_128381); + + double defunc_1_op_res_f_res_128385; + + if (isnan_res_128384) { + defunc_1_op_res_f_res_128385 = x_128380; + } else { + double defunc_1_op_res_f_res_f_res_128386 = + x_128380 + x_128381; + + defunc_1_op_res_f_res_128385 = + defunc_1_op_res_f_res_f_res_128386; + } + defunc_1_op_res_128383 = + defunc_1_op_res_f_res_128385; + } + x_128380 = defunc_1_op_res_128383; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_128380; + } + } + offset_128388 *= 2; + } + while (slt32(skip_waves_128389, + squot32(sext_i64_i32(segred_group_sizze_99061) + + wave_sizze_128365 - 1, wave_sizze_128365))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128388 = skip_waves_128389 * wave_sizze_128365; + if (slt32(local_tid_128363 + offset_128388, + sext_i64_i32(segred_group_sizze_99061)) && + ((local_tid_128363 - squot32(local_tid_128363, + wave_sizze_128365) * + wave_sizze_128365) == 0 && (squot32(local_tid_128363, + wave_sizze_128365) & + (2 * skip_waves_128389 - + 1)) == 0)) { + // read array element + { + x_128381 = ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128388)]; + } + // apply reduction operation + { + bool isnan_res_128382; + + isnan_res_128382 = futrts_isnan64(x_128380); + + double defunc_1_op_res_128383; + + if (isnan_res_128382) { + defunc_1_op_res_128383 = x_128381; + } else { + bool isnan_res_128384; + + isnan_res_128384 = futrts_isnan64(x_128381); + + double defunc_1_op_res_f_res_128385; + + if 
(isnan_res_128384) { + defunc_1_op_res_f_res_128385 = x_128380; + } else { + double defunc_1_op_res_f_res_f_res_128386 = + x_128380 + x_128381; + + defunc_1_op_res_f_res_128385 = + defunc_1_op_res_f_res_f_res_128386; + } + defunc_1_op_res_128383 = + defunc_1_op_res_f_res_128385; + } + x_128380 = defunc_1_op_res_128383; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_128380; + } + } + skip_waves_128389 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128363) == (int64_t) 0) { + x_acc_128376 = x_128380; + } + } + // first thread keeps accumulator; others reset to neutral element + { + if (!(sext_i32_i64(local_tid_128363) == (int64_t) 0)) { + x_acc_128376 = 0.0; + } + } + } + x_99065 = x_acc_128376; + if (groups_per_segment_128353 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128363 == 0) { + ((__global double *) mem_124048)[gtid_99023] = x_acc_128376; + } + } + } else { + int32_t old_counter_128390; + + // first thread in group saves group result to global memory + { + if (local_tid_128363 == 0) { + ((__global + double *) group_res_arr_mem_128358)[sext_i32_i64(virt_group_id_128373) * + segred_group_sizze_99061] = + x_acc_128376; + mem_fence_global(); + old_counter_128390 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128360)[sext_i32_i64(srem32(flat_segment_id_128374, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128369)[(int64_t) 0] = + old_counter_128390 == groups_per_segment_128353 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128391; + + is_last_group_128391 = ((__local + bool *) sync_arr_mem_128369)[(int64_t) 0]; + if (is_last_group_128391) { + if (local_tid_128363 == 0) { + old_counter_128390 = + atomic_add_i32_global(&((volatile __global + int *) 
mainMagnitudezicounter_mem_128360)[sext_i32_i64(srem32(flat_segment_id_128374, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128353)); + } + // read in the per-group-results + { + int64_t read_per_thread_128392 = + sdiv_up64(groups_per_segment_128353, + segred_group_sizze_99061); + + x_99065 = 0.0; + for (int64_t i_128393 = 0; i_128393 < + read_per_thread_128392; i_128393++) { + int64_t group_res_id_128394 = + sext_i32_i64(local_tid_128363) * + read_per_thread_128392 + i_128393; + int64_t index_of_group_res_128395 = + sext_i32_i64(flat_segment_id_128374) * + groups_per_segment_128353 + group_res_id_128394; + + if (slt64(group_res_id_128394, + groups_per_segment_128353)) { + x_99066 = ((__global + double *) group_res_arr_mem_128358)[index_of_group_res_128395 * + segred_group_sizze_99061]; + + bool isnan_res_99067; + + isnan_res_99067 = futrts_isnan64(x_99065); + + double defunc_1_op_res_99068; + + if (isnan_res_99067) { + defunc_1_op_res_99068 = x_99066; + } else { + bool isnan_res_99069; + + isnan_res_99069 = futrts_isnan64(x_99066); + + double defunc_1_op_res_f_res_99070; + + if (isnan_res_99069) { + defunc_1_op_res_f_res_99070 = x_99065; + } else { + double defunc_1_op_res_f_res_f_res_99071 = + x_99065 + x_99066; + + defunc_1_op_res_f_res_99070 = + defunc_1_op_res_f_res_f_res_99071; + } + defunc_1_op_res_99068 = + defunc_1_op_res_f_res_99070; + } + x_99065 = defunc_1_op_res_99068; + } + } + } + ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_99065; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128396; + int32_t skip_waves_128397; + + skip_waves_128397 = 1; + + double x_128380; + double x_128381; + + offset_128396 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128363, + sext_i64_i32(segred_group_sizze_99061))) { + x_128380 = ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128396)]; + } + } + offset_128396 = 1; + 
while (slt32(offset_128396, wave_sizze_128365)) { + if (slt32(local_tid_128363 + offset_128396, + sext_i64_i32(segred_group_sizze_99061)) && + ((local_tid_128363 - squot32(local_tid_128363, + wave_sizze_128365) * + wave_sizze_128365) & (2 * offset_128396 - 1)) == + 0) { + // read array element + { + x_128381 = ((volatile __local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128396)]; + } + // apply reduction operation + { + bool isnan_res_128382; + + isnan_res_128382 = futrts_isnan64(x_128380); + + double defunc_1_op_res_128383; + + if (isnan_res_128382) { + defunc_1_op_res_128383 = x_128381; + } else { + bool isnan_res_128384; + + isnan_res_128384 = futrts_isnan64(x_128381); + + double defunc_1_op_res_f_res_128385; + + if (isnan_res_128384) { + defunc_1_op_res_f_res_128385 = x_128380; + } else { + double + defunc_1_op_res_f_res_f_res_128386 = + x_128380 + x_128381; + + defunc_1_op_res_f_res_128385 = + defunc_1_op_res_f_res_f_res_128386; + } + defunc_1_op_res_128383 = + defunc_1_op_res_f_res_128385; + } + x_128380 = defunc_1_op_res_128383; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_128380; + } + } + offset_128396 *= 2; + } + while (slt32(skip_waves_128397, + squot32(sext_i64_i32(segred_group_sizze_99061) + + wave_sizze_128365 - 1, + wave_sizze_128365))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128396 = skip_waves_128397 * wave_sizze_128365; + if (slt32(local_tid_128363 + offset_128396, + sext_i64_i32(segred_group_sizze_99061)) && + ((local_tid_128363 - squot32(local_tid_128363, + wave_sizze_128365) * + wave_sizze_128365) == 0 && + (squot32(local_tid_128363, wave_sizze_128365) & + (2 * skip_waves_128397 - 1)) == 0)) { + // read array element + { + x_128381 = ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363 + + offset_128396)]; + } + // apply reduction operation + { + bool isnan_res_128382; + + isnan_res_128382 = 
futrts_isnan64(x_128380); + + double defunc_1_op_res_128383; + + if (isnan_res_128382) { + defunc_1_op_res_128383 = x_128381; + } else { + bool isnan_res_128384; + + isnan_res_128384 = futrts_isnan64(x_128381); + + double defunc_1_op_res_f_res_128385; + + if (isnan_res_128384) { + defunc_1_op_res_f_res_128385 = x_128380; + } else { + double + defunc_1_op_res_f_res_f_res_128386 = + x_128380 + x_128381; + + defunc_1_op_res_f_res_128385 = + defunc_1_op_res_f_res_f_res_128386; + } + defunc_1_op_res_128383 = + defunc_1_op_res_f_res_128385; + } + x_128380 = defunc_1_op_res_128383; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128367)[sext_i32_i64(local_tid_128363)] = + x_128380; + } + } + skip_waves_128397 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128363 == 0) { + ((__global double *) mem_124048)[gtid_99023] = + x_128380; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99061 +} +__kernel void mainMagnitudezisegred_large_99667(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128670_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128668_backing_aligned_1, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int64_t num_groups_99944, + int64_t groups_per_segment_128654, + int64_t elements_per_thread_128655, + int64_t virt_num_groups_128656, + int64_t threads_per_segment_128658, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124130, + __global + unsigned char *group_res_arr_mem_128659, + __global + unsigned char *mainMagnitudezicounter_mem_128661) +{ + #define segred_group_sizze_99943 (mainMagnitudezisegred_group_sizze_99661) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128670_backing_1 = + (__local volatile + char *) 
sync_arr_mem_128670_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128668_backing_0 = + (__local volatile + char *) red_arr_mem_128668_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128663; + int32_t local_tid_128664; + int64_t group_sizze_128667; + int32_t wave_sizze_128666; + int32_t group_tid_128665; + + global_tid_128663 = get_global_id(0); + local_tid_128664 = get_local_id(0); + group_sizze_128667 = get_local_size(0); + wave_sizze_128666 = LOCKSTEP_WIDTH; + group_tid_128665 = get_group_id(0); + + int32_t phys_tid_99667; + + phys_tid_99667 = global_tid_128663; + + __local char *red_arr_mem_128668; + + red_arr_mem_128668 = (__local char *) red_arr_mem_128668_backing_0; + + __local char *sync_arr_mem_128670; + + sync_arr_mem_128670 = (__local char *) sync_arr_mem_128670_backing_1; + + int32_t phys_group_id_128672; + + phys_group_id_128672 = get_group_id(0); + for (int32_t i_128673 = 0; i_128673 < + sdiv_up32(sext_i64_i32(virt_num_groups_128656) - phys_group_id_128672, + sext_i64_i32(num_groups_99944)); i_128673++) { + int32_t virt_group_id_128674 = phys_group_id_128672 + i_128673 * + sext_i64_i32(num_groups_99944); + int32_t flat_segment_id_128675 = squot32(virt_group_id_128674, + sext_i64_i32(groups_per_segment_128654)); + int64_t global_tid_128676 = srem64(sext_i32_i64(virt_group_id_128674) * + segred_group_sizze_99943 + + sext_i32_i64(local_tid_128664), + segred_group_sizze_99943 * + groups_per_segment_128654); + int64_t gtid_99658 = sext_i32_i64(flat_segment_id_128675); + int64_t gtid_99666; + int64_t x_acc_128677; + int64_t chunk_sizze_128678; + + chunk_sizze_128678 = smin64(elements_per_thread_128655, + sdiv_up64(num_recresids_padded_73681 - + global_tid_128676, + threads_per_segment_128658)); + + int64_t x_99947; + int64_t x_99948; + + // neutral-initialise the accumulators + { + x_acc_128677 = (int64_t) 9223372036854775807; + } + for (int64_t i_128682 = 0; i_128682 < chunk_sizze_128678; i_128682++) { + 
gtid_99666 = global_tid_128676 + threads_per_segment_128658 * + i_128682; + // apply map function + { + int64_t slice_115288 = (int64_t) 1 + gtid_99666; + double x_99953 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99658 * + Nmk_74408 + + slice_115288]; + double x_99954 = ((__global double *) mem_124078)[gtid_99658 * + Nmk_74408 + + slice_115288]; + double abs_res_99955 = fabs(x_99953); + bool cond_99956 = x_99954 < abs_res_99955; + int64_t defunc_2_f_res_99957; + + if (cond_99956) { + defunc_2_f_res_99957 = gtid_99666; + } else { + defunc_2_f_res_99957 = (int64_t) 9223372036854775807; + } + // save map-out results + { } + // load accumulator + { + x_99947 = x_acc_128677; + } + // load new values + { + x_99948 = defunc_2_f_res_99957; + } + // apply reduction operator + { + int64_t defunc_1_op_res_99949 = smin64(x_99947, x_99948); + + // store in accumulator + { + x_acc_128677 = defunc_1_op_res_99949; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_99947 = x_acc_128677; + ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_99947; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128683; + int32_t skip_waves_128684; + + skip_waves_128684 = 1; + + int64_t x_128679; + int64_t x_128680; + + offset_128683 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128664, + sext_i64_i32(segred_group_sizze_99943))) { + x_128679 = ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128683)]; + } + } + offset_128683 = 1; + while (slt32(offset_128683, wave_sizze_128666)) { + if (slt32(local_tid_128664 + offset_128683, + sext_i64_i32(segred_group_sizze_99943)) && + ((local_tid_128664 - squot32(local_tid_128664, + wave_sizze_128666) * + wave_sizze_128666) & (2 * offset_128683 - 1)) == 0) { + // read array element + { + x_128680 = ((volatile __local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128683)]; + } + 
// apply reduction operation + { + int64_t defunc_1_op_res_128681 = smin64(x_128679, x_128680); + + x_128679 = defunc_1_op_res_128681; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_128679; + } + } + offset_128683 *= 2; + } + while (slt32(skip_waves_128684, + squot32(sext_i64_i32(segred_group_sizze_99943) + + wave_sizze_128666 - 1, wave_sizze_128666))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128683 = skip_waves_128684 * wave_sizze_128666; + if (slt32(local_tid_128664 + offset_128683, + sext_i64_i32(segred_group_sizze_99943)) && + ((local_tid_128664 - squot32(local_tid_128664, + wave_sizze_128666) * + wave_sizze_128666) == 0 && (squot32(local_tid_128664, + wave_sizze_128666) & (2 * + skip_waves_128684 - + 1)) == + 0)) { + // read array element + { + x_128680 = ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128683)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128681 = smin64(x_128679, x_128680); + + x_128679 = defunc_1_op_res_128681; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_128679; + } + } + skip_waves_128684 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128664) == (int64_t) 0) { + x_acc_128677 = x_128679; + } + } + if (groups_per_segment_128654 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128664 == 0) { + ((__global int64_t *) mem_124130)[gtid_99658] = + x_acc_128677; + } + } + } else { + int32_t old_counter_128685; + + // first thread in group saves group result to global memory + { + if (local_tid_128664 == 0) { + ((__global + int64_t *) group_res_arr_mem_128659)[sext_i32_i64(virt_group_id_128674) * + segred_group_sizze_99943] = + x_acc_128677; + mem_fence_global(); + old_counter_128685 = + 
atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128661)[sext_i32_i64(srem32(flat_segment_id_128675, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128670)[(int64_t) 0] = + old_counter_128685 == groups_per_segment_128654 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128686; + + is_last_group_128686 = ((__local + bool *) sync_arr_mem_128670)[(int64_t) 0]; + if (is_last_group_128686) { + if (local_tid_128664 == 0) { + old_counter_128685 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128661)[sext_i32_i64(srem32(flat_segment_id_128675, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128654)); + } + // read in the per-group-results + { + int64_t read_per_thread_128687 = + sdiv_up64(groups_per_segment_128654, + segred_group_sizze_99943); + + x_99947 = (int64_t) 9223372036854775807; + for (int64_t i_128688 = 0; i_128688 < + read_per_thread_128687; i_128688++) { + int64_t group_res_id_128689 = + sext_i32_i64(local_tid_128664) * + read_per_thread_128687 + i_128688; + int64_t index_of_group_res_128690 = + sext_i32_i64(flat_segment_id_128675) * + groups_per_segment_128654 + group_res_id_128689; + + if (slt64(group_res_id_128689, + groups_per_segment_128654)) { + x_99948 = ((__global + int64_t *) group_res_arr_mem_128659)[index_of_group_res_128690 * + segred_group_sizze_99943]; + + int64_t defunc_1_op_res_99949; + + defunc_1_op_res_99949 = smin64(x_99947, x_99948); + x_99947 = defunc_1_op_res_99949; + } + } + } + ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_99947; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128691; + int32_t skip_waves_128692; + + skip_waves_128692 = 1; + + int64_t x_128679; + int64_t x_128680; + + offset_128691 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128664, + sext_i64_i32(segred_group_sizze_99943))) 
{ + x_128679 = ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128691)]; + } + } + offset_128691 = 1; + while (slt32(offset_128691, wave_sizze_128666)) { + if (slt32(local_tid_128664 + offset_128691, + sext_i64_i32(segred_group_sizze_99943)) && + ((local_tid_128664 - squot32(local_tid_128664, + wave_sizze_128666) * + wave_sizze_128666) & (2 * offset_128691 - 1)) == + 0) { + // read array element + { + x_128680 = ((volatile __local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128691)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128681 = + smin64(x_128679, x_128680); + + x_128679 = defunc_1_op_res_128681; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_128679; + } + } + offset_128691 *= 2; + } + while (slt32(skip_waves_128692, + squot32(sext_i64_i32(segred_group_sizze_99943) + + wave_sizze_128666 - 1, + wave_sizze_128666))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128691 = skip_waves_128692 * wave_sizze_128666; + if (slt32(local_tid_128664 + offset_128691, + sext_i64_i32(segred_group_sizze_99943)) && + ((local_tid_128664 - squot32(local_tid_128664, + wave_sizze_128666) * + wave_sizze_128666) == 0 && + (squot32(local_tid_128664, wave_sizze_128666) & + (2 * skip_waves_128692 - 1)) == 0)) { + // read array element + { + x_128680 = ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664 + + offset_128691)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_128681 = + smin64(x_128679, x_128680); + + x_128679 = defunc_1_op_res_128681; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_128668)[sext_i32_i64(local_tid_128664)] = + x_128679; + } + } + skip_waves_128692 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128664 == 0) { + ((__global int64_t *) mem_124130)[gtid_99658] = + x_128679; + } + } + } + } + } + 
barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99943 +} +__kernel void mainMagnitudezisegred_large_99792(__global int *global_failure, + __local volatile + int64_t *sync_arr_mem_128605_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_128603_backing_aligned_1, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int64_t num_groups_99824, + int64_t groups_per_segment_128589, + int64_t elements_per_thread_128590, + int64_t virt_num_groups_128591, + int64_t threads_per_segment_128593, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124121, + __global + unsigned char *mem_124124, + __global + unsigned char *group_res_arr_mem_128594, + __global + unsigned char *mainMagnitudezicounter_mem_128596) +{ + #define segred_group_sizze_99823 (mainMagnitudezisegred_group_sizze_99786) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict sync_arr_mem_128605_backing_1 = + (__local volatile + char *) sync_arr_mem_128605_backing_aligned_0; + __local volatile char *restrict red_arr_mem_128603_backing_0 = + (__local volatile + char *) red_arr_mem_128603_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128598; + int32_t local_tid_128599; + int64_t group_sizze_128602; + int32_t wave_sizze_128601; + int32_t group_tid_128600; + + global_tid_128598 = get_global_id(0); + local_tid_128599 = get_local_id(0); + group_sizze_128602 = get_local_size(0); + wave_sizze_128601 = LOCKSTEP_WIDTH; + group_tid_128600 = get_group_id(0); + + int32_t phys_tid_99792; + + phys_tid_99792 = global_tid_128598; + + __local char *red_arr_mem_128603; + + red_arr_mem_128603 = (__local char *) red_arr_mem_128603_backing_0; + + __local char *sync_arr_mem_128605; + + sync_arr_mem_128605 = (__local char *) sync_arr_mem_128605_backing_1; + + int32_t phys_group_id_128607; + + phys_group_id_128607 = 
get_group_id(0); + for (int32_t i_128608 = 0; i_128608 < + sdiv_up32(sext_i64_i32(virt_num_groups_128591) - phys_group_id_128607, + sext_i64_i32(num_groups_99824)); i_128608++) { + int32_t virt_group_id_128609 = phys_group_id_128607 + i_128608 * + sext_i64_i32(num_groups_99824); + int32_t flat_segment_id_128610 = squot32(virt_group_id_128609, + sext_i64_i32(groups_per_segment_128589)); + int64_t global_tid_128611 = srem64(sext_i32_i64(virt_group_id_128609) * + segred_group_sizze_99823 + + sext_i32_i64(local_tid_128599), + segred_group_sizze_99823 * + groups_per_segment_128589); + int64_t gtid_99783 = sext_i32_i64(flat_segment_id_128610); + int64_t gtid_99791; + double x_acc_128612; + int64_t chunk_sizze_128613; + + chunk_sizze_128613 = smin64(elements_per_thread_128590, + sdiv_up64(num_recresids_padded_73681 - + global_tid_128611, + threads_per_segment_128593)); + + double x_99827; + double x_99828; + + // neutral-initialise the accumulators + { + x_acc_128612 = -INFINITY; + } + for (int64_t i_128617 = 0; i_128617 < chunk_sizze_128613; i_128617++) { + gtid_99791 = global_tid_128611 + threads_per_segment_128593 * + i_128617; + // apply map function + { + double i64_res_99831 = ((__global + double *) mem_124121)[gtid_99783]; + int64_t slice_115286 = (int64_t) 1 + gtid_99791; + double x_99832 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99783 * + Nmk_74408 + + slice_115286]; + int64_t x_99834 = mul64((int64_t) 2, gtid_99791); + int64_t i64_arg_99835 = add64((int64_t) 2, x_99834); + double i64_res_99836 = sitofp_i64_f64(i64_arg_99835); + double y_99837 = i64_res_99836 / i64_res_99831; + double lifted_div_res_99838 = 1.0 + y_99837; + double abs_arg_99839 = x_99832 / lifted_div_res_99838; + double abs_res_99840 = fabs(abs_arg_99839); + + // save map-out results + { } + // load accumulator + { + x_99827 = x_acc_128612; + } + // load new values + { + x_99828 = abs_res_99840; + } + // apply reduction operator + { + double defunc_1_op_res_99829 = 
fmax64(x_99827, x_99828); + + // store in accumulator + { + x_acc_128612 = defunc_1_op_res_99829; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_99827 = x_acc_128612; + ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_99827; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_128618; + int32_t skip_waves_128619; + + skip_waves_128619 = 1; + + double x_128614; + double x_128615; + + offset_128618 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128599, + sext_i64_i32(segred_group_sizze_99823))) { + x_128614 = ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128618)]; + } + } + offset_128618 = 1; + while (slt32(offset_128618, wave_sizze_128601)) { + if (slt32(local_tid_128599 + offset_128618, + sext_i64_i32(segred_group_sizze_99823)) && + ((local_tid_128599 - squot32(local_tid_128599, + wave_sizze_128601) * + wave_sizze_128601) & (2 * offset_128618 - 1)) == 0) { + // read array element + { + x_128615 = ((volatile __local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128618)]; + } + // apply reduction operation + { + double defunc_1_op_res_128616 = fmax64(x_128614, x_128615); + + x_128614 = defunc_1_op_res_128616; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_128614; + } + } + offset_128618 *= 2; + } + while (slt32(skip_waves_128619, + squot32(sext_i64_i32(segred_group_sizze_99823) + + wave_sizze_128601 - 1, wave_sizze_128601))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128618 = skip_waves_128619 * wave_sizze_128601; + if (slt32(local_tid_128599 + offset_128618, + sext_i64_i32(segred_group_sizze_99823)) && + ((local_tid_128599 - squot32(local_tid_128599, + wave_sizze_128601) * + wave_sizze_128601) == 0 && (squot32(local_tid_128599, + wave_sizze_128601) & (2 * + skip_waves_128619 - + 1)) == + 0)) { + // read array 
element + { + x_128615 = ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128618)]; + } + // apply reduction operation + { + double defunc_1_op_res_128616 = fmax64(x_128614, x_128615); + + x_128614 = defunc_1_op_res_128616; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_128614; + } + } + skip_waves_128619 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_128599) == (int64_t) 0) { + x_acc_128612 = x_128614; + } + } + if (groups_per_segment_128589 == (int64_t) 1) { + // first thread in group saves final result to memory + { + if (local_tid_128599 == 0) { + ((__global double *) mem_124124)[gtid_99783] = x_acc_128612; + } + } + } else { + int32_t old_counter_128620; + + // first thread in group saves group result to global memory + { + if (local_tid_128599 == 0) { + ((__global + double *) group_res_arr_mem_128594)[sext_i32_i64(virt_group_id_128609) * + segred_group_sizze_99823] = + x_acc_128612; + mem_fence_global(); + old_counter_128620 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128596)[sext_i32_i64(srem32(flat_segment_id_128610, + 10240))], + (int) 1); + ((__local bool *) sync_arr_mem_128605)[(int64_t) 0] = + old_counter_128620 == groups_per_segment_128589 - + (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_128621; + + is_last_group_128621 = ((__local + bool *) sync_arr_mem_128605)[(int64_t) 0]; + if (is_last_group_128621) { + if (local_tid_128599 == 0) { + old_counter_128620 = + atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_128596)[sext_i32_i64(srem32(flat_segment_id_128610, + 10240))], + (int) ((int64_t) 0 - + groups_per_segment_128589)); + } + // read in the per-group-results + { + int64_t read_per_thread_128622 = + sdiv_up64(groups_per_segment_128589, + 
segred_group_sizze_99823); + + x_99827 = -INFINITY; + for (int64_t i_128623 = 0; i_128623 < + read_per_thread_128622; i_128623++) { + int64_t group_res_id_128624 = + sext_i32_i64(local_tid_128599) * + read_per_thread_128622 + i_128623; + int64_t index_of_group_res_128625 = + sext_i32_i64(flat_segment_id_128610) * + groups_per_segment_128589 + group_res_id_128624; + + if (slt64(group_res_id_128624, + groups_per_segment_128589)) { + x_99828 = ((__global + double *) group_res_arr_mem_128594)[index_of_group_res_128625 * + segred_group_sizze_99823]; + + double defunc_1_op_res_99829; + + defunc_1_op_res_99829 = fmax64(x_99827, x_99828); + x_99827 = defunc_1_op_res_99829; + } + } + } + ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_99827; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_128626; + int32_t skip_waves_128627; + + skip_waves_128627 = 1; + + double x_128614; + double x_128615; + + offset_128626 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_128599, + sext_i64_i32(segred_group_sizze_99823))) { + x_128614 = ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128626)]; + } + } + offset_128626 = 1; + while (slt32(offset_128626, wave_sizze_128601)) { + if (slt32(local_tid_128599 + offset_128626, + sext_i64_i32(segred_group_sizze_99823)) && + ((local_tid_128599 - squot32(local_tid_128599, + wave_sizze_128601) * + wave_sizze_128601) & (2 * offset_128626 - 1)) == + 0) { + // read array element + { + x_128615 = ((volatile __local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128626)]; + } + // apply reduction operation + { + double defunc_1_op_res_128616 = fmax64(x_128614, + x_128615); + + x_128614 = defunc_1_op_res_128616; + } + // write result of operation + { + ((volatile __local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_128614; + } + } + offset_128626 *= 2; + } + while 
(slt32(skip_waves_128627, + squot32(sext_i64_i32(segred_group_sizze_99823) + + wave_sizze_128601 - 1, + wave_sizze_128601))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128626 = skip_waves_128627 * wave_sizze_128601; + if (slt32(local_tid_128599 + offset_128626, + sext_i64_i32(segred_group_sizze_99823)) && + ((local_tid_128599 - squot32(local_tid_128599, + wave_sizze_128601) * + wave_sizze_128601) == 0 && + (squot32(local_tid_128599, wave_sizze_128601) & + (2 * skip_waves_128627 - 1)) == 0)) { + // read array element + { + x_128615 = ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599 + + offset_128626)]; + } + // apply reduction operation + { + double defunc_1_op_res_128616 = fmax64(x_128614, + x_128615); + + x_128614 = defunc_1_op_res_128616; + } + // write result of operation + { + ((__local + double *) red_arr_mem_128603)[sext_i32_i64(local_tid_128599)] = + x_128614; + } + } + skip_waves_128627 *= 2; + } + // and back to memory with the final result + { + if (local_tid_128599 == 0) { + ((__global double *) mem_124124)[gtid_99783] = + x_128614; + } + } + } + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99823 +} +__kernel void mainMagnitudezisegred_nonseg_101808(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129487_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_129485_backing_aligned_1, + int64_t m_73008, + int64_t num_groups_101803, + int64_t num_threads_129479, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *mem_124963, + __global + unsigned char *mainMagnitudezicounter_mem_129475, + __global + unsigned char *group_res_arr_mem_129477) +{ + #define segred_group_sizze_101801 (mainMagnitudezisegred_group_sizze_101800) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129487_backing_1 = + (__local volatile + char *) 
red_arr_mem_129487_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_129485_backing_0 = + (__local volatile + char *) sync_arr_mem_129485_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129480; + int32_t local_tid_129481; + int64_t group_sizze_129484; + int32_t wave_sizze_129483; + int32_t group_tid_129482; + + global_tid_129480 = get_global_id(0); + local_tid_129481 = get_local_id(0); + group_sizze_129484 = get_local_size(0); + wave_sizze_129483 = LOCKSTEP_WIDTH; + group_tid_129482 = get_group_id(0); + + int32_t phys_tid_101808; + + phys_tid_101808 = global_tid_129480; + + __local char *sync_arr_mem_129485; + + sync_arr_mem_129485 = (__local char *) sync_arr_mem_129485_backing_0; + + __local char *red_arr_mem_129487; + + red_arr_mem_129487 = (__local char *) red_arr_mem_129487_backing_1; + + int64_t dummy_101806; + + dummy_101806 = (int64_t) 0; + + int64_t gtid_101807; + + gtid_101807 = (int64_t) 0; + + int64_t x_acc_129489; + int64_t chunk_sizze_129490; + + chunk_sizze_129490 = smin64(sdiv_up64(m_73008, + sext_i32_i64(sext_i64_i32(segred_group_sizze_101801 * + num_groups_101803))), + sdiv_up64(m_73008 - phys_tid_101808, + num_threads_129479)); + + int64_t x_74868; + int64_t x_74869; + + // neutral-initialise the accumulators + { + x_acc_129489 = (int64_t) 0; + } + for (int64_t i_129494 = 0; i_129494 < chunk_sizze_129490; i_129494++) { + gtid_101807 = phys_tid_101808 + num_threads_129479 * i_129494; + // apply map function + { + int64_t x_74871 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_101807]; + + // save map-out results + { } + // load accumulator + { + x_74868 = x_acc_129489; + } + // load new values + { + x_74869 = x_74871; + } + // apply reduction operator + { + int64_t defunc_1_op_res_74870 = smax64(x_74868, x_74869); + + // store in accumulator + { + x_acc_129489 = defunc_1_op_res_74870; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_74868 = 
x_acc_129489; + ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_74868; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_129495; + int32_t skip_waves_129496; + + skip_waves_129496 = 1; + + int64_t x_129491; + int64_t x_129492; + + offset_129495 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129481, sext_i64_i32(segred_group_sizze_101801))) { + x_129491 = ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129495)]; + } + } + offset_129495 = 1; + while (slt32(offset_129495, wave_sizze_129483)) { + if (slt32(local_tid_129481 + offset_129495, + sext_i64_i32(segred_group_sizze_101801)) && + ((local_tid_129481 - squot32(local_tid_129481, wave_sizze_129483) * + wave_sizze_129483) & (2 * offset_129495 - 1)) == 0) { + // read array element + { + x_129492 = ((volatile __local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129495)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129493 = smax64(x_129491, x_129492); + + x_129491 = defunc_1_op_res_129493; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_129491; + } + } + offset_129495 *= 2; + } + while (slt32(skip_waves_129496, + squot32(sext_i64_i32(segred_group_sizze_101801) + + wave_sizze_129483 - 1, wave_sizze_129483))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129495 = skip_waves_129496 * wave_sizze_129483; + if (slt32(local_tid_129481 + offset_129495, + sext_i64_i32(segred_group_sizze_101801)) && + ((local_tid_129481 - squot32(local_tid_129481, wave_sizze_129483) * + wave_sizze_129483) == 0 && (squot32(local_tid_129481, + wave_sizze_129483) & (2 * + skip_waves_129496 - + 1)) == + 0)) { + // read array element + { + x_129492 = ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129495)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129493 = 
smax64(x_129491, x_129492); + + x_129491 = defunc_1_op_res_129493; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_129491; + } + } + skip_waves_129496 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_129481) == (int64_t) 0) { + x_acc_129489 = x_129491; + } + } + + int32_t old_counter_129497; + + // first thread in group saves group result to global memory + { + if (local_tid_129481 == 0) { + ((__global + int64_t *) group_res_arr_mem_129477)[sext_i32_i64(group_tid_129482) * + segred_group_sizze_101801] = + x_acc_129489; + mem_fence_global(); + old_counter_129497 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129475)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_129485)[(int64_t) 0] = + old_counter_129497 == num_groups_101803 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_129498; + + is_last_group_129498 = ((__local bool *) sync_arr_mem_129485)[(int64_t) 0]; + if (is_last_group_129498) { + if (local_tid_129481 == 0) { + old_counter_129497 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_129475)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_101803)); + } + // read in the per-group-results + { + int64_t read_per_thread_129499 = sdiv_up64(num_groups_101803, + segred_group_sizze_101801); + + x_74868 = (int64_t) 0; + for (int64_t i_129500 = 0; i_129500 < read_per_thread_129499; + i_129500++) { + int64_t group_res_id_129501 = sext_i32_i64(local_tid_129481) * + read_per_thread_129499 + i_129500; + int64_t index_of_group_res_129502 = group_res_id_129501; + + if (slt64(group_res_id_129501, num_groups_101803)) { + x_74869 = ((__global + int64_t *) group_res_arr_mem_129477)[index_of_group_res_129502 * + segred_group_sizze_101801]; + + int64_t defunc_1_op_res_74870; + + defunc_1_op_res_74870 = 
smax64(x_74868, x_74869); + x_74868 = defunc_1_op_res_74870; + } + } + } + ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_74868; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_129503; + int32_t skip_waves_129504; + + skip_waves_129504 = 1; + + int64_t x_129491; + int64_t x_129492; + + offset_129503 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_129481, + sext_i64_i32(segred_group_sizze_101801))) { + x_129491 = ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129503)]; + } + } + offset_129503 = 1; + while (slt32(offset_129503, wave_sizze_129483)) { + if (slt32(local_tid_129481 + offset_129503, + sext_i64_i32(segred_group_sizze_101801)) && + ((local_tid_129481 - squot32(local_tid_129481, + wave_sizze_129483) * + wave_sizze_129483) & (2 * offset_129503 - 1)) == 0) { + // read array element + { + x_129492 = ((volatile __local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129503)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129493 = smax64(x_129491, + x_129492); + + x_129491 = defunc_1_op_res_129493; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_129491; + } + } + offset_129503 *= 2; + } + while (slt32(skip_waves_129504, + squot32(sext_i64_i32(segred_group_sizze_101801) + + wave_sizze_129483 - 1, wave_sizze_129483))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_129503 = skip_waves_129504 * wave_sizze_129483; + if (slt32(local_tid_129481 + offset_129503, + sext_i64_i32(segred_group_sizze_101801)) && + ((local_tid_129481 - squot32(local_tid_129481, + wave_sizze_129483) * + wave_sizze_129483) == 0 && (squot32(local_tid_129481, + wave_sizze_129483) & + (2 * skip_waves_129504 - + 1)) == 0)) { + // read array element + { + x_129492 = ((__local + int64_t *) 
red_arr_mem_129487)[sext_i32_i64(local_tid_129481 + + offset_129503)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_129493 = smax64(x_129491, + x_129492); + + x_129491 = defunc_1_op_res_129493; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_129487)[sext_i32_i64(local_tid_129481)] = + x_129491; + } + } + skip_waves_129504 *= 2; + } + // and back to memory with the final result + { + if (local_tid_129481 == 0) { + ((__global int64_t *) mem_124963)[(int64_t) 0] = x_129491; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_101801 +} +__kernel void mainMagnitudezisegred_nonseg_90321(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126357_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126355_backing_aligned_1, + int64_t m_73008, + int64_t n_73011, + int64_t m_73103, + int64_t num_groups_90324, + int64_t num_threads_125867, + int64_t num_threads_126349, + __global + unsigned char *mem_120127, + __global + unsigned char *mem_120130, + __global + unsigned char *mem_120144, + __global + unsigned char *mem_120146, + __global + unsigned char *mem_120172, + __global + unsigned char *mem_120174, + __global + unsigned char *mem_120177, + __global + unsigned char *mem_120180, + __global + unsigned char *mainMagnitudezicounter_mem_126345, + __global + unsigned char *group_res_arr_mem_126347) +{ + #define segred_group_sizze_90323 (mainMagnitudezisegred_group_sizze_90310) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126357_backing_1 = + (__local volatile + char *) red_arr_mem_126357_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126355_backing_0 = + (__local volatile + char *) sync_arr_mem_126355_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126350; + int32_t local_tid_126351; + int64_t group_sizze_126354; + int32_t wave_sizze_126353; + 
int32_t group_tid_126352; + + global_tid_126350 = get_global_id(0); + local_tid_126351 = get_local_id(0); + group_sizze_126354 = get_local_size(0); + wave_sizze_126353 = LOCKSTEP_WIDTH; + group_tid_126352 = get_group_id(0); + + int32_t phys_tid_90321; + + phys_tid_90321 = global_tid_126350; + + __local char *sync_arr_mem_126355; + + sync_arr_mem_126355 = (__local char *) sync_arr_mem_126355_backing_0; + + __local char *red_arr_mem_126357; + + red_arr_mem_126357 = (__local char *) red_arr_mem_126357_backing_1; + + int64_t dummy_90319; + + dummy_90319 = (int64_t) 0; + + int64_t gtid_90320; + + gtid_90320 = (int64_t) 0; + + int64_t x_acc_126359; + int64_t chunk_sizze_126360; + + chunk_sizze_126360 = smin64(sdiv_up64(m_73008, + sext_i32_i64(sext_i64_i32(segred_group_sizze_90323 * + num_groups_90324))), + sdiv_up64(m_73008 - phys_tid_90321, + num_threads_126349)); + + int64_t x_90330; + int64_t x_90331; + + // neutral-initialise the accumulators + { + x_acc_126359 = (int64_t) -9223372036854775808; + } + for (int64_t i_126364 = 0; i_126364 < chunk_sizze_126360; i_126364++) { + gtid_90320 = phys_tid_90321 + num_threads_126349 * i_126364; + // apply map function + { + int64_t discard_119622; + int64_t scanacc_119618 = (int64_t) 0; + + for (int64_t i_119620 = 0; i_119620 < n_73011; i_119620++) { + int64_t binop_y_119975 = (int64_t) -1 * i_119620; + int64_t slice_119976 = m_73103 + binop_y_119975; + double x_90338 = ((__global double *) mem_120127)[slice_119976 * + m_73008 + + gtid_90320]; + bool defunc_0_f_res_90339; + + defunc_0_f_res_90339 = futrts_isnan64(x_90338); + + bool defunc_0_g_res_90340 = !defunc_0_f_res_90339; + int64_t defunc_0_f_res_90341 = + btoi_bool_i64(defunc_0_g_res_90340); + int64_t defunc_1_op_res_90337 = add64(defunc_0_f_res_90341, + scanacc_119618); + + ((__global int64_t *) mem_120130)[phys_tid_90321 + i_119620 * + num_threads_125867] = + defunc_1_op_res_90337; + + int64_t scanacc_tmp_126365 = defunc_1_op_res_90337; + + scanacc_119618 = 
scanacc_tmp_126365; + } + discard_119622 = scanacc_119618; + + int64_t last_res_90342 = ((__global + int64_t *) mem_120130)[phys_tid_90321 + + m_73103 * + num_threads_125867]; + + for (int64_t i_126367 = 0; i_126367 < n_73011; i_126367++) { + ((__global double *) mem_120144)[phys_tid_90321 + i_126367 * + num_threads_125867] = NAN; + } + for (int64_t i_126368 = 0; i_126368 < n_73011; i_126368++) { + ((__global int64_t *) mem_120146)[phys_tid_90321 + i_126368 * + num_threads_125867] = + (int64_t) 0; + } + for (int64_t write_iter_119623 = 0; write_iter_119623 < n_73011; + write_iter_119623++) { + int64_t binop_y_119983 = (int64_t) -1 * write_iter_119623; + int64_t slice_119984 = m_73103 + binop_y_119983; + double write_iv_119626 = ((__global + double *) mem_120127)[slice_119984 * + m_73008 + + gtid_90320]; + bool defunc_0_f_res_90350; + + defunc_0_f_res_90350 = futrts_isnan64(write_iv_119626); + + bool defunc_0_g_res_90351 = !defunc_0_f_res_90350; + int64_t defunc_1_f_res_90352; + + if (defunc_0_g_res_90351) { + int64_t write_iv_119627 = ((__global + int64_t *) mem_120130)[phys_tid_90321 + + write_iter_119623 * + num_threads_125867]; + int64_t defunc_1_f_res_t_res_90353 = sub64(write_iv_119627, + (int64_t) 1); + + defunc_1_f_res_90352 = defunc_1_f_res_t_res_90353; + } else { + defunc_1_f_res_90352 = (int64_t) -1; + } + + bool less_than_zzero_119629 = slt64(defunc_1_f_res_90352, + (int64_t) 0); + bool greater_than_sizze_119630 = sle64(n_73011, + defunc_1_f_res_90352); + bool outside_bounds_dim_119631 = less_than_zzero_119629 || + greater_than_sizze_119630; + + if (!outside_bounds_dim_119631) { + ((__global int64_t *) mem_120146)[phys_tid_90321 + + defunc_1_f_res_90352 * + num_threads_125867] = + write_iter_119623; + } + if (!outside_bounds_dim_119631) { + for (int64_t i_126371 = 0; i_126371 < (int64_t) 1; + i_126371++) { + ((__global double *) mem_120144)[phys_tid_90321 + + (defunc_1_f_res_90352 + + i_126371) * + num_threads_125867] = + ((__global double *) 
mem_120127)[m_73008 * + slice_119984 + + gtid_90320 + + i_126371 * + ((int64_t) -1 * + m_73008)]; + } + } + } + // save map-out results + { + ((__global int64_t *) mem_120174)[dummy_90319 * m_73008 + + gtid_90320] = last_res_90342; + for (int64_t i_126372 = 0; i_126372 < n_73011; i_126372++) { + ((__global double *) mem_120177)[i_126372 * m_73008 + + dummy_90319 * m_73008 + + gtid_90320] = ((__global + double *) mem_120144)[phys_tid_90321 + + i_126372 * + num_threads_125867]; + } + for (int64_t i_126373 = 0; i_126373 < n_73011; i_126373++) { + ((__global int64_t *) mem_120180)[i_126373 * m_73008 + + dummy_90319 * m_73008 + + gtid_90320] = ((__global + int64_t *) mem_120146)[phys_tid_90321 + + i_126373 * + num_threads_125867]; + } + } + // load accumulator + { + x_90330 = x_acc_126359; + } + // load new values + { + x_90331 = last_res_90342; + } + // apply reduction operator + { + int64_t defunc_1_op_res_90332 = smax64(x_90330, x_90331); + + // store in accumulator + { + x_acc_126359 = defunc_1_op_res_90332; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_90330 = x_acc_126359; + ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_90330; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126374; + int32_t skip_waves_126375; + + skip_waves_126375 = 1; + + int64_t x_126361; + int64_t x_126362; + + offset_126374 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126351, sext_i64_i32(segred_group_sizze_90323))) { + x_126361 = ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126374)]; + } + } + offset_126374 = 1; + while (slt32(offset_126374, wave_sizze_126353)) { + if (slt32(local_tid_126351 + offset_126374, + sext_i64_i32(segred_group_sizze_90323)) && + ((local_tid_126351 - squot32(local_tid_126351, wave_sizze_126353) * + wave_sizze_126353) & (2 * offset_126374 - 1)) == 0) { + // read array element + { + x_126362 = ((volatile __local 
+ int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126374)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126363 = smax64(x_126361, x_126362); + + x_126361 = defunc_1_op_res_126363; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_126361; + } + } + offset_126374 *= 2; + } + while (slt32(skip_waves_126375, + squot32(sext_i64_i32(segred_group_sizze_90323) + + wave_sizze_126353 - 1, wave_sizze_126353))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126374 = skip_waves_126375 * wave_sizze_126353; + if (slt32(local_tid_126351 + offset_126374, + sext_i64_i32(segred_group_sizze_90323)) && + ((local_tid_126351 - squot32(local_tid_126351, wave_sizze_126353) * + wave_sizze_126353) == 0 && (squot32(local_tid_126351, + wave_sizze_126353) & (2 * + skip_waves_126375 - + 1)) == + 0)) { + // read array element + { + x_126362 = ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126374)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126363 = smax64(x_126361, x_126362); + + x_126361 = defunc_1_op_res_126363; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_126361; + } + } + skip_waves_126375 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126351) == (int64_t) 0) { + x_acc_126359 = x_126361; + } + } + + int32_t old_counter_126376; + + // first thread in group saves group result to global memory + { + if (local_tid_126351 == 0) { + ((__global + int64_t *) group_res_arr_mem_126347)[sext_i32_i64(group_tid_126352) * + segred_group_sizze_90323] = + x_acc_126359; + mem_fence_global(); + old_counter_126376 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126345)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_126355)[(int64_t) 0] 
= + old_counter_126376 == num_groups_90324 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126377; + + is_last_group_126377 = ((__local bool *) sync_arr_mem_126355)[(int64_t) 0]; + if (is_last_group_126377) { + if (local_tid_126351 == 0) { + old_counter_126376 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126345)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_90324)); + } + // read in the per-group-results + { + int64_t read_per_thread_126378 = sdiv_up64(num_groups_90324, + segred_group_sizze_90323); + + x_90330 = (int64_t) -9223372036854775808; + for (int64_t i_126379 = 0; i_126379 < read_per_thread_126378; + i_126379++) { + int64_t group_res_id_126380 = sext_i32_i64(local_tid_126351) * + read_per_thread_126378 + i_126379; + int64_t index_of_group_res_126381 = group_res_id_126380; + + if (slt64(group_res_id_126380, num_groups_90324)) { + x_90331 = ((__global + int64_t *) group_res_arr_mem_126347)[index_of_group_res_126381 * + segred_group_sizze_90323]; + + int64_t defunc_1_op_res_90332; + + defunc_1_op_res_90332 = smax64(x_90330, x_90331); + x_90330 = defunc_1_op_res_90332; + } + } + } + ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_90330; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126382; + int32_t skip_waves_126383; + + skip_waves_126383 = 1; + + int64_t x_126361; + int64_t x_126362; + + offset_126382 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126351, + sext_i64_i32(segred_group_sizze_90323))) { + x_126361 = ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126382)]; + } + } + offset_126382 = 1; + while (slt32(offset_126382, wave_sizze_126353)) { + if (slt32(local_tid_126351 + offset_126382, + sext_i64_i32(segred_group_sizze_90323)) && + ((local_tid_126351 - squot32(local_tid_126351, + wave_sizze_126353) * + 
wave_sizze_126353) & (2 * offset_126382 - 1)) == 0) { + // read array element + { + x_126362 = ((volatile __local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126382)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126363 = smax64(x_126361, + x_126362); + + x_126361 = defunc_1_op_res_126363; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_126361; + } + } + offset_126382 *= 2; + } + while (slt32(skip_waves_126383, + squot32(sext_i64_i32(segred_group_sizze_90323) + + wave_sizze_126353 - 1, wave_sizze_126353))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126382 = skip_waves_126383 * wave_sizze_126353; + if (slt32(local_tid_126351 + offset_126382, + sext_i64_i32(segred_group_sizze_90323)) && + ((local_tid_126351 - squot32(local_tid_126351, + wave_sizze_126353) * + wave_sizze_126353) == 0 && (squot32(local_tid_126351, + wave_sizze_126353) & + (2 * skip_waves_126383 - + 1)) == 0)) { + // read array element + { + x_126362 = ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351 + + offset_126382)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126363 = smax64(x_126361, + x_126362); + + x_126361 = defunc_1_op_res_126363; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126357)[sext_i32_i64(local_tid_126351)] = + x_126361; + } + } + skip_waves_126383 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126351 == 0) { + ((__global int64_t *) mem_120172)[(int64_t) 0] = x_126361; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_90323 +} +__kernel void mainMagnitudezisegred_nonseg_90558(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126508_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_126506_backing_aligned_1, + int64_t m_73008, + int64_t num_groups_90655, + int64_t num_threads_126500, + __global + unsigned 
char *defunc_2_reduce_res_map_acc_mem_120211, + __global + unsigned char *mem_120218, + __global + unsigned char *mainMagnitudezicounter_mem_126496, + __global + unsigned char *group_res_arr_mem_126498) +{ + #define segred_group_sizze_90654 (mainMagnitudezisegred_group_sizze_90550) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126508_backing_1 = + (__local volatile + char *) red_arr_mem_126508_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_126506_backing_0 = + (__local volatile + char *) sync_arr_mem_126506_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126501; + int32_t local_tid_126502; + int64_t group_sizze_126505; + int32_t wave_sizze_126504; + int32_t group_tid_126503; + + global_tid_126501 = get_global_id(0); + local_tid_126502 = get_local_id(0); + group_sizze_126505 = get_local_size(0); + wave_sizze_126504 = LOCKSTEP_WIDTH; + group_tid_126503 = get_group_id(0); + + int32_t phys_tid_90558; + + phys_tid_90558 = global_tid_126501; + + __local char *sync_arr_mem_126506; + + sync_arr_mem_126506 = (__local char *) sync_arr_mem_126506_backing_0; + + __local char *red_arr_mem_126508; + + red_arr_mem_126508 = (__local char *) red_arr_mem_126508_backing_1; + + int64_t dummy_90556; + + dummy_90556 = (int64_t) 0; + + int64_t gtid_90557; + + gtid_90557 = (int64_t) 0; + + int64_t x_acc_126510; + int64_t chunk_sizze_126511; + + chunk_sizze_126511 = smin64(sdiv_up64(m_73008, + sext_i32_i64(sext_i64_i32(segred_group_sizze_90654 * + num_groups_90655))), + sdiv_up64(m_73008 - phys_tid_90558, + num_threads_126500)); + + int64_t x_90658; + int64_t x_90659; + + // neutral-initialise the accumulators + { + x_acc_126510 = (int64_t) -9223372036854775808; + } + for (int64_t i_126515 = 0; i_126515 < chunk_sizze_126511; i_126515++) { + gtid_90557 = phys_tid_90558 + num_threads_126500 * i_126515; + // apply map function + { + int64_t x_90661 = 
((__global + int64_t *) defunc_2_reduce_res_map_acc_mem_120211)[gtid_90557]; + + // save map-out results + { } + // load accumulator + { + x_90658 = x_acc_126510; + } + // load new values + { + x_90659 = x_90661; + } + // apply reduction operator + { + int64_t defunc_1_op_res_90660 = smax64(x_90658, x_90659); + + // store in accumulator + { + x_acc_126510 = defunc_1_op_res_90660; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_90658 = x_acc_126510; + ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_90658; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_126516; + int32_t skip_waves_126517; + + skip_waves_126517 = 1; + + int64_t x_126512; + int64_t x_126513; + + offset_126516 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126502, sext_i64_i32(segred_group_sizze_90654))) { + x_126512 = ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126516)]; + } + } + offset_126516 = 1; + while (slt32(offset_126516, wave_sizze_126504)) { + if (slt32(local_tid_126502 + offset_126516, + sext_i64_i32(segred_group_sizze_90654)) && + ((local_tid_126502 - squot32(local_tid_126502, wave_sizze_126504) * + wave_sizze_126504) & (2 * offset_126516 - 1)) == 0) { + // read array element + { + x_126513 = ((volatile __local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126516)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126514 = smax64(x_126512, x_126513); + + x_126512 = defunc_1_op_res_126514; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_126512; + } + } + offset_126516 *= 2; + } + while (slt32(skip_waves_126517, + squot32(sext_i64_i32(segred_group_sizze_90654) + + wave_sizze_126504 - 1, wave_sizze_126504))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126516 = skip_waves_126517 * wave_sizze_126504; + if 
(slt32(local_tid_126502 + offset_126516, + sext_i64_i32(segred_group_sizze_90654)) && + ((local_tid_126502 - squot32(local_tid_126502, wave_sizze_126504) * + wave_sizze_126504) == 0 && (squot32(local_tid_126502, + wave_sizze_126504) & (2 * + skip_waves_126517 - + 1)) == + 0)) { + // read array element + { + x_126513 = ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126516)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126514 = smax64(x_126512, x_126513); + + x_126512 = defunc_1_op_res_126514; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_126512; + } + } + skip_waves_126517 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_126502) == (int64_t) 0) { + x_acc_126510 = x_126512; + } + } + + int32_t old_counter_126518; + + // first thread in group saves group result to global memory + { + if (local_tid_126502 == 0) { + ((__global + int64_t *) group_res_arr_mem_126498)[sext_i32_i64(group_tid_126503) * + segred_group_sizze_90654] = + x_acc_126510; + mem_fence_global(); + old_counter_126518 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126496)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_126506)[(int64_t) 0] = + old_counter_126518 == num_groups_90655 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_126519; + + is_last_group_126519 = ((__local bool *) sync_arr_mem_126506)[(int64_t) 0]; + if (is_last_group_126519) { + if (local_tid_126502 == 0) { + old_counter_126518 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_126496)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_90655)); + } + // read in the per-group-results + { + int64_t read_per_thread_126520 = sdiv_up64(num_groups_90655, + segred_group_sizze_90654); + + x_90658 = (int64_t) 
-9223372036854775808; + for (int64_t i_126521 = 0; i_126521 < read_per_thread_126520; + i_126521++) { + int64_t group_res_id_126522 = sext_i32_i64(local_tid_126502) * + read_per_thread_126520 + i_126521; + int64_t index_of_group_res_126523 = group_res_id_126522; + + if (slt64(group_res_id_126522, num_groups_90655)) { + x_90659 = ((__global + int64_t *) group_res_arr_mem_126498)[index_of_group_res_126523 * + segred_group_sizze_90654]; + + int64_t defunc_1_op_res_90660; + + defunc_1_op_res_90660 = smax64(x_90658, x_90659); + x_90658 = defunc_1_op_res_90660; + } + } + } + ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_90658; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t offset_126524; + int32_t skip_waves_126525; + + skip_waves_126525 = 1; + + int64_t x_126512; + int64_t x_126513; + + offset_126524 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_126502, + sext_i64_i32(segred_group_sizze_90654))) { + x_126512 = ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126524)]; + } + } + offset_126524 = 1; + while (slt32(offset_126524, wave_sizze_126504)) { + if (slt32(local_tid_126502 + offset_126524, + sext_i64_i32(segred_group_sizze_90654)) && + ((local_tid_126502 - squot32(local_tid_126502, + wave_sizze_126504) * + wave_sizze_126504) & (2 * offset_126524 - 1)) == 0) { + // read array element + { + x_126513 = ((volatile __local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126524)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126514 = smax64(x_126512, + x_126513); + + x_126512 = defunc_1_op_res_126514; + } + // write result of operation + { + ((volatile __local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_126512; + } + } + offset_126524 *= 2; + } + while (slt32(skip_waves_126525, + squot32(sext_i64_i32(segred_group_sizze_90654) + + wave_sizze_126504 - 1, 
wave_sizze_126504))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_126524 = skip_waves_126525 * wave_sizze_126504; + if (slt32(local_tid_126502 + offset_126524, + sext_i64_i32(segred_group_sizze_90654)) && + ((local_tid_126502 - squot32(local_tid_126502, + wave_sizze_126504) * + wave_sizze_126504) == 0 && (squot32(local_tid_126502, + wave_sizze_126504) & + (2 * skip_waves_126525 - + 1)) == 0)) { + // read array element + { + x_126513 = ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502 + + offset_126524)]; + } + // apply reduction operation + { + int64_t defunc_1_op_res_126514 = smax64(x_126512, + x_126513); + + x_126512 = defunc_1_op_res_126514; + } + // write result of operation + { + ((__local + int64_t *) red_arr_mem_126508)[sext_i32_i64(local_tid_126502)] = + x_126512; + } + } + skip_waves_126525 *= 2; + } + // and back to memory with the final result + { + if (local_tid_126502 == 0) { + ((__global int64_t *) mem_120218)[(int64_t) 0] = x_126512; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_90654 +} +__kernel void mainMagnitudezisegred_nonseg_98266(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127987_backing_aligned_0, + __local volatile + int64_t *sync_arr_mem_127985_backing_aligned_1, + int64_t m_73008, + int64_t num_groups_98261, + int64_t num_threads_127979, + __global + unsigned char *defunc_7_map_res_mem_123721, + __global + unsigned char *mem_123728, + __global + unsigned char *mainMagnitudezicounter_mem_127975, + __global + unsigned char *group_res_arr_mem_127977) +{ + #define segred_group_sizze_98259 (mainMagnitudezisegred_group_sizze_98258) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127987_backing_1 = + (__local volatile + char *) red_arr_mem_127987_backing_aligned_0; + __local volatile char *restrict sync_arr_mem_127985_backing_0 = + (__local volatile + char *) 
sync_arr_mem_127985_backing_aligned_1; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127980; + int32_t local_tid_127981; + int64_t group_sizze_127984; + int32_t wave_sizze_127983; + int32_t group_tid_127982; + + global_tid_127980 = get_global_id(0); + local_tid_127981 = get_local_id(0); + group_sizze_127984 = get_local_size(0); + wave_sizze_127983 = LOCKSTEP_WIDTH; + group_tid_127982 = get_group_id(0); + + int32_t phys_tid_98266; + + phys_tid_98266 = global_tid_127980; + + __local char *sync_arr_mem_127985; + + sync_arr_mem_127985 = (__local char *) sync_arr_mem_127985_backing_0; + + __local char *red_arr_mem_127987; + + red_arr_mem_127987 = (__local char *) red_arr_mem_127987_backing_1; + + int64_t dummy_98264; + + dummy_98264 = (int64_t) 0; + + int64_t gtid_98265; + + gtid_98265 = (int64_t) 0; + + bool x_acc_127989; + int64_t chunk_sizze_127990; + + chunk_sizze_127990 = smin64(sdiv_up64(m_73008, + sext_i32_i64(sext_i64_i32(segred_group_sizze_98259 * + num_groups_98261))), + sdiv_up64(m_73008 - phys_tid_98266, + num_threads_127979)); + + bool x_74292; + bool x_74293; + + // neutral-initialise the accumulators + { + x_acc_127989 = 0; + } + for (int64_t i_127994 = 0; i_127994 < chunk_sizze_127990; i_127994++) { + gtid_98265 = phys_tid_98266 + num_threads_127979 * i_127994; + // apply map function + { + bool x_74295 = ((__global + bool *) defunc_7_map_res_mem_123721)[gtid_98265]; + + // save map-out results + { } + // load accumulator + { + x_74292 = x_acc_127989; + } + // load new values + { + x_74293 = x_74295; + } + // apply reduction operator + { + bool defunc_1_op_res_74294 = x_74292 || x_74293; + + // store in accumulator + { + x_acc_127989 = defunc_1_op_res_74294; + } + } + } + } + // to reduce current chunk, first store our result in memory + { + x_74292 = x_acc_127989; + ((__local bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + x_74292; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t offset_127995; + int32_t 
skip_waves_127996; + + skip_waves_127996 = 1; + + bool x_127991; + bool x_127992; + + offset_127995 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127981, sext_i64_i32(segred_group_sizze_98259))) { + x_127991 = ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_127995)]; + } + } + offset_127995 = 1; + while (slt32(offset_127995, wave_sizze_127983)) { + if (slt32(local_tid_127981 + offset_127995, + sext_i64_i32(segred_group_sizze_98259)) && + ((local_tid_127981 - squot32(local_tid_127981, wave_sizze_127983) * + wave_sizze_127983) & (2 * offset_127995 - 1)) == 0) { + // read array element + { + x_127992 = ((volatile __local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_127995)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127993 = x_127991 || x_127992; + + x_127991 = defunc_1_op_res_127993; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + x_127991; + } + } + offset_127995 *= 2; + } + while (slt32(skip_waves_127996, + squot32(sext_i64_i32(segred_group_sizze_98259) + + wave_sizze_127983 - 1, wave_sizze_127983))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_127995 = skip_waves_127996 * wave_sizze_127983; + if (slt32(local_tid_127981 + offset_127995, + sext_i64_i32(segred_group_sizze_98259)) && + ((local_tid_127981 - squot32(local_tid_127981, wave_sizze_127983) * + wave_sizze_127983) == 0 && (squot32(local_tid_127981, + wave_sizze_127983) & (2 * + skip_waves_127996 - + 1)) == + 0)) { + // read array element + { + x_127992 = ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_127995)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127993 = x_127991 || x_127992; + + x_127991 = defunc_1_op_res_127993; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + x_127991; + } + } + 
skip_waves_127996 *= 2; + } + barrier(CLK_LOCAL_MEM_FENCE); + // first thread saves the result in accumulator + { + if (sext_i32_i64(local_tid_127981) == (int64_t) 0) { + x_acc_127989 = x_127991; + } + } + + int32_t old_counter_127997; + + // first thread in group saves group result to global memory + { + if (local_tid_127981 == 0) { + ((__global + bool *) group_res_arr_mem_127977)[sext_i32_i64(group_tid_127982) * + segred_group_sizze_98259] = + x_acc_127989; + mem_fence_global(); + old_counter_127997 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127975)[(int64_t) 0], + (int) 1); + ((__local bool *) sync_arr_mem_127985)[(int64_t) 0] = + old_counter_127997 == num_groups_98261 - (int64_t) 1; + } + } + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + + bool is_last_group_127998; + + is_last_group_127998 = ((__local bool *) sync_arr_mem_127985)[(int64_t) 0]; + if (is_last_group_127998) { + if (local_tid_127981 == 0) { + old_counter_127997 = atomic_add_i32_global(&((volatile __global + int *) mainMagnitudezicounter_mem_127975)[(int64_t) 0], + (int) ((int64_t) 0 - + num_groups_98261)); + } + // read in the per-group-results + { + int64_t read_per_thread_127999 = sdiv_up64(num_groups_98261, + segred_group_sizze_98259); + + x_74292 = 0; + for (int64_t i_128000 = 0; i_128000 < read_per_thread_127999; + i_128000++) { + int64_t group_res_id_128001 = sext_i32_i64(local_tid_127981) * + read_per_thread_127999 + i_128000; + int64_t index_of_group_res_128002 = group_res_id_128001; + + if (slt64(group_res_id_128001, num_groups_98261)) { + x_74293 = ((__global + bool *) group_res_arr_mem_127977)[index_of_group_res_128002 * + segred_group_sizze_98259]; + + bool defunc_1_op_res_74294; + + defunc_1_op_res_74294 = x_74292 || x_74293; + x_74292 = defunc_1_op_res_74294; + } + } + } + ((__local bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + x_74292; + barrier(CLK_LOCAL_MEM_FENCE); + // reduce the per-group results + { + int32_t 
offset_128003; + int32_t skip_waves_128004; + + skip_waves_128004 = 1; + + bool x_127991; + bool x_127992; + + offset_128003 = 0; + // participating threads read initial accumulator + { + if (slt32(local_tid_127981, + sext_i64_i32(segred_group_sizze_98259))) { + x_127991 = ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_128003)]; + } + } + offset_128003 = 1; + while (slt32(offset_128003, wave_sizze_127983)) { + if (slt32(local_tid_127981 + offset_128003, + sext_i64_i32(segred_group_sizze_98259)) && + ((local_tid_127981 - squot32(local_tid_127981, + wave_sizze_127983) * + wave_sizze_127983) & (2 * offset_128003 - 1)) == 0) { + // read array element + { + x_127992 = ((volatile __local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_128003)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127993 = x_127991 || x_127992; + + x_127991 = defunc_1_op_res_127993; + } + // write result of operation + { + ((volatile __local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + x_127991; + } + } + offset_128003 *= 2; + } + while (slt32(skip_waves_128004, + squot32(sext_i64_i32(segred_group_sizze_98259) + + wave_sizze_127983 - 1, wave_sizze_127983))) { + barrier(CLK_LOCAL_MEM_FENCE); + offset_128003 = skip_waves_128004 * wave_sizze_127983; + if (slt32(local_tid_127981 + offset_128003, + sext_i64_i32(segred_group_sizze_98259)) && + ((local_tid_127981 - squot32(local_tid_127981, + wave_sizze_127983) * + wave_sizze_127983) == 0 && (squot32(local_tid_127981, + wave_sizze_127983) & + (2 * skip_waves_128004 - + 1)) == 0)) { + // read array element + { + x_127992 = ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981 + + offset_128003)]; + } + // apply reduction operation + { + bool defunc_1_op_res_127993 = x_127991 || x_127992; + + x_127991 = defunc_1_op_res_127993; + } + // write result of operation + { + ((__local + bool *) red_arr_mem_127987)[sext_i32_i64(local_tid_127981)] = + 
x_127991; + } + } + skip_waves_128004 *= 2; + } + // and back to memory with the final result + { + if (local_tid_127981 == 0) { + ((__global bool *) mem_123728)[(int64_t) 0] = x_127991; + } + } + } + } + + error_1: + return; + #undef segred_group_sizze_98259 +} +__kernel void mainMagnitudezisegred_small_100140(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128774_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_100279, + int64_t segment_sizze_nonzzero_128767, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124276, + __global + unsigned char *mem_124281) +{ + #define segred_group_sizze_100278 (mainMagnitudezisegred_group_sizze_100134) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128774_backing_0 = + (__local volatile + char *) red_arr_mem_128774_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128769; + int32_t local_tid_128770; + int64_t group_sizze_128773; + int32_t wave_sizze_128772; + int32_t group_tid_128771; + + global_tid_128769 = get_global_id(0); + local_tid_128770 = get_local_id(0); + group_sizze_128773 = get_local_size(0); + wave_sizze_128772 = LOCKSTEP_WIDTH; + group_tid_128771 = get_group_id(0); + + int32_t phys_tid_100140; + + phys_tid_100140 = global_tid_128769; + + __local char *red_arr_mem_128774; + + red_arr_mem_128774 = (__local char *) red_arr_mem_128774_backing_0; + + int32_t phys_group_id_128776; + + phys_group_id_128776 = get_group_id(0); + for (int32_t i_128777 = 0; i_128777 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023 * k2p2zq_73023, + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767))) - + phys_group_id_128776, sext_i64_i32(num_groups_100279)); + i_128777++) { + int32_t virt_group_id_128778 = phys_group_id_128776 + i_128777 
* + sext_i64_i32(num_groups_100279); + int64_t gtid_100127 = squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_100128 = squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) - + squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_100129 = squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) - + squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) - + squot64(squot64(sext_i32_i64(local_tid_128770), + segment_sizze_nonzzero_128767) + + sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_100139 = srem64(sext_i32_i64(local_tid_128770), n_73011); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_73011) && (((slt64(gtid_100127, m_73008) && + slt64(gtid_100128, + k2p2zq_73023)) && + slt64(gtid_100129, + 
k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_128770), + n_73011 * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767)))) { + double x_100288 = ((__global double *) mem_124142)[gtid_100127 * + N_73007 + + gtid_100139]; + double x_100289 = ((__global + double *) binop_p_mem_120117)[gtid_100128 * + N_73007 + + gtid_100139]; + double x_100290 = ((__global double *) mem_124276)[gtid_100129 * + N_73007 + + gtid_100139]; + double x_100291 = x_100289 * x_100290; + bool isnan_res_100292; + + isnan_res_100292 = futrts_isnan64(x_100288); + + double y_100293; + + if (isnan_res_100292) { + y_100293 = 0.0; + } else { + y_100293 = 1.0; + } + + double defunc_2_f_res_100294 = x_100291 * y_100293; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + defunc_2_f_res_100294; + } + } else { + ((__local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_73011)) { + // perform segmented scan to imitate reduction + { + double x_100282; + double x_100283; + double x_128779; + double x_128780; + bool ltid_in_bounds_128782; + + ltid_in_bounds_128782 = slt64(sext_i32_i64(local_tid_128770), + n_73011 * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767)); + + int32_t skip_threads_128783; + + // read input for in-block scan + { + if (ltid_in_bounds_128782) { + x_100283 = ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)]; + if ((local_tid_128770 - squot32(local_tid_128770, 32) * + 32) == 0) { + x_100282 = x_100283; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128783 = 1; + while (slt32(skip_threads_128783, 32)) { + if (sle32(skip_threads_128783, local_tid_128770 - + squot32(local_tid_128770, 32) * 32) && + ltid_in_bounds_128782) { + // read operands + { + x_100282 = ((volatile __local + double *) 
red_arr_mem_128774)[sext_i32_i64(local_tid_128770) - + sext_i32_i64(skip_threads_128783)]; + } + // perform operation + { + bool inactive_128784 = + slt64(srem64(sext_i32_i64(local_tid_128770), + n_73011), + sext_i32_i64(local_tid_128770) - + sext_i32_i64(local_tid_128770 - + skip_threads_128783)); + + if (inactive_128784) { + x_100282 = x_100283; + } + if (!inactive_128784) { + double defunc_1_op_res_100284 = x_100282 + + x_100283; + + x_100282 = defunc_1_op_res_100284; + } + } + } + if (sle32(wave_sizze_128772, skip_threads_128783)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128783, local_tid_128770 - + squot32(local_tid_128770, 32) * 32) && + ltid_in_bounds_128782) { + // write result + { + ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + x_100282; + x_100283 = x_100282; + } + } + if (sle32(wave_sizze_128772, skip_threads_128783)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128783 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128770 - squot32(local_tid_128770, 32) * + 32) == 31 && ltid_in_bounds_128782) { + ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(squot32(local_tid_128770, + 32))] = + x_100282; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128785; + + // read input for in-block scan + { + if (squot32(local_tid_128770, 32) == 0 && + ltid_in_bounds_128782) { + x_128780 = ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)]; + if ((local_tid_128770 - squot32(local_tid_128770, + 32) * 32) == 0) { + x_128779 = x_128780; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128785 = 1; + while (slt32(skip_threads_128785, 32)) { + if (sle32(skip_threads_128785, local_tid_128770 - + squot32(local_tid_128770, 32) * 32) && + 
(squot32(local_tid_128770, 32) == 0 && + ltid_in_bounds_128782)) { + // read operands + { + x_128779 = ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770) - + sext_i32_i64(skip_threads_128785)]; + } + // perform operation + { + bool inactive_128786 = + slt64(srem64(sext_i32_i64(local_tid_128770 * + 32 + 32 - 1), n_73011), + sext_i32_i64(local_tid_128770 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128770 - + skip_threads_128785) * + 32 + 32 - 1)); + + if (inactive_128786) { + x_128779 = x_128780; + } + if (!inactive_128786) { + double defunc_1_op_res_128781 = + x_128779 + x_128780; + + x_128779 = defunc_1_op_res_128781; + } + } + } + if (sle32(wave_sizze_128772, skip_threads_128785)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128785, local_tid_128770 - + squot32(local_tid_128770, 32) * 32) && + (squot32(local_tid_128770, 32) == 0 && + ltid_in_bounds_128782)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + x_128779; + x_128780 = x_128779; + } + } + if (sle32(wave_sizze_128772, skip_threads_128785)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128785 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128770, 32) == 0 || + !ltid_in_bounds_128782)) { + // read operands + { + x_100283 = x_100282; + x_100282 = ((__local + double *) red_arr_mem_128774)[sext_i32_i64(squot32(local_tid_128770, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128787 = + slt64(srem64(sext_i32_i64(local_tid_128770), + n_73011), + sext_i32_i64(local_tid_128770) - + sext_i32_i64(squot32(local_tid_128770, + 32) * 32 - 1)); + + if (inactive_128787) { + x_100282 = x_100283; + } + if (!inactive_128787) { + double defunc_1_op_res_100284 = x_100282 + + x_100283; + + x_100282 = defunc_1_op_res_100284; + } + } + // write final result + { + ((__local + double *) 
red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + x_100282; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128770, 32) == 0) { + ((__local + double *) red_arr_mem_128774)[sext_i32_i64(local_tid_128770)] = + x_100283; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770), m_73008 * k2p2zq_73023 * + k2p2zq_73023) && slt64(sext_i32_i64(local_tid_128770), + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767))) { + ((__global + double *) mem_124281)[squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) + + squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770) - + squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770) - + squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770) - + squot64(sext_i32_i64(virt_group_id_128778) * + squot64(segred_group_sizze_100278, + 
segment_sizze_nonzzero_128767) + + sext_i32_i64(local_tid_128770), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_128774)[(sext_i32_i64(local_tid_128770) + + (int64_t) 1) * + segment_sizze_nonzzero_128767 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_100278 +} +__kernel void mainMagnitudezisegred_small_101024(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128964_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t n_73011, + int64_t k2p2zq_73023, + int64_t num_groups_101077, + int64_t segment_sizze_nonzzero_128957, + __global + unsigned char *binop_p_mem_120117, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124587) +{ + #define segred_group_sizze_101076 (mainMagnitudezisegred_group_sizze_101018) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128964_backing_0 = + (__local volatile + char *) red_arr_mem_128964_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128959; + int32_t local_tid_128960; + int64_t group_sizze_128963; + int32_t wave_sizze_128962; + int32_t group_tid_128961; + + global_tid_128959 = get_global_id(0); + local_tid_128960 = get_local_id(0); + group_sizze_128963 = get_local_size(0); + wave_sizze_128962 = LOCKSTEP_WIDTH; + group_tid_128961 = get_group_id(0); + + int32_t phys_tid_101024; + + phys_tid_101024 = global_tid_128959; + + __local char *red_arr_mem_128964; + + red_arr_mem_128964 = (__local char *) red_arr_mem_128964_backing_0; + + int32_t phys_group_id_128966; + + phys_group_id_128966 = get_group_id(0); + for (int32_t i_128967 = 0; i_128967 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + squot64(segred_group_sizze_101076, + 
segment_sizze_nonzzero_128957))) - + phys_group_id_128966, sext_i64_i32(num_groups_101077)); + i_128967++) { + int32_t virt_group_id_128968 = phys_group_id_128966 + i_128967 * + sext_i64_i32(num_groups_101077); + int64_t gtid_101013 = squot64(squot64(sext_i32_i64(local_tid_128960), + segment_sizze_nonzzero_128957) + + sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957), + k2p2zq_73023); + int64_t gtid_101014 = squot64(sext_i32_i64(local_tid_128960), + segment_sizze_nonzzero_128957) + + sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957) - + squot64(squot64(sext_i32_i64(local_tid_128960), + segment_sizze_nonzzero_128957) + + sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_101023 = srem64(sext_i32_i64(local_tid_128960), n_73011); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_73011) && ((slt64(gtid_101013, m_73008) && + slt64(gtid_101014, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_128960), + n_73011 * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957)))) { + double x_101086 = ((__global double *) mem_124142)[gtid_101013 * + N_73007 + + gtid_101023]; + bool isnan_res_101087; + + isnan_res_101087 = futrts_isnan64(x_101086); + + double defunc_1_f_res_101088; + + if (isnan_res_101087) { + defunc_1_f_res_101088 = 0.0; + } else { + double x_101085 = ((__global + double *) binop_p_mem_120117)[gtid_101014 * + N_73007 + + gtid_101023]; + double defunc_1_f_res_f_res_101089 = x_101085 * x_101086; + + defunc_1_f_res_101088 = defunc_1_f_res_f_res_101089; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = + defunc_1_f_res_101088; + } + } else { + ((__local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = 
+ 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_73011)) { + // perform segmented scan to imitate reduction + { + double x_101080; + double x_101081; + double x_128969; + double x_128970; + bool ltid_in_bounds_128972; + + ltid_in_bounds_128972 = slt64(sext_i32_i64(local_tid_128960), + n_73011 * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957)); + + int32_t skip_threads_128973; + + // read input for in-block scan + { + if (ltid_in_bounds_128972) { + x_101081 = ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)]; + if ((local_tid_128960 - squot32(local_tid_128960, 32) * + 32) == 0) { + x_101080 = x_101081; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128973 = 1; + while (slt32(skip_threads_128973, 32)) { + if (sle32(skip_threads_128973, local_tid_128960 - + squot32(local_tid_128960, 32) * 32) && + ltid_in_bounds_128972) { + // read operands + { + x_101080 = ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960) - + sext_i32_i64(skip_threads_128973)]; + } + // perform operation + { + bool inactive_128974 = + slt64(srem64(sext_i32_i64(local_tid_128960), + n_73011), + sext_i32_i64(local_tid_128960) - + sext_i32_i64(local_tid_128960 - + skip_threads_128973)); + + if (inactive_128974) { + x_101080 = x_101081; + } + if (!inactive_128974) { + double defunc_1_op_res_101082 = x_101080 + + x_101081; + + x_101080 = defunc_1_op_res_101082; + } + } + } + if (sle32(wave_sizze_128962, skip_threads_128973)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128973, local_tid_128960 - + squot32(local_tid_128960, 32) * 32) && + ltid_in_bounds_128972) { + // write result + { + ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = + x_101080; + x_101081 = x_101080; + } + } + if (sle32(wave_sizze_128962, skip_threads_128973)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128973 *= 2; + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128960 - squot32(local_tid_128960, 32) * + 32) == 31 && ltid_in_bounds_128972) { + ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(squot32(local_tid_128960, + 32))] = + x_101080; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128975; + + // read input for in-block scan + { + if (squot32(local_tid_128960, 32) == 0 && + ltid_in_bounds_128972) { + x_128970 = ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)]; + if ((local_tid_128960 - squot32(local_tid_128960, + 32) * 32) == 0) { + x_128969 = x_128970; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128975 = 1; + while (slt32(skip_threads_128975, 32)) { + if (sle32(skip_threads_128975, local_tid_128960 - + squot32(local_tid_128960, 32) * 32) && + (squot32(local_tid_128960, 32) == 0 && + ltid_in_bounds_128972)) { + // read operands + { + x_128969 = ((volatile __local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960) - + sext_i32_i64(skip_threads_128975)]; + } + // perform operation + { + bool inactive_128976 = + slt64(srem64(sext_i32_i64(local_tid_128960 * + 32 + 32 - 1), n_73011), + sext_i32_i64(local_tid_128960 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128960 - + skip_threads_128975) * + 32 + 32 - 1)); + + if (inactive_128976) { + x_128969 = x_128970; + } + if (!inactive_128976) { + double defunc_1_op_res_128971 = + x_128969 + x_128970; + + x_128969 = defunc_1_op_res_128971; + } + } + } + if (sle32(wave_sizze_128962, skip_threads_128975)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128975, local_tid_128960 - + squot32(local_tid_128960, 32) * 32) && + (squot32(local_tid_128960, 32) == 0 && + ltid_in_bounds_128972)) { + // write result + { + ((volatile __local + double *) 
red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = + x_128969; + x_128970 = x_128969; + } + } + if (sle32(wave_sizze_128962, skip_threads_128975)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128975 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128960, 32) == 0 || + !ltid_in_bounds_128972)) { + // read operands + { + x_101081 = x_101080; + x_101080 = ((__local + double *) red_arr_mem_128964)[sext_i32_i64(squot32(local_tid_128960, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128977 = + slt64(srem64(sext_i32_i64(local_tid_128960), + n_73011), + sext_i32_i64(local_tid_128960) - + sext_i32_i64(squot32(local_tid_128960, + 32) * 32 - 1)); + + if (inactive_128977) { + x_101080 = x_101081; + } + if (!inactive_128977) { + double defunc_1_op_res_101082 = x_101080 + + x_101081; + + x_101080 = defunc_1_op_res_101082; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = + x_101080; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128960, 32) == 0) { + ((__local + double *) red_arr_mem_128964)[sext_i32_i64(local_tid_128960)] = + x_101081; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957) + + sext_i32_i64(local_tid_128960), m_73008 * k2p2zq_73023) && + slt64(sext_i32_i64(local_tid_128960), + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957))) { + ((__global + double *) mem_124587)[squot64(sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957) + + sext_i32_i64(local_tid_128960), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_128968) * + 
squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957) + + sext_i32_i64(local_tid_128960) - + squot64(sext_i32_i64(virt_group_id_128968) * + squot64(segred_group_sizze_101076, + segment_sizze_nonzzero_128957) + + sext_i32_i64(local_tid_128960), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_128964)[(sext_i32_i64(local_tid_128960) + + (int64_t) 1) * + segment_sizze_nonzzero_128957 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101076 +} +__kernel void mainMagnitudezisegred_small_101161(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129052_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_101210, + int64_t segment_sizze_nonzzero_129045, + __global + unsigned char *defunc_3_map_res_mem_124372, + __global + unsigned char *defunc_3_map_res_mem_124593, + __global + unsigned char *mem_124653) +{ + #define segred_group_sizze_101209 (mainMagnitudezisegred_group_sizze_101155) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129052_backing_0 = + (__local volatile + char *) red_arr_mem_129052_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129047; + int32_t local_tid_129048; + int64_t group_sizze_129051; + int32_t wave_sizze_129050; + int32_t group_tid_129049; + + global_tid_129047 = get_global_id(0); + local_tid_129048 = get_local_id(0); + group_sizze_129051 = get_local_size(0); + wave_sizze_129050 = LOCKSTEP_WIDTH; + group_tid_129049 = get_group_id(0); + + int32_t phys_tid_101161; + + phys_tid_101161 = global_tid_129047; + + __local char *red_arr_mem_129052; + + red_arr_mem_129052 = (__local char *) red_arr_mem_129052_backing_0; + + int32_t phys_group_id_129054; + + phys_group_id_129054 = get_group_id(0); + for (int32_t i_129055 = 0; i_129055 < + 
sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045))) - + phys_group_id_129054, sext_i64_i32(num_groups_101210)); + i_129055++) { + int32_t virt_group_id_129056 = phys_group_id_129054 + i_129055 * + sext_i64_i32(num_groups_101210); + int64_t gtid_101150 = squot64(squot64(sext_i32_i64(local_tid_129048), + segment_sizze_nonzzero_129045) + + sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045), + k2p2zq_73023); + int64_t gtid_101151 = squot64(sext_i32_i64(local_tid_129048), + segment_sizze_nonzzero_129045) + + sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045) - + squot64(squot64(sext_i32_i64(local_tid_129048), + segment_sizze_nonzzero_129045) + + sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_101160 = srem64(sext_i32_i64(local_tid_129048), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && ((slt64(gtid_101150, + m_73008) && + slt64(gtid_101151, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_129048), + k2p2zq_73023 * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045)))) { + double x_101219 = ((__global + double *) defunc_3_map_res_mem_124593)[gtid_101150 * + k2p2zq_73023 + + gtid_101160]; + double x_101220 = ((__global + double *) defunc_3_map_res_mem_124372)[gtid_101150 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_101151 * + k2p2zq_73023 + + gtid_101160]; + double defunc_1_f_res_101221 = x_101219 * x_101220; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + defunc_1_f_res_101221; + } + } else { + ((__local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + 0.0; + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_101213; + double x_101214; + double x_129057; + double x_129058; + bool ltid_in_bounds_129060; + + ltid_in_bounds_129060 = slt64(sext_i32_i64(local_tid_129048), + k2p2zq_73023 * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045)); + + int32_t skip_threads_129061; + + // read input for in-block scan + { + if (ltid_in_bounds_129060) { + x_101214 = ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)]; + if ((local_tid_129048 - squot32(local_tid_129048, 32) * + 32) == 0) { + x_101213 = x_101214; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129061 = 1; + while (slt32(skip_threads_129061, 32)) { + if (sle32(skip_threads_129061, local_tid_129048 - + squot32(local_tid_129048, 32) * 32) && + ltid_in_bounds_129060) { + // read operands + { + x_101213 = ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048) - + sext_i32_i64(skip_threads_129061)]; + } + // perform operation + { + bool inactive_129062 = + slt64(srem64(sext_i32_i64(local_tid_129048), + k2p2zq_73023), + sext_i32_i64(local_tid_129048) - + sext_i32_i64(local_tid_129048 - + skip_threads_129061)); + + if (inactive_129062) { + x_101213 = x_101214; + } + if (!inactive_129062) { + double defunc_1_op_res_101215 = x_101213 + + x_101214; + + x_101213 = defunc_1_op_res_101215; + } + } + } + if (sle32(wave_sizze_129050, skip_threads_129061)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129061, local_tid_129048 - + squot32(local_tid_129048, 32) * 32) && + ltid_in_bounds_129060) { + // write result + { + ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + x_101213; + x_101214 = x_101213; + } + } + if (sle32(wave_sizze_129050, skip_threads_129061)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129061 *= 2; + } + } + 
barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129048 - squot32(local_tid_129048, 32) * + 32) == 31 && ltid_in_bounds_129060) { + ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(squot32(local_tid_129048, + 32))] = + x_101213; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129063; + + // read input for in-block scan + { + if (squot32(local_tid_129048, 32) == 0 && + ltid_in_bounds_129060) { + x_129058 = ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)]; + if ((local_tid_129048 - squot32(local_tid_129048, + 32) * 32) == 0) { + x_129057 = x_129058; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129063 = 1; + while (slt32(skip_threads_129063, 32)) { + if (sle32(skip_threads_129063, local_tid_129048 - + squot32(local_tid_129048, 32) * 32) && + (squot32(local_tid_129048, 32) == 0 && + ltid_in_bounds_129060)) { + // read operands + { + x_129057 = ((volatile __local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048) - + sext_i32_i64(skip_threads_129063)]; + } + // perform operation + { + bool inactive_129064 = + slt64(srem64(sext_i32_i64(local_tid_129048 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_129048 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129048 - + skip_threads_129063) * + 32 + 32 - 1)); + + if (inactive_129064) { + x_129057 = x_129058; + } + if (!inactive_129064) { + double defunc_1_op_res_129059 = + x_129057 + x_129058; + + x_129057 = defunc_1_op_res_129059; + } + } + } + if (sle32(wave_sizze_129050, skip_threads_129063)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129063, local_tid_129048 - + squot32(local_tid_129048, 32) * 32) && + (squot32(local_tid_129048, 32) == 0 && + ltid_in_bounds_129060)) { + // write result + { + ((volatile __local + double *) 
red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + x_129057; + x_129058 = x_129057; + } + } + if (sle32(wave_sizze_129050, skip_threads_129063)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129063 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129048, 32) == 0 || + !ltid_in_bounds_129060)) { + // read operands + { + x_101214 = x_101213; + x_101213 = ((__local + double *) red_arr_mem_129052)[sext_i32_i64(squot32(local_tid_129048, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129065 = + slt64(srem64(sext_i32_i64(local_tid_129048), + k2p2zq_73023), + sext_i32_i64(local_tid_129048) - + sext_i32_i64(squot32(local_tid_129048, + 32) * 32 - 1)); + + if (inactive_129065) { + x_101213 = x_101214; + } + if (!inactive_129065) { + double defunc_1_op_res_101215 = x_101213 + + x_101214; + + x_101213 = defunc_1_op_res_101215; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + x_101213; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129048, 32) == 0) { + ((__local + double *) red_arr_mem_129052)[sext_i32_i64(local_tid_129048)] = + x_101214; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045) + + sext_i32_i64(local_tid_129048), m_73008 * k2p2zq_73023) && + slt64(sext_i32_i64(local_tid_129048), + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045))) { + ((__global + double *) mem_124653)[squot64(sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045) + + sext_i32_i64(local_tid_129048), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_129056) * + 
squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045) + + sext_i32_i64(local_tid_129048) - + squot64(sext_i32_i64(virt_group_id_129056) * + squot64(segred_group_sizze_101209, + segment_sizze_nonzzero_129045) + + sext_i32_i64(local_tid_129048), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_129052)[(sext_i32_i64(local_tid_129048) + + (int64_t) 1) * + segment_sizze_nonzzero_129045 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101209 +} +__kernel void mainMagnitudezisegred_small_101291(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129184_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_101338, + int64_t segment_sizze_nonzzero_129177, + __global + unsigned char *mem_120124, + __global + unsigned char *defunc_4_map_res_mem_124659, + __global + unsigned char *mem_124877) +{ + #define segred_group_sizze_101337 (mainMagnitudezisegred_group_sizze_101285) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129184_backing_0 = + (__local volatile + char *) red_arr_mem_129184_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129179; + int32_t local_tid_129180; + int64_t group_sizze_129183; + int32_t wave_sizze_129182; + int32_t group_tid_129181; + + global_tid_129179 = get_global_id(0); + local_tid_129180 = get_local_id(0); + group_sizze_129183 = get_local_size(0); + wave_sizze_129182 = LOCKSTEP_WIDTH; + group_tid_129181 = get_group_id(0); + + int32_t phys_tid_101291; + + phys_tid_101291 = global_tid_129179; + + __local char *red_arr_mem_129184; + + red_arr_mem_129184 = (__local char *) red_arr_mem_129184_backing_0; + + int32_t phys_group_id_129186; + + phys_group_id_129186 = get_group_id(0); + for (int32_t i_129187 = 0; i_129187 < + 
sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * N_73007, + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177))) - + phys_group_id_129186, sext_i64_i32(num_groups_101338)); + i_129187++) { + int32_t virt_group_id_129188 = phys_group_id_129186 + i_129187 * + sext_i64_i32(num_groups_101338); + int64_t gtid_101280 = squot64(squot64(sext_i32_i64(local_tid_129180), + segment_sizze_nonzzero_129177) + + sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177), + N_73007); + int64_t gtid_101281 = squot64(sext_i32_i64(local_tid_129180), + segment_sizze_nonzzero_129177) + + sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177) - + squot64(squot64(sext_i32_i64(local_tid_129180), + segment_sizze_nonzzero_129177) + + sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177), N_73007) * + N_73007; + int64_t gtid_101290 = srem64(sext_i32_i64(local_tid_129180), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && ((slt64(gtid_101280, + m_73008) && + slt64(gtid_101281, + N_73007)) && + slt64(sext_i32_i64(local_tid_129180), + k2p2zq_73023 * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177)))) { + double x_101346 = ((__global + double *) defunc_4_map_res_mem_124659)[gtid_101280 * + k2p2zq_73023 + + gtid_101290]; + double x_101347 = ((__global double *) mem_120124)[gtid_101281 * + k2p2zq_73023 + + gtid_101290]; + double defunc_1_f_res_101348 = x_101346 * x_101347; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + defunc_1_f_res_101348; + } + } else { + ((__local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate 
reduction + { + double x_101341; + double x_101342; + double x_129189; + double x_129190; + bool ltid_in_bounds_129192; + + ltid_in_bounds_129192 = slt64(sext_i32_i64(local_tid_129180), + k2p2zq_73023 * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177)); + + int32_t skip_threads_129193; + + // read input for in-block scan + { + if (ltid_in_bounds_129192) { + x_101342 = ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)]; + if ((local_tid_129180 - squot32(local_tid_129180, 32) * + 32) == 0) { + x_101341 = x_101342; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129193 = 1; + while (slt32(skip_threads_129193, 32)) { + if (sle32(skip_threads_129193, local_tid_129180 - + squot32(local_tid_129180, 32) * 32) && + ltid_in_bounds_129192) { + // read operands + { + x_101341 = ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180) - + sext_i32_i64(skip_threads_129193)]; + } + // perform operation + { + bool inactive_129194 = + slt64(srem64(sext_i32_i64(local_tid_129180), + k2p2zq_73023), + sext_i32_i64(local_tid_129180) - + sext_i32_i64(local_tid_129180 - + skip_threads_129193)); + + if (inactive_129194) { + x_101341 = x_101342; + } + if (!inactive_129194) { + double defunc_1_op_res_101343 = x_101341 + + x_101342; + + x_101341 = defunc_1_op_res_101343; + } + } + } + if (sle32(wave_sizze_129182, skip_threads_129193)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129193, local_tid_129180 - + squot32(local_tid_129180, 32) * 32) && + ltid_in_bounds_129192) { + // write result + { + ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + x_101341; + x_101342 = x_101341; + } + } + if (sle32(wave_sizze_129182, skip_threads_129193)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129193 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129180 
- squot32(local_tid_129180, 32) * + 32) == 31 && ltid_in_bounds_129192) { + ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(squot32(local_tid_129180, + 32))] = + x_101341; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129195; + + // read input for in-block scan + { + if (squot32(local_tid_129180, 32) == 0 && + ltid_in_bounds_129192) { + x_129190 = ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)]; + if ((local_tid_129180 - squot32(local_tid_129180, + 32) * 32) == 0) { + x_129189 = x_129190; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129195 = 1; + while (slt32(skip_threads_129195, 32)) { + if (sle32(skip_threads_129195, local_tid_129180 - + squot32(local_tid_129180, 32) * 32) && + (squot32(local_tid_129180, 32) == 0 && + ltid_in_bounds_129192)) { + // read operands + { + x_129189 = ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180) - + sext_i32_i64(skip_threads_129195)]; + } + // perform operation + { + bool inactive_129196 = + slt64(srem64(sext_i32_i64(local_tid_129180 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_129180 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129180 - + skip_threads_129195) * + 32 + 32 - 1)); + + if (inactive_129196) { + x_129189 = x_129190; + } + if (!inactive_129196) { + double defunc_1_op_res_129191 = + x_129189 + x_129190; + + x_129189 = defunc_1_op_res_129191; + } + } + } + if (sle32(wave_sizze_129182, skip_threads_129195)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129195, local_tid_129180 - + squot32(local_tid_129180, 32) * 32) && + (squot32(local_tid_129180, 32) == 0 && + ltid_in_bounds_129192)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + x_129189; + x_129190 = x_129189; + } + } + if (sle32(wave_sizze_129182, 
skip_threads_129195)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129195 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129180, 32) == 0 || + !ltid_in_bounds_129192)) { + // read operands + { + x_101342 = x_101341; + x_101341 = ((__local + double *) red_arr_mem_129184)[sext_i32_i64(squot32(local_tid_129180, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129197 = + slt64(srem64(sext_i32_i64(local_tid_129180), + k2p2zq_73023), + sext_i32_i64(local_tid_129180) - + sext_i32_i64(squot32(local_tid_129180, + 32) * 32 - 1)); + + if (inactive_129197) { + x_101341 = x_101342; + } + if (!inactive_129197) { + double defunc_1_op_res_101343 = x_101341 + + x_101342; + + x_101341 = defunc_1_op_res_101343; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + x_101341; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129180, 32) == 0) { + ((__local + double *) red_arr_mem_129184)[sext_i32_i64(local_tid_129180)] = + x_101342; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177) + + sext_i32_i64(local_tid_129180), m_73008 * N_73007) && + slt64(sext_i32_i64(local_tid_129180), + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177))) { + ((__global + double *) mem_124877)[squot64(sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177) + + sext_i32_i64(local_tid_129180), + N_73007) * N_73007 + + (sext_i32_i64(virt_group_id_129188) * + squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177) + + sext_i32_i64(local_tid_129180) - + squot64(sext_i32_i64(virt_group_id_129188) * + 
squot64(segred_group_sizze_101337, + segment_sizze_nonzzero_129177) + + sext_i32_i64(local_tid_129180), + N_73007) * N_73007)] = + ((__local + double *) red_arr_mem_129184)[(sext_i32_i64(local_tid_129180) + + (int64_t) 1) * + segment_sizze_nonzzero_129177 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101337 +} +__kernel void mainMagnitudezisegred_small_101712(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129414_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t n_73011, + int64_t num_groups_101762, + int64_t segment_sizze_nonzzero_129407, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *mem_124946, + __global + unsigned char *mem_124949) +{ + #define segred_group_sizze_101761 (mainMagnitudezisegred_group_sizze_101706) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129414_backing_0 = + (__local volatile + char *) red_arr_mem_129414_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129409; + int32_t local_tid_129410; + int64_t group_sizze_129413; + int32_t wave_sizze_129412; + int32_t group_tid_129411; + + global_tid_129409 = get_global_id(0); + local_tid_129410 = get_local_id(0); + group_sizze_129413 = get_local_size(0); + wave_sizze_129412 = LOCKSTEP_WIDTH; + group_tid_129411 = get_group_id(0); + + int32_t phys_tid_101712; + + phys_tid_101712 = global_tid_129409; + + __local char *red_arr_mem_129414; + + red_arr_mem_129414 = (__local char *) red_arr_mem_129414_backing_0; + + int32_t phys_group_id_129416; + + phys_group_id_129416 = 
get_group_id(0); + for (int32_t i_129417 = 0; i_129417 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407))) - + phys_group_id_129416, sext_i64_i32(num_groups_101762)); + i_129417++) { + int32_t virt_group_id_129418 = phys_group_id_129416 + i_129417 * + sext_i64_i32(num_groups_101762); + int64_t gtid_101703 = squot64(sext_i32_i64(local_tid_129410), + segment_sizze_nonzzero_129407) + + sext_i32_i64(virt_group_id_129418) * + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407); + int64_t gtid_101711 = srem64(sext_i32_i64(local_tid_129410), n_73011); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_73011) && (slt64(gtid_101703, m_73008) && + slt64(sext_i32_i64(local_tid_129410), + n_73011 * + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407)))) { + int64_t defunc_0_f_res_101769 = ((__global + int64_t *) mem_124946)[gtid_101703]; + bool cond_101771 = slt64(gtid_101711, defunc_0_f_res_101769); + double defunc_0_f_res_101772; + + if (cond_101771) { + bool y_101774 = slt64(gtid_101711, N_73007); + bool index_certs_101776; + + if (!y_101774) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 632) == -1) { + global_failure_args[0] = gtid_101711; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_101777 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101703 * + N_73007 + + gtid_101711]; + + defunc_0_f_res_101772 = defunc_0_f_res_t_res_101777; + } else { + defunc_0_f_res_101772 = 0.0; + } + + double defunc_0_f_res_101778 = defunc_0_f_res_101772 * + defunc_0_f_res_101772; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + defunc_0_f_res_101778; + } + } else { + ((__local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + 0.0; + } + } + + error_0: + 
barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_73011)) { + // perform segmented scan to imitate reduction + { + double x_101765; + double x_101766; + double x_129419; + double x_129420; + bool ltid_in_bounds_129422; + + ltid_in_bounds_129422 = slt64(sext_i32_i64(local_tid_129410), + n_73011 * + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407)); + + int32_t skip_threads_129423; + + // read input for in-block scan + { + if (ltid_in_bounds_129422) { + x_101766 = ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)]; + if ((local_tid_129410 - squot32(local_tid_129410, 32) * + 32) == 0) { + x_101765 = x_101766; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129423 = 1; + while (slt32(skip_threads_129423, 32)) { + if (sle32(skip_threads_129423, local_tid_129410 - + squot32(local_tid_129410, 32) * 32) && + ltid_in_bounds_129422) { + // read operands + { + x_101765 = ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410) - + sext_i32_i64(skip_threads_129423)]; + } + // perform operation + { + bool inactive_129424 = + slt64(srem64(sext_i32_i64(local_tid_129410), + n_73011), + sext_i32_i64(local_tid_129410) - + sext_i32_i64(local_tid_129410 - + skip_threads_129423)); + + if (inactive_129424) { + x_101765 = x_101766; + } + if (!inactive_129424) { + double defunc_1_op_res_101767 = x_101765 + + x_101766; + + x_101765 = defunc_1_op_res_101767; + } + } + } + if (sle32(wave_sizze_129412, skip_threads_129423)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129423, local_tid_129410 - + squot32(local_tid_129410, 32) * 32) && + ltid_in_bounds_129422) { + // write result + { + ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + x_101765; + x_101766 = x_101765; + } + } + if (sle32(wave_sizze_129412, skip_threads_129423)) { + barrier(CLK_LOCAL_MEM_FENCE); + } 
+ skip_threads_129423 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129410 - squot32(local_tid_129410, 32) * + 32) == 31 && ltid_in_bounds_129422) { + ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(squot32(local_tid_129410, + 32))] = + x_101765; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129425; + + // read input for in-block scan + { + if (squot32(local_tid_129410, 32) == 0 && + ltid_in_bounds_129422) { + x_129420 = ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)]; + if ((local_tid_129410 - squot32(local_tid_129410, + 32) * 32) == 0) { + x_129419 = x_129420; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129425 = 1; + while (slt32(skip_threads_129425, 32)) { + if (sle32(skip_threads_129425, local_tid_129410 - + squot32(local_tid_129410, 32) * 32) && + (squot32(local_tid_129410, 32) == 0 && + ltid_in_bounds_129422)) { + // read operands + { + x_129419 = ((volatile __local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410) - + sext_i32_i64(skip_threads_129425)]; + } + // perform operation + { + bool inactive_129426 = + slt64(srem64(sext_i32_i64(local_tid_129410 * + 32 + 32 - 1), n_73011), + sext_i32_i64(local_tid_129410 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129410 - + skip_threads_129425) * + 32 + 32 - 1)); + + if (inactive_129426) { + x_129419 = x_129420; + } + if (!inactive_129426) { + double defunc_1_op_res_129421 = + x_129419 + x_129420; + + x_129419 = defunc_1_op_res_129421; + } + } + } + if (sle32(wave_sizze_129412, skip_threads_129425)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129425, local_tid_129410 - + squot32(local_tid_129410, 32) * 32) && + (squot32(local_tid_129410, 32) == 0 && + ltid_in_bounds_129422)) { + // write result + { + ((volatile 
__local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + x_129419; + x_129420 = x_129419; + } + } + if (sle32(wave_sizze_129412, skip_threads_129425)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129425 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129410, 32) == 0 || + !ltid_in_bounds_129422)) { + // read operands + { + x_101766 = x_101765; + x_101765 = ((__local + double *) red_arr_mem_129414)[sext_i32_i64(squot32(local_tid_129410, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129427 = + slt64(srem64(sext_i32_i64(local_tid_129410), + n_73011), + sext_i32_i64(local_tid_129410) - + sext_i32_i64(squot32(local_tid_129410, + 32) * 32 - 1)); + + if (inactive_129427) { + x_101765 = x_101766; + } + if (!inactive_129427) { + double defunc_1_op_res_101767 = x_101765 + + x_101766; + + x_101765 = defunc_1_op_res_101767; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + x_101765; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129410, 32) == 0) { + ((__local + double *) red_arr_mem_129414)[sext_i32_i64(local_tid_129410)] = + x_101766; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129418) * + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407) + + sext_i32_i64(local_tid_129410), m_73008) && + slt64(sext_i32_i64(local_tid_129410), + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407))) { + ((__global + double *) mem_124949)[sext_i32_i64(virt_group_id_129418) * + squot64(segred_group_sizze_101761, + segment_sizze_nonzzero_129407) + + sext_i32_i64(local_tid_129410)] = + ((__local + double *) red_arr_mem_129414)[(sext_i32_i64(local_tid_129410) + + (int64_t) 1) * + 
segment_sizze_nonzzero_129407 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101761 +} +__kernel void mainMagnitudezisegred_small_101736(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129354_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t n_73011, + int64_t num_groups_101748, + int64_t segment_sizze_nonzzero_129347, + __global + unsigned char *mem_124142, + __global + unsigned char *mem_124946) +{ + #define segred_group_sizze_101747 (mainMagnitudezisegred_group_sizze_101730) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129354_backing_0 = + (__local volatile + char *) red_arr_mem_129354_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129349; + int32_t local_tid_129350; + int64_t group_sizze_129353; + int32_t wave_sizze_129352; + int32_t group_tid_129351; + + global_tid_129349 = get_global_id(0); + local_tid_129350 = get_local_id(0); + group_sizze_129353 = get_local_size(0); + wave_sizze_129352 = LOCKSTEP_WIDTH; + group_tid_129351 = get_group_id(0); + + int32_t phys_tid_101736; + + phys_tid_101736 = global_tid_129349; + + __local char *red_arr_mem_129354; + + red_arr_mem_129354 = (__local char *) red_arr_mem_129354_backing_0; + + int32_t phys_group_id_129356; + + phys_group_id_129356 = get_group_id(0); + for (int32_t i_129357 = 0; i_129357 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347))) - + phys_group_id_129356, sext_i64_i32(num_groups_101748)); + i_129357++) { + int32_t virt_group_id_129358 = phys_group_id_129356 + i_129357 * + sext_i64_i32(num_groups_101748); + int64_t gtid_101727 = squot64(sext_i32_i64(local_tid_129350), + segment_sizze_nonzzero_129347) + + sext_i32_i64(virt_group_id_129358) * + 
squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347); + int64_t gtid_101735 = srem64(sext_i32_i64(local_tid_129350), n_73011); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, n_73011) && (slt64(gtid_101727, m_73008) && + slt64(sext_i32_i64(local_tid_129350), + n_73011 * + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347)))) { + double x_101755 = ((__global double *) mem_124142)[gtid_101727 * + N_73007 + + gtid_101735]; + bool isnan_res_101756; + + isnan_res_101756 = futrts_isnan64(x_101755); + + bool cond_101757 = !isnan_res_101756; + int64_t defunc_0_f_res_101758 = btoi_bool_i64(cond_101757); + + // save map-out results + { } + // save results to be reduced + { + ((__local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + defunc_0_f_res_101758; + } + } else { + ((__local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + (int64_t) 0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, n_73011)) { + // perform segmented scan to imitate reduction + { + int64_t x_101751; + int64_t x_101752; + int64_t x_129359; + int64_t x_129360; + bool ltid_in_bounds_129362; + + ltid_in_bounds_129362 = slt64(sext_i32_i64(local_tid_129350), + n_73011 * + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347)); + + int32_t skip_threads_129363; + + // read input for in-block scan + { + if (ltid_in_bounds_129362) { + x_101752 = ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)]; + if ((local_tid_129350 - squot32(local_tid_129350, 32) * + 32) == 0) { + x_101751 = x_101752; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129363 = 1; + while (slt32(skip_threads_129363, 32)) { + if (sle32(skip_threads_129363, local_tid_129350 - + squot32(local_tid_129350, 32) * 32) && + ltid_in_bounds_129362) { + // read operands + { + x_101751 = ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350) 
- + sext_i32_i64(skip_threads_129363)]; + } + // perform operation + { + bool inactive_129364 = + slt64(srem64(sext_i32_i64(local_tid_129350), + n_73011), + sext_i32_i64(local_tid_129350) - + sext_i32_i64(local_tid_129350 - + skip_threads_129363)); + + if (inactive_129364) { + x_101751 = x_101752; + } + if (!inactive_129364) { + int64_t defunc_1_op_res_101753 = + add64(x_101751, x_101752); + + x_101751 = defunc_1_op_res_101753; + } + } + } + if (sle32(wave_sizze_129352, skip_threads_129363)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129363, local_tid_129350 - + squot32(local_tid_129350, 32) * 32) && + ltid_in_bounds_129362) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + x_101751; + x_101752 = x_101751; + } + } + if (sle32(wave_sizze_129352, skip_threads_129363)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129363 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129350 - squot32(local_tid_129350, 32) * + 32) == 31 && ltid_in_bounds_129362) { + ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(squot32(local_tid_129350, + 32))] = + x_101751; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129365; + + // read input for in-block scan + { + if (squot32(local_tid_129350, 32) == 0 && + ltid_in_bounds_129362) { + x_129360 = ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)]; + if ((local_tid_129350 - squot32(local_tid_129350, + 32) * 32) == 0) { + x_129359 = x_129360; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129365 = 1; + while (slt32(skip_threads_129365, 32)) { + if (sle32(skip_threads_129365, local_tid_129350 - + squot32(local_tid_129350, 32) * 32) && + (squot32(local_tid_129350, 32) == 0 && + 
ltid_in_bounds_129362)) { + // read operands + { + x_129359 = ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350) - + sext_i32_i64(skip_threads_129365)]; + } + // perform operation + { + bool inactive_129366 = + slt64(srem64(sext_i32_i64(local_tid_129350 * + 32 + 32 - 1), n_73011), + sext_i32_i64(local_tid_129350 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129350 - + skip_threads_129365) * + 32 + 32 - 1)); + + if (inactive_129366) { + x_129359 = x_129360; + } + if (!inactive_129366) { + int64_t defunc_1_op_res_129361 = + add64(x_129359, x_129360); + + x_129359 = defunc_1_op_res_129361; + } + } + } + if (sle32(wave_sizze_129352, skip_threads_129365)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129365, local_tid_129350 - + squot32(local_tid_129350, 32) * 32) && + (squot32(local_tid_129350, 32) == 0 && + ltid_in_bounds_129362)) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + x_129359; + x_129360 = x_129359; + } + } + if (sle32(wave_sizze_129352, skip_threads_129365)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129365 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129350, 32) == 0 || + !ltid_in_bounds_129362)) { + // read operands + { + x_101752 = x_101751; + x_101751 = ((__local + int64_t *) red_arr_mem_129354)[sext_i32_i64(squot32(local_tid_129350, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129367 = + slt64(srem64(sext_i32_i64(local_tid_129350), + n_73011), + sext_i32_i64(local_tid_129350) - + sext_i32_i64(squot32(local_tid_129350, + 32) * 32 - 1)); + + if (inactive_129367) { + x_101751 = x_101752; + } + if (!inactive_129367) { + int64_t defunc_1_op_res_101753 = add64(x_101751, + x_101752); + + x_101751 = defunc_1_op_res_101753; + } + } + // write final result + { + ((__local + int64_t *) 
red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + x_101751; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129350, 32) == 0) { + ((__local + int64_t *) red_arr_mem_129354)[sext_i32_i64(local_tid_129350)] = + x_101752; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129358) * + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347) + + sext_i32_i64(local_tid_129350), m_73008) && + slt64(sext_i32_i64(local_tid_129350), + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347))) { + ((__global + int64_t *) mem_124946)[sext_i32_i64(virt_group_id_129358) * + squot64(segred_group_sizze_101747, + segment_sizze_nonzzero_129347) + + sext_i32_i64(local_tid_129350)] = + ((__local + int64_t *) red_arr_mem_129354)[(sext_i32_i64(local_tid_129350) + + (int64_t) 1) * + segment_sizze_nonzzero_129347 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101747 +} +__kernel void mainMagnitudezisegred_small_101866(__global int *global_failure, + int failure_is_an_option, + __global + int64_t *global_failure_args, + __local volatile + int64_t *red_arr_mem_129519_backing_aligned_0, + int64_t N_73007, + int64_t m_73008, + int64_t defunc_2_reduce_comm_res_74867, + int64_t num_groups_101887, + int64_t segment_sizze_nonzzero_129512, + __global + unsigned char *defunc_4_map_res_mem_124920, + __global + unsigned char *defunc_3_map_res_mem_124958, + __global + unsigned char *defunc_3_map_res_mem_124959, + __global + unsigned char *mem_124969) +{ + #define segred_group_sizze_101886 (mainMagnitudezisegred_group_sizze_101860) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129519_backing_0 = + 
(__local volatile + char *) red_arr_mem_129519_backing_aligned_0; + volatile __local bool local_failure; + + if (failure_is_an_option) { + int failed = *global_failure >= 0; + + if (failed) + return; + } + local_failure = false; + barrier(CLK_LOCAL_MEM_FENCE); + + int32_t global_tid_129514; + int32_t local_tid_129515; + int64_t group_sizze_129518; + int32_t wave_sizze_129517; + int32_t group_tid_129516; + + global_tid_129514 = get_global_id(0); + local_tid_129515 = get_local_id(0); + group_sizze_129518 = get_local_size(0); + wave_sizze_129517 = LOCKSTEP_WIDTH; + group_tid_129516 = get_group_id(0); + + int32_t phys_tid_101866; + + phys_tid_101866 = global_tid_129514; + + __local char *red_arr_mem_129519; + + red_arr_mem_129519 = (__local char *) red_arr_mem_129519_backing_0; + + int32_t phys_group_id_129521; + + phys_group_id_129521 = get_group_id(0); + for (int32_t i_129522 = 0; i_129522 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512))) - + phys_group_id_129521, sext_i64_i32(num_groups_101887)); + i_129522++) { + int32_t virt_group_id_129523 = phys_group_id_129521 + i_129522 * + sext_i64_i32(num_groups_101887); + int64_t gtid_101857 = squot64(sext_i32_i64(local_tid_129515), + segment_sizze_nonzzero_129512) + + sext_i32_i64(virt_group_id_129523) * + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512); + int64_t gtid_101865 = srem64(sext_i32_i64(local_tid_129515), + defunc_2_reduce_comm_res_74867); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_74867) && + (slt64(gtid_101857, m_73008) && + slt64(sext_i32_i64(local_tid_129515), + defunc_2_reduce_comm_res_74867 * + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512)))) { + int64_t x_101895 = ((__global + int64_t *) defunc_3_map_res_mem_124958)[gtid_101857]; + bool cond_101897 = slt64(gtid_101865, x_101895); + double defunc_0_f_res_101898; + + if (cond_101897) { + int64_t 
x_101894 = ((__global + int64_t *) defunc_3_map_res_mem_124959)[gtid_101857]; + int64_t x_101899 = add64(gtid_101865, x_101894); + int64_t x_101900 = sub64(x_101899, x_101895); + int64_t i_101901 = add64((int64_t) 1, x_101900); + bool x_101902 = sle64((int64_t) 0, i_101901); + bool y_101903 = slt64(i_101901, N_73007); + bool bounds_check_101904 = x_101902 && y_101903; + bool index_certs_101905; + + if (!bounds_check_101904) { + { + if (atomic_cmpxchg_i32_global(global_failure, -1, + 635) == -1) { + global_failure_args[0] = i_101901; + global_failure_args[1] = N_73007; + ; + } + local_failure = true; + goto error_0; + } + } + + double defunc_0_f_res_t_res_101906 = ((__global + double *) defunc_4_map_res_mem_124920)[gtid_101857 * + N_73007 + + i_101901]; + + defunc_0_f_res_101898 = defunc_0_f_res_t_res_101906; + } else { + defunc_0_f_res_101898 = 0.0; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + defunc_0_f_res_101898; + } + } else { + ((__local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + 0.0; + } + } + + error_0: + barrier(CLK_LOCAL_MEM_FENCE); + if (local_failure) + return; + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, defunc_2_reduce_comm_res_74867)) { + // perform segmented scan to imitate reduction + { + double x_101890; + double x_101891; + double x_129524; + double x_129525; + bool ltid_in_bounds_129527; + + ltid_in_bounds_129527 = slt64(sext_i32_i64(local_tid_129515), + defunc_2_reduce_comm_res_74867 * + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512)); + + int32_t skip_threads_129528; + + // read input for in-block scan + { + if (ltid_in_bounds_129527) { + x_101891 = ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)]; + if ((local_tid_129515 - squot32(local_tid_129515, 32) * + 32) == 0) { + x_101890 = x_101891; + } + } + } + // in-block scan (hopefully no barriers 
needed) + { + skip_threads_129528 = 1; + while (slt32(skip_threads_129528, 32)) { + if (sle32(skip_threads_129528, local_tid_129515 - + squot32(local_tid_129515, 32) * 32) && + ltid_in_bounds_129527) { + // read operands + { + x_101890 = ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515) - + sext_i32_i64(skip_threads_129528)]; + } + // perform operation + { + bool inactive_129529 = + slt64(srem64(sext_i32_i64(local_tid_129515), + defunc_2_reduce_comm_res_74867), + sext_i32_i64(local_tid_129515) - + sext_i32_i64(local_tid_129515 - + skip_threads_129528)); + + if (inactive_129529) { + x_101890 = x_101891; + } + if (!inactive_129529) { + double defunc_1_op_res_101892 = x_101890 + + x_101891; + + x_101890 = defunc_1_op_res_101892; + } + } + } + if (sle32(wave_sizze_129517, skip_threads_129528)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129528, local_tid_129515 - + squot32(local_tid_129515, 32) * 32) && + ltid_in_bounds_129527) { + // write result + { + ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + x_101890; + x_101891 = x_101890; + } + } + if (sle32(wave_sizze_129517, skip_threads_129528)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129528 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129515 - squot32(local_tid_129515, 32) * + 32) == 31 && ltid_in_bounds_129527) { + ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(squot32(local_tid_129515, + 32))] = + x_101890; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129530; + + // read input for in-block scan + { + if (squot32(local_tid_129515, 32) == 0 && + ltid_in_bounds_129527) { + x_129525 = ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)]; + if ((local_tid_129515 - squot32(local_tid_129515, 
+ 32) * 32) == 0) { + x_129524 = x_129525; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129530 = 1; + while (slt32(skip_threads_129530, 32)) { + if (sle32(skip_threads_129530, local_tid_129515 - + squot32(local_tid_129515, 32) * 32) && + (squot32(local_tid_129515, 32) == 0 && + ltid_in_bounds_129527)) { + // read operands + { + x_129524 = ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515) - + sext_i32_i64(skip_threads_129530)]; + } + // perform operation + { + bool inactive_129531 = + slt64(srem64(sext_i32_i64(local_tid_129515 * + 32 + 32 - 1), + defunc_2_reduce_comm_res_74867), + sext_i32_i64(local_tid_129515 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129515 - + skip_threads_129530) * + 32 + 32 - 1)); + + if (inactive_129531) { + x_129524 = x_129525; + } + if (!inactive_129531) { + double defunc_1_op_res_129526 = + x_129524 + x_129525; + + x_129524 = defunc_1_op_res_129526; + } + } + } + if (sle32(wave_sizze_129517, skip_threads_129530)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129530, local_tid_129515 - + squot32(local_tid_129515, 32) * 32) && + (squot32(local_tid_129515, 32) == 0 && + ltid_in_bounds_129527)) { + // write result + { + ((volatile __local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + x_129524; + x_129525 = x_129524; + } + } + if (sle32(wave_sizze_129517, skip_threads_129530)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129530 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129515, 32) == 0 || + !ltid_in_bounds_129527)) { + // read operands + { + x_101891 = x_101890; + x_101890 = ((__local + double *) red_arr_mem_129519)[sext_i32_i64(squot32(local_tid_129515, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129532 = + slt64(srem64(sext_i32_i64(local_tid_129515), + defunc_2_reduce_comm_res_74867), + sext_i32_i64(local_tid_129515) 
- + sext_i32_i64(squot32(local_tid_129515, + 32) * 32 - 1)); + + if (inactive_129532) { + x_101890 = x_101891; + } + if (!inactive_129532) { + double defunc_1_op_res_101892 = x_101890 + + x_101891; + + x_101890 = defunc_1_op_res_101892; + } + } + // write final result + { + ((__local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + x_101890; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129515, 32) == 0) { + ((__local + double *) red_arr_mem_129519)[sext_i32_i64(local_tid_129515)] = + x_101891; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129523) * + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512) + + sext_i32_i64(local_tid_129515), m_73008) && + slt64(sext_i32_i64(local_tid_129515), + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512))) { + ((__global + double *) mem_124969)[sext_i32_i64(virt_group_id_129523) * + squot64(segred_group_sizze_101886, + segment_sizze_nonzzero_129512) + + sext_i32_i64(local_tid_129515)] = + ((__local + double *) red_arr_mem_129519)[(sext_i32_i64(local_tid_129515) + + (int64_t) 1) * + segment_sizze_nonzzero_129512 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_101886 +} +__kernel void mainMagnitudezisegred_small_102410(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_129701_backing_aligned_0, + __local volatile + int64_t *red_arr_mem_129699_backing_aligned_1, + __local volatile + int64_t *red_arr_mem_129697_backing_aligned_2, + int64_t m_73008, + int64_t iota_arg_74896, + int64_t num_groups_102593, + int64_t segment_sizze_nonzzero_129690, + __global + unsigned char *mem_124973, + __global + unsigned char *mem_125026, + __global + unsigned char *mem_125028, + 
__global + unsigned char *mem_125032, + __global + unsigned char *mem_125035, + __global + unsigned char *mem_125037, + __global + unsigned char *mem_125039) +{ + #define segred_group_sizze_102592 (mainMagnitudezisegred_group_sizze_102404) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_129701_backing_2 = + (__local volatile + char *) red_arr_mem_129701_backing_aligned_0; + __local volatile char *restrict red_arr_mem_129699_backing_1 = + (__local volatile + char *) red_arr_mem_129699_backing_aligned_1; + __local volatile char *restrict red_arr_mem_129697_backing_0 = + (__local volatile + char *) red_arr_mem_129697_backing_aligned_2; + + if (*global_failure >= 0) + return; + + int32_t global_tid_129692; + int32_t local_tid_129693; + int64_t group_sizze_129696; + int32_t wave_sizze_129695; + int32_t group_tid_129694; + + global_tid_129692 = get_global_id(0); + local_tid_129693 = get_local_id(0); + group_sizze_129696 = get_local_size(0); + wave_sizze_129695 = LOCKSTEP_WIDTH; + group_tid_129694 = get_group_id(0); + + int32_t phys_tid_102410; + + phys_tid_102410 = global_tid_129692; + + __local char *red_arr_mem_129697; + + red_arr_mem_129697 = (__local char *) red_arr_mem_129697_backing_0; + + __local char *red_arr_mem_129699; + + red_arr_mem_129699 = (__local char *) red_arr_mem_129699_backing_1; + + __local char *red_arr_mem_129701; + + red_arr_mem_129701 = (__local char *) red_arr_mem_129701_backing_2; + + int32_t phys_group_id_129703; + + phys_group_id_129703 = get_group_id(0); + for (int32_t i_129704 = 0; i_129704 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690))) - + phys_group_id_129703, sext_i64_i32(num_groups_102593)); + i_129704++) { + int32_t virt_group_id_129705 = phys_group_id_129703 + i_129704 * + sext_i64_i32(num_groups_102593); + int64_t gtid_102401 = squot64(sext_i32_i64(local_tid_129693), + 
segment_sizze_nonzzero_129690) + + sext_i32_i64(virt_group_id_129705) * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690); + int64_t gtid_102409 = srem64(sext_i32_i64(local_tid_129693), + iota_arg_74896); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, iota_arg_74896) && (slt64(gtid_102401, + m_73008) && + slt64(sext_i32_i64(local_tid_129693), + iota_arg_74896 * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690)))) { + int64_t y_102612 = ((__global + int64_t *) mem_125028)[gtid_102401]; + double y_102613 = ((__global double *) mem_125026)[gtid_102401]; + double x_102617 = ((__global double *) mem_125032)[gtid_102401 * + iota_arg_74896 + + gtid_102409]; + double x_102618 = ((__global double *) mem_124973)[gtid_102409]; + double defunc_0_f_res_102621 = x_102617 / y_102613; + bool cond_102622 = slt64(gtid_102409, y_102612); + bool isnan_res_102623; + + isnan_res_102623 = futrts_isnan64(defunc_0_f_res_102621); + + bool cond_t_res_102624 = !isnan_res_102623; + bool x_102625 = cond_102622 && cond_t_res_102624; + double abs_res_102626 = fabs(defunc_0_f_res_102621); + bool defunc_2_f_res_t_res_102627 = x_102618 < abs_res_102626; + bool x_102628 = x_102625 && defunc_2_f_res_t_res_102627; + double defunc_1_f_res_102629; + + if (cond_102622) { + defunc_1_f_res_102629 = defunc_0_f_res_102621; + } else { + defunc_1_f_res_102629 = 0.0; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + x_102628; + ((__local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + gtid_102409; + ((__local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + defunc_1_f_res_102629; + } + } else { + ((__local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + 0; + ((__local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + (int64_t) -1; + ((__local + double *) 
red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, iota_arg_74896)) { + // perform segmented scan to imitate reduction + { + bool x_102598; + int64_t x_102599; + double x_102600; + bool x_102601; + int64_t x_102602; + double x_102603; + bool x_129706; + int64_t x_129707; + double x_129708; + bool x_129709; + int64_t x_129710; + double x_129711; + bool ltid_in_bounds_129720; + + ltid_in_bounds_129720 = slt64(sext_i32_i64(local_tid_129693), + iota_arg_74896 * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690)); + + int32_t skip_threads_129721; + + // read input for in-block scan + { + if (ltid_in_bounds_129720) { + x_102601 = ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)]; + x_102602 = ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)]; + x_102603 = ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)]; + if ((local_tid_129693 - squot32(local_tid_129693, 32) * + 32) == 0) { + x_102598 = x_102601; + x_102599 = x_102602; + x_102600 = x_102603; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129721 = 1; + while (slt32(skip_threads_129721, 32)) { + if (sle32(skip_threads_129721, local_tid_129693 - + squot32(local_tid_129693, 32) * 32) && + ltid_in_bounds_129720) { + // read operands + { + x_102598 = ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129721)]; + x_102599 = ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129721)]; + x_102600 = ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129721)]; + } + // perform operation + { + bool inactive_129722 = + slt64(srem64(sext_i32_i64(local_tid_129693), + iota_arg_74896), + sext_i32_i64(local_tid_129693) - + 
sext_i32_i64(local_tid_129693 - + skip_threads_129721)); + + if (inactive_129722) { + x_102598 = x_102601; + x_102599 = x_102602; + x_102600 = x_102603; + } + if (!inactive_129722) { + bool defunc_1_op_res_102604; + int64_t defunc_1_op_res_102605; + + if (x_102598) { + defunc_1_op_res_102604 = x_102598; + defunc_1_op_res_102605 = x_102599; + } else { + bool x_102606 = x_102601 && x_102601; + bool x_102607 = !x_102601; + bool y_102608 = x_102598 && x_102607; + bool defunc_1_op_res_f_res_102609 = + x_102606 || y_102608; + int64_t defunc_1_op_res_f_res_102610; + + if (x_102601) { + defunc_1_op_res_f_res_102610 = + x_102602; + } else { + defunc_1_op_res_f_res_102610 = + x_102599; + } + defunc_1_op_res_102604 = + defunc_1_op_res_f_res_102609; + defunc_1_op_res_102605 = + defunc_1_op_res_f_res_102610; + } + + double defunc_1_op_res_102611 = x_102600 + + x_102603; + + x_102598 = defunc_1_op_res_102604; + x_102599 = defunc_1_op_res_102605; + x_102600 = defunc_1_op_res_102611; + } + } + } + if (sle32(wave_sizze_129695, skip_threads_129721)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129721, local_tid_129693 - + squot32(local_tid_129693, 32) * 32) && + ltid_in_bounds_129720) { + // write result + { + ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + x_102598; + x_102601 = x_102598; + ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + x_102599; + x_102602 = x_102599; + ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + x_102600; + x_102603 = x_102600; + } + } + if (sle32(wave_sizze_129695, skip_threads_129721)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129721 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_129693 - squot32(local_tid_129693, 32) * + 32) == 31 && ltid_in_bounds_129720) { + ((volatile __local + bool *) 
red_arr_mem_129697)[sext_i32_i64(squot32(local_tid_129693, + 32))] = + x_102598; + ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(squot32(local_tid_129693, + 32))] = + x_102599; + ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(squot32(local_tid_129693, + 32))] = + x_102600; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_129723; + + // read input for in-block scan + { + if (squot32(local_tid_129693, 32) == 0 && + ltid_in_bounds_129720) { + x_129709 = ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)]; + x_129710 = ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)]; + x_129711 = ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)]; + if ((local_tid_129693 - squot32(local_tid_129693, + 32) * 32) == 0) { + x_129706 = x_129709; + x_129707 = x_129710; + x_129708 = x_129711; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_129723 = 1; + while (slt32(skip_threads_129723, 32)) { + if (sle32(skip_threads_129723, local_tid_129693 - + squot32(local_tid_129693, 32) * 32) && + (squot32(local_tid_129693, 32) == 0 && + ltid_in_bounds_129720)) { + // read operands + { + x_129706 = ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129723)]; + x_129707 = ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129723)]; + x_129708 = ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693) - + sext_i32_i64(skip_threads_129723)]; + } + // perform operation + { + bool inactive_129724 = + slt64(srem64(sext_i32_i64(local_tid_129693 * + 32 + 32 - 1), + iota_arg_74896), + sext_i32_i64(local_tid_129693 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_129693 - + skip_threads_129723) * + 32 + 32 - 
1)); + + if (inactive_129724) { + x_129706 = x_129709; + x_129707 = x_129710; + x_129708 = x_129711; + } + if (!inactive_129724) { + bool defunc_1_op_res_129712; + int64_t defunc_1_op_res_129713; + + if (x_129706) { + defunc_1_op_res_129712 = x_129706; + defunc_1_op_res_129713 = x_129707; + } else { + bool x_129714 = x_129709 && + x_129709; + bool x_129715 = !x_129709; + bool y_129716 = x_129706 && + x_129715; + bool defunc_1_op_res_f_res_129717 = + x_129714 || y_129716; + int64_t + defunc_1_op_res_f_res_129718; + + if (x_129709) { + defunc_1_op_res_f_res_129718 = + x_129710; + } else { + defunc_1_op_res_f_res_129718 = + x_129707; + } + defunc_1_op_res_129712 = + defunc_1_op_res_f_res_129717; + defunc_1_op_res_129713 = + defunc_1_op_res_f_res_129718; + } + + double defunc_1_op_res_129719 = + x_129708 + x_129711; + + x_129706 = defunc_1_op_res_129712; + x_129707 = defunc_1_op_res_129713; + x_129708 = defunc_1_op_res_129719; + } + } + } + if (sle32(wave_sizze_129695, skip_threads_129723)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_129723, local_tid_129693 - + squot32(local_tid_129693, 32) * 32) && + (squot32(local_tid_129693, 32) == 0 && + ltid_in_bounds_129720)) { + // write result + { + ((volatile __local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + x_129706; + x_129709 = x_129706; + ((volatile __local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + x_129707; + x_129710 = x_129707; + ((volatile __local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + x_129708; + x_129711 = x_129708; + } + } + if (sle32(wave_sizze_129695, skip_threads_129723)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_129723 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_129693, 32) == 0 || + !ltid_in_bounds_129720)) { + // read operands + { + x_102601 = x_102598; + x_102602 = x_102599; + x_102603 = x_102600; + x_102598 = 
((__local + bool *) red_arr_mem_129697)[sext_i32_i64(squot32(local_tid_129693, + 32)) - + (int64_t) 1]; + x_102599 = ((__local + int64_t *) red_arr_mem_129699)[sext_i32_i64(squot32(local_tid_129693, + 32)) - + (int64_t) 1]; + x_102600 = ((__local + double *) red_arr_mem_129701)[sext_i32_i64(squot32(local_tid_129693, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_129725 = + slt64(srem64(sext_i32_i64(local_tid_129693), + iota_arg_74896), + sext_i32_i64(local_tid_129693) - + sext_i32_i64(squot32(local_tid_129693, + 32) * 32 - 1)); + + if (inactive_129725) { + x_102598 = x_102601; + x_102599 = x_102602; + x_102600 = x_102603; + } + if (!inactive_129725) { + bool defunc_1_op_res_102604; + int64_t defunc_1_op_res_102605; + + if (x_102598) { + defunc_1_op_res_102604 = x_102598; + defunc_1_op_res_102605 = x_102599; + } else { + bool x_102606 = x_102601 && x_102601; + bool x_102607 = !x_102601; + bool y_102608 = x_102598 && x_102607; + bool defunc_1_op_res_f_res_102609 = + x_102606 || y_102608; + int64_t defunc_1_op_res_f_res_102610; + + if (x_102601) { + defunc_1_op_res_f_res_102610 = x_102602; + } else { + defunc_1_op_res_f_res_102610 = x_102599; + } + defunc_1_op_res_102604 = + defunc_1_op_res_f_res_102609; + defunc_1_op_res_102605 = + defunc_1_op_res_f_res_102610; + } + + double defunc_1_op_res_102611 = x_102600 + + x_102603; + + x_102598 = defunc_1_op_res_102604; + x_102599 = defunc_1_op_res_102605; + x_102600 = defunc_1_op_res_102611; + } + } + // write final result + { + ((__local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + x_102598; + ((__local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + x_102599; + ((__local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + x_102600; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_129693, 32) == 0) { + ((__local + bool *) red_arr_mem_129697)[sext_i32_i64(local_tid_129693)] = + 
x_102601; + ((__local + int64_t *) red_arr_mem_129699)[sext_i32_i64(local_tid_129693)] = + x_102602; + ((__local + double *) red_arr_mem_129701)[sext_i32_i64(local_tid_129693)] = + x_102603; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_129705) * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690) + + sext_i32_i64(local_tid_129693), m_73008) && + slt64(sext_i32_i64(local_tid_129693), + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690))) { + ((__global + bool *) mem_125035)[sext_i32_i64(virt_group_id_129705) * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690) + + sext_i32_i64(local_tid_129693)] = + ((__local + bool *) red_arr_mem_129697)[(sext_i32_i64(local_tid_129693) + + (int64_t) 1) * + segment_sizze_nonzzero_129690 - + (int64_t) 1]; + ((__global + int64_t *) mem_125037)[sext_i32_i64(virt_group_id_129705) * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690) + + sext_i32_i64(local_tid_129693)] = + ((__local + int64_t *) red_arr_mem_129699)[(sext_i32_i64(local_tid_129693) + + (int64_t) 1) * + segment_sizze_nonzzero_129690 - + (int64_t) 1]; + ((__global + double *) mem_125039)[sext_i32_i64(virt_group_id_129705) * + squot64(segred_group_sizze_102592, + segment_sizze_nonzzero_129690) + + sext_i32_i64(local_tid_129693)] = + ((__local + double *) red_arr_mem_129701)[(sext_i32_i64(local_tid_129693) + + (int64_t) 1) * + segment_sizze_nonzzero_129690 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_102592 +} +/* mainMagnitudezisegred_small_92263: segmented sum reduction over short segments. Returns at once if *global_failure >= 0. Each work-group loops over virtual groups (phys_group_id + i * num_groups_94094); one virtual group covers squot64(segred_group_sizze_94093, segment_sizze_nonzzero_127073) segments out of m_73008 * k2p2zq_73023 * k2p2zq_73023 in total. The flat segment index is split into (gtid_92250, gtid_92251, gtid_92252), and gtid_92262 = local_tid % k2p2zq_73023 is the position inside the segment. An in-bounds thread stores, as double, mem_121831[gtid_92251 * (k2p2zq * m) + gtid_92250 * k2p2zq + gtid_92262] times mem_121835[gtid_92250 * (k2p2zq * k2p2zq) + gtid_92252 * k2p2zq + gtid_92262] into the local array; every other thread stores 0.0. The local array is then scanned with + per segment (32-element blocks, a scan of the per-block results, then carry-in), and the first squot64(segred_group_sizze_94093, segment_sizze_nonzzero_127073) threads each copy the last element of their segment to mem_121840. NOTE(review): segment_sizze_nonzzero_127073 is supplied by the host; presumably max(1, k2p2zq_73023) -- confirm against the launch code. */ +__kernel void mainMagnitudezisegred_small_92263(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127080_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_94094, + int64_t segment_sizze_nonzzero_127073, + __global
+ unsigned char *mem_121831, + __global + unsigned char *mem_121835, + __global + unsigned char *mem_121840) +{ + #define segred_group_sizze_94093 (mainMagnitudezisegred_group_sizze_92257) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127080_backing_0 = + (__local volatile + char *) red_arr_mem_127080_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127075; + int32_t local_tid_127076; + int64_t group_sizze_127079; + int32_t wave_sizze_127078; + int32_t group_tid_127077; + + global_tid_127075 = get_global_id(0); + local_tid_127076 = get_local_id(0); + group_sizze_127079 = get_local_size(0); + wave_sizze_127078 = LOCKSTEP_WIDTH; + group_tid_127077 = get_group_id(0); + + int32_t phys_tid_92263; + + phys_tid_92263 = global_tid_127075; + + __local char *red_arr_mem_127080; + + red_arr_mem_127080 = (__local char *) red_arr_mem_127080_backing_0; + + int32_t phys_group_id_127082; + + phys_group_id_127082 = get_group_id(0); + for (int32_t i_127083 = 0; i_127083 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023 * k2p2zq_73023, + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073))) - + phys_group_id_127082, sext_i64_i32(num_groups_94094)); + i_127083++) { + int32_t virt_group_id_127084 = phys_group_id_127082 + i_127083 * + sext_i64_i32(num_groups_94094); + int64_t gtid_92250 = squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_92251 = squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) - + squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * +
squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_92252 = squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) - + squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) - + squot64(squot64(sext_i32_i64(local_tid_127076), + segment_sizze_nonzzero_127073) + + sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_92262 = srem64(sext_i32_i64(local_tid_127076), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (((slt64(gtid_92250, + m_73008) && + slt64(gtid_92251, + k2p2zq_73023)) && + slt64(gtid_92252, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_127076), + k2p2zq_73023 * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073)))) { + double x_94103 = ((__global double *) mem_121831)[gtid_92251 * + (k2p2zq_73023 * + m_73008) + + gtid_92250 * + k2p2zq_73023 + + gtid_92262]; + double x_94104 = ((__global double *) mem_121835)[gtid_92250 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_92252 * + k2p2zq_73023 + + gtid_92262]; + double defunc_1_f_res_94105 = x_94103 * x_94104; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) 
red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = + defunc_1_f_res_94105; + } + } else { + ((__local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_94097; + double x_94098; + double x_127085; + double x_127086; + bool ltid_in_bounds_127088; + + ltid_in_bounds_127088 = slt64(sext_i32_i64(local_tid_127076), + k2p2zq_73023 * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073)); + + int32_t skip_threads_127089; + + // read input for in-block scan + { + if (ltid_in_bounds_127088) { + x_94098 = ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)]; + if ((local_tid_127076 - squot32(local_tid_127076, 32) * + 32) == 0) { + x_94097 = x_94098; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127089 = 1; + while (slt32(skip_threads_127089, 32)) { + if (sle32(skip_threads_127089, local_tid_127076 - + squot32(local_tid_127076, 32) * 32) && + ltid_in_bounds_127088) { + // read operands + { + x_94097 = ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076) - + sext_i32_i64(skip_threads_127089)]; + } + // perform operation + { + /* inactive: this thread's position inside its segment is smaller than skip_threads, so the operand read skip_threads slots back lies before the segment start; the thread then keeps its own value instead of adding */ + bool inactive_127090 = + slt64(srem64(sext_i32_i64(local_tid_127076), + k2p2zq_73023), + sext_i32_i64(local_tid_127076) - + sext_i32_i64(local_tid_127076 - + skip_threads_127089)); + + if (inactive_127090) { + x_94097 = x_94098; + } + if (!inactive_127090) { + double defunc_1_op_res_94099 = x_94097 + + x_94098; + + x_94097 = defunc_1_op_res_94099; + } + } + } + if (sle32(wave_sizze_127078, skip_threads_127089)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127089, local_tid_127076 - + squot32(local_tid_127076, 32) * 32) && + ltid_in_bounds_127088) { + // write result + { + ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = +
x_94097; + x_94098 = x_94097; + } + } + if (sle32(wave_sizze_127078, skip_threads_127089)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127089 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127076 - squot32(local_tid_127076, 32) * + 32) == 31 && ltid_in_bounds_127088) { + ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(squot32(local_tid_127076, + 32))] = + x_94097; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127091; + + // read input for in-block scan + { + if (squot32(local_tid_127076, 32) == 0 && + ltid_in_bounds_127088) { + x_127086 = ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)]; + if ((local_tid_127076 - squot32(local_tid_127076, + 32) * 32) == 0) { + x_127085 = x_127086; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127091 = 1; + while (slt32(skip_threads_127091, 32)) { + if (sle32(skip_threads_127091, local_tid_127076 - + squot32(local_tid_127076, 32) * 32) && + (squot32(local_tid_127076, 32) == 0 && + ltid_in_bounds_127088)) { + // read operands + { + x_127085 = ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076) - + sext_i32_i64(skip_threads_127091)]; + } + // perform operation + { + bool inactive_127092 = + slt64(srem64(sext_i32_i64(local_tid_127076 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_127076 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127076 - + skip_threads_127091) * + 32 + 32 - 1)); + + if (inactive_127092) { + x_127085 = x_127086; + } + if (!inactive_127092) { + double defunc_1_op_res_127087 = + x_127085 + x_127086; + + x_127085 = defunc_1_op_res_127087; + } + } + } + if (sle32(wave_sizze_127078, skip_threads_127091)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127091, local_tid_127076 - + 
squot32(local_tid_127076, 32) * 32) && + (squot32(local_tid_127076, 32) == 0 && + ltid_in_bounds_127088)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = + x_127085; + x_127086 = x_127085; + } + } + if (sle32(wave_sizze_127078, skip_threads_127091)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127091 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127076, 32) == 0 || + !ltid_in_bounds_127088)) { + // read operands + { + x_94098 = x_94097; + x_94097 = ((__local + double *) red_arr_mem_127080)[sext_i32_i64(squot32(local_tid_127076, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127093 = + slt64(srem64(sext_i32_i64(local_tid_127076), + k2p2zq_73023), + sext_i32_i64(local_tid_127076) - + sext_i32_i64(squot32(local_tid_127076, + 32) * 32 - 1)); + + if (inactive_127093) { + x_94097 = x_94098; + } + if (!inactive_127093) { + double defunc_1_op_res_94099 = x_94097 + + x_94098; + + x_94097 = defunc_1_op_res_94099; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = + x_94097; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127076, 32) == 0) { + ((__local + double *) red_arr_mem_127080)[sext_i32_i64(local_tid_127076)] = + x_94098; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076), m_73008 * k2p2zq_73023 * + k2p2zq_73023) && slt64(sext_i32_i64(local_tid_127076), + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073))) { + ((__global + double *) mem_121840)[squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + 
segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) + + squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076) - + squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076) - + squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076) - + squot64(sext_i32_i64(virt_group_id_127084) * + squot64(segred_group_sizze_94093, + segment_sizze_nonzzero_127073) + + sext_i32_i64(local_tid_127076), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_127080)[(sext_i32_i64(local_tid_127076) + + (int64_t) 1) * + segment_sizze_nonzzero_127073 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_94093 +} +__kernel void mainMagnitudezisegred_small_92541(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_126936_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t x_93925, + int64_t i_93926, + int64_t j_m_i_93930, + int64_t num_groups_94012, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_126929, + __global + unsigned char 
*mem_121351, + __global + unsigned char *mem_param_121469, + __global + unsigned char *mem_121555) +{ + /* mainMagnitudezisegred_small_92541: segmented sum reduction over short segments, one segment per (gtid_92530, gtid_92531) pair and m_73008 * k2p2zq_73023 segments in total; gtid_92540 = local_tid % j_m_i_93930 is the position inside the segment. Returns at once if *global_failure >= 0. Each work-group loops over virtual groups (phys_group_id + i * num_groups_94012). With slice = gtid_92540 + x_93925, an in-bounds thread reads x = mem_121351[gtid_92530 * (k2p2zq * k2p2zq) + slice * k2p2zq + i_93926] as double; it contributes 0.0 when x is NaN and otherwise mem_param_121469[gtid_92530 * binop_x_120251 + gtid_92531 * k2p2zq + slice] times x. Out-of-bounds threads contribute 0.0. The contributions are scanned with + per segment in local memory (32-element blocks, a scan of the per-block results, then carry-in) and the last element of each segment is written to mem_121555 at an index equal to the flat segment index. NOTE(review): segment_sizze_nonzzero_126929 is supplied by the host; presumably max(1, j_m_i_93930) -- confirm against the launch code. */ + #define segred_group_sizze_94011 (mainMagnitudezisegred_group_sizze_92535) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126936_backing_0 = + (__local volatile + char *) red_arr_mem_126936_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126931; + int32_t local_tid_126932; + int64_t group_sizze_126935; + int32_t wave_sizze_126934; + int32_t group_tid_126933; + + global_tid_126931 = get_global_id(0); + local_tid_126932 = get_local_id(0); + group_sizze_126935 = get_local_size(0); + wave_sizze_126934 = LOCKSTEP_WIDTH; + group_tid_126933 = get_group_id(0); + + int32_t phys_tid_92541; + + phys_tid_92541 = global_tid_126931; + + __local char *red_arr_mem_126936; + + red_arr_mem_126936 = (__local char *) red_arr_mem_126936_backing_0; + + int32_t phys_group_id_126938; + + phys_group_id_126938 = get_group_id(0); + for (int32_t i_126939 = 0; i_126939 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929))) - + phys_group_id_126938, sext_i64_i32(num_groups_94012)); + i_126939++) { + int32_t virt_group_id_126940 = phys_group_id_126938 + i_126939 * + sext_i64_i32(num_groups_94012); + int64_t gtid_92530 = squot64(squot64(sext_i32_i64(local_tid_126932), + segment_sizze_nonzzero_126929) + + sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929), + k2p2zq_73023); + int64_t gtid_92531 = squot64(sext_i32_i64(local_tid_126932), + segment_sizze_nonzzero_126929) + + sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929) - + squot64(squot64(sext_i32_i64(local_tid_126932), + segment_sizze_nonzzero_126929) + + sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, +
segment_sizze_nonzzero_126929), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_92540 = srem64(sext_i32_i64(local_tid_126932), + j_m_i_93930); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, j_m_i_93930) && ((slt64(gtid_92530, + m_73008) && + slt64(gtid_92531, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_126932), + j_m_i_93930 * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929)))) { + int64_t slice_115048 = gtid_92540 + x_93925; + double x_94022 = ((__global double *) mem_121351)[gtid_92530 * + (k2p2zq_73023 * + k2p2zq_73023) + + slice_115048 * + k2p2zq_73023 + + i_93926]; + bool isnan_res_94023; + + isnan_res_94023 = futrts_isnan64(x_94022); + + double defunc_1_f_res_94024; + + if (isnan_res_94023) { + defunc_1_f_res_94024 = 0.0; + } else { + double x_94021 = ((__global + double *) mem_param_121469)[gtid_92530 * + binop_x_120251 + + gtid_92531 * + k2p2zq_73023 + + slice_115048]; + double defunc_1_f_res_f_res_94025 = x_94021 * x_94022; + + defunc_1_f_res_94024 = defunc_1_f_res_f_res_94025; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + defunc_1_f_res_94024; + } + } else { + ((__local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, j_m_i_93930)) { + // perform segmented scan to imitate reduction + { + double x_94015; + double x_94016; + double x_126941; + double x_126942; + bool ltid_in_bounds_126944; + + ltid_in_bounds_126944 = slt64(sext_i32_i64(local_tid_126932), + j_m_i_93930 * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929)); + + int32_t skip_threads_126945; + + // read input for in-block scan + { + if (ltid_in_bounds_126944) { + x_94016 = ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)]; + if ((local_tid_126932 - squot32(local_tid_126932, 32) * + 32) == 0) { + x_94015 = 
x_94016; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126945 = 1; + while (slt32(skip_threads_126945, 32)) { + if (sle32(skip_threads_126945, local_tid_126932 - + squot32(local_tid_126932, 32) * 32) && + ltid_in_bounds_126944) { + // read operands + { + x_94015 = ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932) - + sext_i32_i64(skip_threads_126945)]; + } + // perform operation + { + /* inactive: this thread's position inside its segment is smaller than skip_threads, so the operand read skip_threads slots back lies before the segment start; the thread then keeps its own value instead of adding */ + bool inactive_126946 = + slt64(srem64(sext_i32_i64(local_tid_126932), + j_m_i_93930), + sext_i32_i64(local_tid_126932) - + sext_i32_i64(local_tid_126932 - + skip_threads_126945)); + + if (inactive_126946) { + x_94015 = x_94016; + } + if (!inactive_126946) { + double defunc_1_op_res_94017 = x_94015 + + x_94016; + + x_94015 = defunc_1_op_res_94017; + } + } + } + if (sle32(wave_sizze_126934, skip_threads_126945)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126945, local_tid_126932 - + squot32(local_tid_126932, 32) * 32) && + ltid_in_bounds_126944) { + // write result + { + ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + x_94015; + x_94016 = x_94015; + } + } + if (sle32(wave_sizze_126934, skip_threads_126945)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126945 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126932 - squot32(local_tid_126932, 32) * + 32) == 31 && ltid_in_bounds_126944) { + ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(squot32(local_tid_126932, + 32))] = + x_94015; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126947; + + // read input for in-block scan + { + if (squot32(local_tid_126932, 32) == 0 && + ltid_in_bounds_126944) { + x_126942 = ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)]; + if 
((local_tid_126932 - squot32(local_tid_126932, + 32) * 32) == 0) { + x_126941 = x_126942; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126947 = 1; + while (slt32(skip_threads_126947, 32)) { + if (sle32(skip_threads_126947, local_tid_126932 - + squot32(local_tid_126932, 32) * 32) && + (squot32(local_tid_126932, 32) == 0 && + ltid_in_bounds_126944)) { + // read operands + { + x_126941 = ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932) - + sext_i32_i64(skip_threads_126947)]; + } + // perform operation + { + bool inactive_126948 = + slt64(srem64(sext_i32_i64(local_tid_126932 * + 32 + 32 - 1), + j_m_i_93930), + sext_i32_i64(local_tid_126932 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126932 - + skip_threads_126947) * + 32 + 32 - 1)); + + if (inactive_126948) { + x_126941 = x_126942; + } + if (!inactive_126948) { + double defunc_1_op_res_126943 = + x_126941 + x_126942; + + x_126941 = defunc_1_op_res_126943; + } + } + } + if (sle32(wave_sizze_126934, skip_threads_126947)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126947, local_tid_126932 - + squot32(local_tid_126932, 32) * 32) && + (squot32(local_tid_126932, 32) == 0 && + ltid_in_bounds_126944)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + x_126941; + x_126942 = x_126941; + } + } + if (sle32(wave_sizze_126934, skip_threads_126947)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126947 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126932, 32) == 0 || + !ltid_in_bounds_126944)) { + // read operands + { + x_94016 = x_94015; + x_94015 = ((__local + double *) red_arr_mem_126936)[sext_i32_i64(squot32(local_tid_126932, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126949 = + slt64(srem64(sext_i32_i64(local_tid_126932), + j_m_i_93930), + 
sext_i32_i64(local_tid_126932) - + sext_i32_i64(squot32(local_tid_126932, + 32) * 32 - 1)); + + if (inactive_126949) { + x_94015 = x_94016; + } + if (!inactive_126949) { + double defunc_1_op_res_94017 = x_94015 + + x_94016; + + x_94015 = defunc_1_op_res_94017; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + x_94015; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126932, 32) == 0) { + ((__local + double *) red_arr_mem_126936)[sext_i32_i64(local_tid_126932)] = + x_94016; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929) + + sext_i32_i64(local_tid_126932), m_73008 * k2p2zq_73023) && + slt64(sext_i32_i64(local_tid_126932), + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929))) { + ((__global + double *) mem_121555)[squot64(sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929) + + sext_i32_i64(local_tid_126932), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929) + + sext_i32_i64(local_tid_126932) - + squot64(sext_i32_i64(virt_group_id_126940) * + squot64(segred_group_sizze_94011, + segment_sizze_nonzzero_126929) + + sext_i32_i64(local_tid_126932), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_126936)[(sext_i32_i64(local_tid_126932) + + (int64_t) 1) * + segment_sizze_nonzzero_126929 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_94011 +} +__kernel void mainMagnitudezisegred_small_93298(__global int *global_failure, + __local volatile + int64_t 
*red_arr_mem_126711_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t j_93466, + int64_t num_groups_93499, + int64_t segment_sizze_nonzzero_126704, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_120938) +{ + #define segred_group_sizze_93498 (mainMagnitudezisegred_group_sizze_93292) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_126711_backing_0 = + (__local volatile + char *) red_arr_mem_126711_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_126706; + int32_t local_tid_126707; + int64_t group_sizze_126710; + int32_t wave_sizze_126709; + int32_t group_tid_126708; + + global_tid_126706 = get_global_id(0); + local_tid_126707 = get_local_id(0); + group_sizze_126710 = get_local_size(0); + wave_sizze_126709 = LOCKSTEP_WIDTH; + group_tid_126708 = get_group_id(0); + + int32_t phys_tid_93298; + + phys_tid_93298 = global_tid_126706; + + __local char *red_arr_mem_126711; + + red_arr_mem_126711 = (__local char *) red_arr_mem_126711_backing_0; + + int32_t phys_group_id_126713; + + phys_group_id_126713 = get_group_id(0); + for (int32_t i_126714 = 0; i_126714 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704))) - + phys_group_id_126713, sext_i64_i32(num_groups_93499)); + i_126714++) { + int32_t virt_group_id_126715 = phys_group_id_126713 + i_126714 * + sext_i64_i32(num_groups_93499); + int64_t gtid_93289 = squot64(sext_i32_i64(local_tid_126707), + segment_sizze_nonzzero_126704) + + sext_i32_i64(virt_group_id_126715) * + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704); + int64_t gtid_93297 = srem64(sext_i32_i64(local_tid_126707), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (slt64(gtid_93289, + m_73008) && + 
slt64(sext_i32_i64(local_tid_126707), + k2p2zq_73023 * + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704)))) { + double x_93506 = ((__global double *) mem_120246)[j_93466 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_93289 * + defunc_2_reduce_res_73132 + + gtid_93297]; + double defunc_1_f_res_93507 = x_93506 * x_93506; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + defunc_1_f_res_93507; + } + } else { + ((__local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_93502; + double x_93503; + double x_126716; + double x_126717; + bool ltid_in_bounds_126719; + + ltid_in_bounds_126719 = slt64(sext_i32_i64(local_tid_126707), + k2p2zq_73023 * + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704)); + + int32_t skip_threads_126720; + + // read input for in-block scan + { + if (ltid_in_bounds_126719) { + x_93503 = ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)]; + if ((local_tid_126707 - squot32(local_tid_126707, 32) * + 32) == 0) { + x_93502 = x_93503; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126720 = 1; + while (slt32(skip_threads_126720, 32)) { + if (sle32(skip_threads_126720, local_tid_126707 - + squot32(local_tid_126707, 32) * 32) && + ltid_in_bounds_126719) { + // read operands + { + x_93502 = ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707) - + sext_i32_i64(skip_threads_126720)]; + } + // perform operation + { + bool inactive_126721 = + slt64(srem64(sext_i32_i64(local_tid_126707), + k2p2zq_73023), + sext_i32_i64(local_tid_126707) - + sext_i32_i64(local_tid_126707 - + skip_threads_126720)); + + if (inactive_126721) { + x_93502 = x_93503; + } + if 
(!inactive_126721) { + double defunc_1_op_res_93504 = x_93502 + + x_93503; + + x_93502 = defunc_1_op_res_93504; + } + } + } + if (sle32(wave_sizze_126709, skip_threads_126720)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126720, local_tid_126707 - + squot32(local_tid_126707, 32) * 32) && + ltid_in_bounds_126719) { + // write result + { + ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + x_93502; + x_93503 = x_93502; + } + } + if (sle32(wave_sizze_126709, skip_threads_126720)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126720 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_126707 - squot32(local_tid_126707, 32) * + 32) == 31 && ltid_in_bounds_126719) { + ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(squot32(local_tid_126707, + 32))] = + x_93502; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_126722; + + // read input for in-block scan + { + if (squot32(local_tid_126707, 32) == 0 && + ltid_in_bounds_126719) { + x_126717 = ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)]; + if ((local_tid_126707 - squot32(local_tid_126707, + 32) * 32) == 0) { + x_126716 = x_126717; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_126722 = 1; + while (slt32(skip_threads_126722, 32)) { + if (sle32(skip_threads_126722, local_tid_126707 - + squot32(local_tid_126707, 32) * 32) && + (squot32(local_tid_126707, 32) == 0 && + ltid_in_bounds_126719)) { + // read operands + { + x_126716 = ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707) - + sext_i32_i64(skip_threads_126722)]; + } + // perform operation + { + bool inactive_126723 = + slt64(srem64(sext_i32_i64(local_tid_126707 * + 32 + 32 - 1), + k2p2zq_73023), + 
sext_i32_i64(local_tid_126707 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_126707 - + skip_threads_126722) * + 32 + 32 - 1)); + + if (inactive_126723) { + x_126716 = x_126717; + } + if (!inactive_126723) { + double defunc_1_op_res_126718 = + x_126716 + x_126717; + + x_126716 = defunc_1_op_res_126718; + } + } + } + if (sle32(wave_sizze_126709, skip_threads_126722)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_126722, local_tid_126707 - + squot32(local_tid_126707, 32) * 32) && + (squot32(local_tid_126707, 32) == 0 && + ltid_in_bounds_126719)) { + // write result + { + ((volatile __local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + x_126716; + x_126717 = x_126716; + } + } + if (sle32(wave_sizze_126709, skip_threads_126722)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_126722 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_126707, 32) == 0 || + !ltid_in_bounds_126719)) { + // read operands + { + x_93503 = x_93502; + x_93502 = ((__local + double *) red_arr_mem_126711)[sext_i32_i64(squot32(local_tid_126707, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_126724 = + slt64(srem64(sext_i32_i64(local_tid_126707), + k2p2zq_73023), + sext_i32_i64(local_tid_126707) - + sext_i32_i64(squot32(local_tid_126707, + 32) * 32 - 1)); + + if (inactive_126724) { + x_93502 = x_93503; + } + if (!inactive_126724) { + double defunc_1_op_res_93504 = x_93502 + + x_93503; + + x_93502 = defunc_1_op_res_93504; + } + } + // write final result + { + ((__local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + x_93502; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_126707, 32) == 0) { + ((__local + double *) red_arr_mem_126711)[sext_i32_i64(local_tid_126707)] = + x_93503; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final 
values of segments + { + if (slt64(sext_i32_i64(virt_group_id_126715) * + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704) + + sext_i32_i64(local_tid_126707), m_73008) && + slt64(sext_i32_i64(local_tid_126707), + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704))) { + ((__global + double *) mem_120938)[sext_i32_i64(virt_group_id_126715) * + squot64(segred_group_sizze_93498, + segment_sizze_nonzzero_126704) + + sext_i32_i64(local_tid_126707)] = + ((__local + double *) red_arr_mem_126711)[(sext_i32_i64(local_tid_126707) + + (int64_t) 1) * + segment_sizze_nonzzero_126704 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_93498 +} +__kernel void mainMagnitudezisegred_small_96013(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127864_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t num_groups_97994, + int64_t segment_sizze_nonzzero_127857, + __global + unsigned char *mem_123614, + __global + unsigned char *mem_123618, + __global + unsigned char *mem_123623) +{ + #define segred_group_sizze_97993 (mainMagnitudezisegred_group_sizze_96007) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127864_backing_0 = + (__local volatile + char *) red_arr_mem_127864_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127859; + int32_t local_tid_127860; + int64_t group_sizze_127863; + int32_t wave_sizze_127862; + int32_t group_tid_127861; + + global_tid_127859 = get_global_id(0); + local_tid_127860 = get_local_id(0); + group_sizze_127863 = get_local_size(0); + wave_sizze_127862 = LOCKSTEP_WIDTH; + group_tid_127861 = get_group_id(0); + + int32_t phys_tid_96013; + + phys_tid_96013 = global_tid_127859; + + __local char *red_arr_mem_127864; + + red_arr_mem_127864 = (__local char *) 
red_arr_mem_127864_backing_0; + + int32_t phys_group_id_127866; + + phys_group_id_127866 = get_group_id(0); + for (int32_t i_127867 = 0; i_127867 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023 * k2p2zq_73023, + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857))) - + phys_group_id_127866, sext_i64_i32(num_groups_97994)); + i_127867++) { + int32_t virt_group_id_127868 = phys_group_id_127866 + i_127867 * + sext_i64_i32(num_groups_97994); + int64_t gtid_96000 = squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857), + k2p2zq_73023 * k2p2zq_73023); + int64_t gtid_96001 = squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) - + squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023); + int64_t gtid_96002 = squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) - + squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857), k2p2zq_73023 * + k2p2zq_73023) * (k2p2zq_73023 * k2p2zq_73023) - + squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) - + squot64(squot64(sext_i32_i64(local_tid_127860), + segment_sizze_nonzzero_127857) + + sext_i32_i64(virt_group_id_127868) * + 
squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857), + k2p2zq_73023 * k2p2zq_73023) * (k2p2zq_73023 * + k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023; + int64_t gtid_96012 = srem64(sext_i32_i64(local_tid_127860), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (((slt64(gtid_96000, + m_73008) && + slt64(gtid_96001, + k2p2zq_73023)) && + slt64(gtid_96002, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_127860), + k2p2zq_73023 * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857)))) { + double x_98003 = ((__global double *) mem_123614)[gtid_96001 * + (k2p2zq_73023 * + m_73008) + + gtid_96000 * + k2p2zq_73023 + + gtid_96012]; + double x_98004 = ((__global double *) mem_123618)[gtid_96000 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_96002 * + k2p2zq_73023 + + gtid_96012]; + double defunc_1_f_res_98005 = x_98003 * x_98004; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + defunc_1_f_res_98005; + } + } else { + ((__local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_97997; + double x_97998; + double x_127869; + double x_127870; + bool ltid_in_bounds_127872; + + ltid_in_bounds_127872 = slt64(sext_i32_i64(local_tid_127860), + k2p2zq_73023 * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857)); + + int32_t skip_threads_127873; + + // read input for in-block scan + { + if (ltid_in_bounds_127872) { + x_97998 = ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)]; + if ((local_tid_127860 - squot32(local_tid_127860, 32) * + 32) == 0) { + x_97997 = x_97998; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127873 = 1; + while 
(slt32(skip_threads_127873, 32)) { + if (sle32(skip_threads_127873, local_tid_127860 - + squot32(local_tid_127860, 32) * 32) && + ltid_in_bounds_127872) { + // read operands + { + x_97997 = ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860) - + sext_i32_i64(skip_threads_127873)]; + } + // perform operation + { + bool inactive_127874 = + slt64(srem64(sext_i32_i64(local_tid_127860), + k2p2zq_73023), + sext_i32_i64(local_tid_127860) - + sext_i32_i64(local_tid_127860 - + skip_threads_127873)); + + if (inactive_127874) { + x_97997 = x_97998; + } + if (!inactive_127874) { + double defunc_1_op_res_97999 = x_97997 + + x_97998; + + x_97997 = defunc_1_op_res_97999; + } + } + } + if (sle32(wave_sizze_127862, skip_threads_127873)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127873, local_tid_127860 - + squot32(local_tid_127860, 32) * 32) && + ltid_in_bounds_127872) { + // write result + { + ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + x_97997; + x_97998 = x_97997; + } + } + if (sle32(wave_sizze_127862, skip_threads_127873)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127873 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127860 - squot32(local_tid_127860, 32) * + 32) == 31 && ltid_in_bounds_127872) { + ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(squot32(local_tid_127860, + 32))] = + x_97997; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127875; + + // read input for in-block scan + { + if (squot32(local_tid_127860, 32) == 0 && + ltid_in_bounds_127872) { + x_127870 = ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)]; + if ((local_tid_127860 - squot32(local_tid_127860, + 32) * 32) == 0) { + x_127869 = x_127870; + } + } + } + // in-block scan 
(hopefully no barriers needed) + { + skip_threads_127875 = 1; + while (slt32(skip_threads_127875, 32)) { + if (sle32(skip_threads_127875, local_tid_127860 - + squot32(local_tid_127860, 32) * 32) && + (squot32(local_tid_127860, 32) == 0 && + ltid_in_bounds_127872)) { + // read operands + { + x_127869 = ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860) - + sext_i32_i64(skip_threads_127875)]; + } + // perform operation + { + bool inactive_127876 = + slt64(srem64(sext_i32_i64(local_tid_127860 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_127860 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127860 - + skip_threads_127875) * + 32 + 32 - 1)); + + if (inactive_127876) { + x_127869 = x_127870; + } + if (!inactive_127876) { + double defunc_1_op_res_127871 = + x_127869 + x_127870; + + x_127869 = defunc_1_op_res_127871; + } + } + } + if (sle32(wave_sizze_127862, skip_threads_127875)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127875, local_tid_127860 - + squot32(local_tid_127860, 32) * 32) && + (squot32(local_tid_127860, 32) == 0 && + ltid_in_bounds_127872)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + x_127869; + x_127870 = x_127869; + } + } + if (sle32(wave_sizze_127862, skip_threads_127875)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127875 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127860, 32) == 0 || + !ltid_in_bounds_127872)) { + // read operands + { + x_97998 = x_97997; + x_97997 = ((__local + double *) red_arr_mem_127864)[sext_i32_i64(squot32(local_tid_127860, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127877 = + slt64(srem64(sext_i32_i64(local_tid_127860), + k2p2zq_73023), + sext_i32_i64(local_tid_127860) - + sext_i32_i64(squot32(local_tid_127860, + 32) * 32 - 1)); + + if (inactive_127877) { + x_97997 = x_97998; + } 
+ if (!inactive_127877) { + double defunc_1_op_res_97999 = x_97997 + + x_97998; + + x_97997 = defunc_1_op_res_97999; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + x_97997; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127860, 32) == 0) { + ((__local + double *) red_arr_mem_127864)[sext_i32_i64(local_tid_127860)] = + x_97998; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860), m_73008 * k2p2zq_73023 * + k2p2zq_73023) && slt64(sext_i32_i64(local_tid_127860), + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857))) { + ((__global + double *) mem_123623)[squot64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) + + squot64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860) - + squot64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860) - + squot64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860), + k2p2zq_73023 * k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023) - + squot64(sext_i32_i64(virt_group_id_127868) * + 
squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860) - + squot64(sext_i32_i64(virt_group_id_127868) * + squot64(segred_group_sizze_97993, + segment_sizze_nonzzero_127857) + + sext_i32_i64(local_tid_127860), + k2p2zq_73023 * + k2p2zq_73023) * + (k2p2zq_73023 * k2p2zq_73023), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_127864)[(sext_i32_i64(local_tid_127860) + + (int64_t) 1) * + segment_sizze_nonzzero_127857 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97993 +} +__kernel void mainMagnitudezisegred_small_96291(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127720_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t x_97825, + int64_t i_97826, + int64_t j_m_i_97830, + int64_t num_groups_97912, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_127713, + __global + unsigned char *mem_123143, + __global + unsigned char *mem_param_123252, + __global + unsigned char *mem_123338) +{ + #define segred_group_sizze_97911 (mainMagnitudezisegred_group_sizze_96285) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127720_backing_0 = + (__local volatile + char *) red_arr_mem_127720_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127715; + int32_t local_tid_127716; + int64_t group_sizze_127719; + int32_t wave_sizze_127718; + int32_t group_tid_127717; + + global_tid_127715 = get_global_id(0); + local_tid_127716 = get_local_id(0); + group_sizze_127719 = get_local_size(0); + wave_sizze_127718 = LOCKSTEP_WIDTH; + group_tid_127717 = get_group_id(0); + + int32_t phys_tid_96291; + + phys_tid_96291 = global_tid_127715; + + __local char *red_arr_mem_127720; + + red_arr_mem_127720 = (__local char *) red_arr_mem_127720_backing_0; + + 
int32_t phys_group_id_127722; + + phys_group_id_127722 = get_group_id(0); + for (int32_t i_127723 = 0; i_127723 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713))) - + phys_group_id_127722, sext_i64_i32(num_groups_97912)); + i_127723++) { + int32_t virt_group_id_127724 = phys_group_id_127722 + i_127723 * + sext_i64_i32(num_groups_97912); + int64_t gtid_96280 = squot64(squot64(sext_i32_i64(local_tid_127716), + segment_sizze_nonzzero_127713) + + sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713), + k2p2zq_73023); + int64_t gtid_96281 = squot64(sext_i32_i64(local_tid_127716), + segment_sizze_nonzzero_127713) + + sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713) - + squot64(squot64(sext_i32_i64(local_tid_127716), + segment_sizze_nonzzero_127713) + + sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_96290 = srem64(sext_i32_i64(local_tid_127716), + j_m_i_97830); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, j_m_i_97830) && ((slt64(gtid_96280, + m_73008) && + slt64(gtid_96281, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_127716), + j_m_i_97830 * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713)))) { + int64_t slice_115165 = gtid_96290 + x_97825; + double x_97922 = ((__global double *) mem_123143)[gtid_96280 * + (k2p2zq_73023 * + k2p2zq_73023) + + slice_115165 * + k2p2zq_73023 + + i_97826]; + bool isnan_res_97923; + + isnan_res_97923 = futrts_isnan64(x_97922); + + double defunc_1_f_res_97924; + + if (isnan_res_97923) { + defunc_1_f_res_97924 = 0.0; + } else { + double x_97921 = ((__global + double *) mem_param_123252)[gtid_96280 * + binop_x_120251 + + gtid_96281 * + k2p2zq_73023 + + slice_115165]; + double defunc_1_f_res_f_res_97925 = 
x_97921 * x_97922; + + defunc_1_f_res_97924 = defunc_1_f_res_f_res_97925; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + defunc_1_f_res_97924; + } + } else { + ((__local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, j_m_i_97830)) { + // perform segmented scan to imitate reduction + { + double x_97915; + double x_97916; + double x_127725; + double x_127726; + bool ltid_in_bounds_127728; + + ltid_in_bounds_127728 = slt64(sext_i32_i64(local_tid_127716), + j_m_i_97830 * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713)); + + int32_t skip_threads_127729; + + // read input for in-block scan + { + if (ltid_in_bounds_127728) { + x_97916 = ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)]; + if ((local_tid_127716 - squot32(local_tid_127716, 32) * + 32) == 0) { + x_97915 = x_97916; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127729 = 1; + while (slt32(skip_threads_127729, 32)) { + if (sle32(skip_threads_127729, local_tid_127716 - + squot32(local_tid_127716, 32) * 32) && + ltid_in_bounds_127728) { + // read operands + { + x_97915 = ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716) - + sext_i32_i64(skip_threads_127729)]; + } + // perform operation + { + bool inactive_127730 = + slt64(srem64(sext_i32_i64(local_tid_127716), + j_m_i_97830), + sext_i32_i64(local_tid_127716) - + sext_i32_i64(local_tid_127716 - + skip_threads_127729)); + + if (inactive_127730) { + x_97915 = x_97916; + } + if (!inactive_127730) { + double defunc_1_op_res_97917 = x_97915 + + x_97916; + + x_97915 = defunc_1_op_res_97917; + } + } + } + if (sle32(wave_sizze_127718, skip_threads_127729)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127729, local_tid_127716 - + 
squot32(local_tid_127716, 32) * 32) && + ltid_in_bounds_127728) { + // write result + { + ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + x_97915; + x_97916 = x_97915; + } + } + if (sle32(wave_sizze_127718, skip_threads_127729)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127729 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127716 - squot32(local_tid_127716, 32) * + 32) == 31 && ltid_in_bounds_127728) { + ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(squot32(local_tid_127716, + 32))] = + x_97915; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127731; + + // read input for in-block scan + { + if (squot32(local_tid_127716, 32) == 0 && + ltid_in_bounds_127728) { + x_127726 = ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)]; + if ((local_tid_127716 - squot32(local_tid_127716, + 32) * 32) == 0) { + x_127725 = x_127726; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127731 = 1; + while (slt32(skip_threads_127731, 32)) { + if (sle32(skip_threads_127731, local_tid_127716 - + squot32(local_tid_127716, 32) * 32) && + (squot32(local_tid_127716, 32) == 0 && + ltid_in_bounds_127728)) { + // read operands + { + x_127725 = ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716) - + sext_i32_i64(skip_threads_127731)]; + } + // perform operation + { + bool inactive_127732 = + slt64(srem64(sext_i32_i64(local_tid_127716 * + 32 + 32 - 1), + j_m_i_97830), + sext_i32_i64(local_tid_127716 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127716 - + skip_threads_127731) * + 32 + 32 - 1)); + + if (inactive_127732) { + x_127725 = x_127726; + } + if (!inactive_127732) { + double defunc_1_op_res_127727 = + x_127725 + x_127726; + + x_127725 = 
defunc_1_op_res_127727; + } + } + } + if (sle32(wave_sizze_127718, skip_threads_127731)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127731, local_tid_127716 - + squot32(local_tid_127716, 32) * 32) && + (squot32(local_tid_127716, 32) == 0 && + ltid_in_bounds_127728)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + x_127725; + x_127726 = x_127725; + } + } + if (sle32(wave_sizze_127718, skip_threads_127731)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127731 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127716, 32) == 0 || + !ltid_in_bounds_127728)) { + // read operands + { + x_97916 = x_97915; + x_97915 = ((__local + double *) red_arr_mem_127720)[sext_i32_i64(squot32(local_tid_127716, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127733 = + slt64(srem64(sext_i32_i64(local_tid_127716), + j_m_i_97830), + sext_i32_i64(local_tid_127716) - + sext_i32_i64(squot32(local_tid_127716, + 32) * 32 - 1)); + + if (inactive_127733) { + x_97915 = x_97916; + } + if (!inactive_127733) { + double defunc_1_op_res_97917 = x_97915 + + x_97916; + + x_97915 = defunc_1_op_res_97917; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + x_97915; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127716, 32) == 0) { + ((__local + double *) red_arr_mem_127720)[sext_i32_i64(local_tid_127716)] = + x_97916; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713) + + sext_i32_i64(local_tid_127716), m_73008 * k2p2zq_73023) && + slt64(sext_i32_i64(local_tid_127716), + squot64(segred_group_sizze_97911, 
+ segment_sizze_nonzzero_127713))) { + ((__global + double *) mem_123338)[squot64(sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713) + + sext_i32_i64(local_tid_127716), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713) + + sext_i32_i64(local_tid_127716) - + squot64(sext_i32_i64(virt_group_id_127724) * + squot64(segred_group_sizze_97911, + segment_sizze_nonzzero_127713) + + sext_i32_i64(local_tid_127716), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_127720)[(sext_i32_i64(local_tid_127716) + + (int64_t) 1) * + segment_sizze_nonzzero_127713 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97911 +} +__kernel void mainMagnitudezisegred_small_97064(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127495_backing_aligned_0, + int64_t m_73008, + int64_t defunc_2_reduce_res_73132, + int64_t rp1_73709, + int64_t j_97356, + int64_t num_groups_97389, + int64_t segment_sizze_nonzzero_127488, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122730) +{ + #define segred_group_sizze_97388 (mainMagnitudezisegred_group_sizze_97058) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127495_backing_0 = + (__local volatile + char *) red_arr_mem_127495_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127490; + int32_t local_tid_127491; + int64_t group_sizze_127494; + int32_t wave_sizze_127493; + int32_t group_tid_127492; + + global_tid_127490 = get_global_id(0); + local_tid_127491 = get_local_id(0); + group_sizze_127494 = get_local_size(0); + wave_sizze_127493 = LOCKSTEP_WIDTH; + group_tid_127492 = get_group_id(0); + + int32_t phys_tid_97064; + 
+ phys_tid_97064 = global_tid_127490; + + __local char *red_arr_mem_127495; + + red_arr_mem_127495 = (__local char *) red_arr_mem_127495_backing_0; + + int32_t phys_group_id_127497; + + phys_group_id_127497 = get_group_id(0); + for (int32_t i_127498 = 0; i_127498 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488))) - + phys_group_id_127497, sext_i64_i32(num_groups_97389)); + i_127498++) { + int32_t virt_group_id_127499 = phys_group_id_127497 + i_127498 * + sext_i64_i32(num_groups_97389); + int64_t gtid_97055 = squot64(sext_i32_i64(local_tid_127491), + segment_sizze_nonzzero_127488) + + sext_i32_i64(virt_group_id_127499) * + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488); + int64_t gtid_97063 = srem64(sext_i32_i64(local_tid_127491), rp1_73709); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, rp1_73709) && (slt64(gtid_97055, m_73008) && + slt64(sext_i32_i64(local_tid_127491), + rp1_73709 * + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488)))) { + double x_97396 = ((__global double *) mem_120246)[j_97356 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97055 * + defunc_2_reduce_res_73132 + + gtid_97063]; + double defunc_1_f_res_97397 = x_97396 * x_97396; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + defunc_1_f_res_97397; + } + } else { + ((__local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, rp1_73709)) { + // perform segmented scan to imitate reduction + { + double x_97392; + double x_97393; + double x_127500; + double x_127501; + bool ltid_in_bounds_127503; + + ltid_in_bounds_127503 = slt64(sext_i32_i64(local_tid_127491), + rp1_73709 * + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488)); + + int32_t skip_threads_127504; + + // read 
input for in-block scan + { + if (ltid_in_bounds_127503) { + x_97393 = ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)]; + if ((local_tid_127491 - squot32(local_tid_127491, 32) * + 32) == 0) { + x_97392 = x_97393; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127504 = 1; + while (slt32(skip_threads_127504, 32)) { + if (sle32(skip_threads_127504, local_tid_127491 - + squot32(local_tid_127491, 32) * 32) && + ltid_in_bounds_127503) { + // read operands + { + x_97392 = ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491) - + sext_i32_i64(skip_threads_127504)]; + } + // perform operation + { + bool inactive_127505 = + slt64(srem64(sext_i32_i64(local_tid_127491), + rp1_73709), + sext_i32_i64(local_tid_127491) - + sext_i32_i64(local_tid_127491 - + skip_threads_127504)); + + if (inactive_127505) { + x_97392 = x_97393; + } + if (!inactive_127505) { + double defunc_1_op_res_97394 = x_97392 + + x_97393; + + x_97392 = defunc_1_op_res_97394; + } + } + } + if (sle32(wave_sizze_127493, skip_threads_127504)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127504, local_tid_127491 - + squot32(local_tid_127491, 32) * 32) && + ltid_in_bounds_127503) { + // write result + { + ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + x_97392; + x_97393 = x_97392; + } + } + if (sle32(wave_sizze_127493, skip_threads_127504)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127504 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127491 - squot32(local_tid_127491, 32) * + 32) == 31 && ltid_in_bounds_127503) { + ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(squot32(local_tid_127491, + 32))] = + x_97392; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t 
skip_threads_127506; + + // read input for in-block scan + { + if (squot32(local_tid_127491, 32) == 0 && + ltid_in_bounds_127503) { + x_127501 = ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)]; + if ((local_tid_127491 - squot32(local_tid_127491, + 32) * 32) == 0) { + x_127500 = x_127501; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127506 = 1; + while (slt32(skip_threads_127506, 32)) { + if (sle32(skip_threads_127506, local_tid_127491 - + squot32(local_tid_127491, 32) * 32) && + (squot32(local_tid_127491, 32) == 0 && + ltid_in_bounds_127503)) { + // read operands + { + x_127500 = ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491) - + sext_i32_i64(skip_threads_127506)]; + } + // perform operation + { + bool inactive_127507 = + slt64(srem64(sext_i32_i64(local_tid_127491 * + 32 + 32 - 1), rp1_73709), + sext_i32_i64(local_tid_127491 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127491 - + skip_threads_127506) * + 32 + 32 - 1)); + + if (inactive_127507) { + x_127500 = x_127501; + } + if (!inactive_127507) { + double defunc_1_op_res_127502 = + x_127500 + x_127501; + + x_127500 = defunc_1_op_res_127502; + } + } + } + if (sle32(wave_sizze_127493, skip_threads_127506)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127506, local_tid_127491 - + squot32(local_tid_127491, 32) * 32) && + (squot32(local_tid_127491, 32) == 0 && + ltid_in_bounds_127503)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + x_127500; + x_127501 = x_127500; + } + } + if (sle32(wave_sizze_127493, skip_threads_127506)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127506 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127491, 32) == 0 || + !ltid_in_bounds_127503)) { + // read operands + { + x_97393 = x_97392; + x_97392 = ((__local + double *) 
red_arr_mem_127495)[sext_i32_i64(squot32(local_tid_127491, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127508 = + slt64(srem64(sext_i32_i64(local_tid_127491), + rp1_73709), + sext_i32_i64(local_tid_127491) - + sext_i32_i64(squot32(local_tid_127491, + 32) * 32 - 1)); + + if (inactive_127508) { + x_97392 = x_97393; + } + if (!inactive_127508) { + double defunc_1_op_res_97394 = x_97392 + + x_97393; + + x_97392 = defunc_1_op_res_97394; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + x_97392; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127491, 32) == 0) { + ((__local + double *) red_arr_mem_127495)[sext_i32_i64(local_tid_127491)] = + x_97393; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127499) * + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488) + + sext_i32_i64(local_tid_127491), m_73008) && + slt64(sext_i32_i64(local_tid_127491), + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488))) { + ((__global + double *) mem_122730)[sext_i32_i64(virt_group_id_127499) * + squot64(segred_group_sizze_97388, + segment_sizze_nonzzero_127488) + + sext_i32_i64(local_tid_127491)] = + ((__local + double *) red_arr_mem_127495)[(sext_i32_i64(local_tid_127491) + + (int64_t) 1) * + segment_sizze_nonzzero_127488 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97388 +} +__kernel void mainMagnitudezisegred_small_97217(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127409_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t r_73698, + int64_t num_groups_97296, + int64_t 
segment_sizze_nonzzero_127402, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_121967, + __global + unsigned char *mem_122677) +{ + #define segred_group_sizze_97295 (mainMagnitudezisegred_group_sizze_97211) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127409_backing_0 = + (__local volatile + char *) red_arr_mem_127409_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127404; + int32_t local_tid_127405; + int64_t group_sizze_127408; + int32_t wave_sizze_127407; + int32_t group_tid_127406; + + global_tid_127404 = get_global_id(0); + local_tid_127405 = get_local_id(0); + group_sizze_127408 = get_local_size(0); + wave_sizze_127407 = LOCKSTEP_WIDTH; + group_tid_127406 = get_group_id(0); + + int32_t phys_tid_97217; + + phys_tid_97217 = global_tid_127404; + + __local char *red_arr_mem_127409; + + red_arr_mem_127409 = (__local char *) red_arr_mem_127409_backing_0; + + int32_t phys_group_id_127411; + + phys_group_id_127411 = get_group_id(0); + for (int32_t i_127412 = 0; i_127412 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402))) - + phys_group_id_127411, sext_i64_i32(num_groups_97296)); + i_127412++) { + int32_t virt_group_id_127413 = phys_group_id_127411 + i_127412 * + sext_i64_i32(num_groups_97296); + int64_t gtid_97208 = squot64(sext_i32_i64(local_tid_127405), + segment_sizze_nonzzero_127402) + + sext_i32_i64(virt_group_id_127413) * + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402); + int64_t gtid_97216 = srem64(sext_i32_i64(local_tid_127405), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (slt64(gtid_97208, + m_73008) && + slt64(sext_i32_i64(local_tid_127405), + k2p2zq_73023 * + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402)))) { + double x_97304 = ((__global double 
*) mem_120246)[gtid_97216 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97208 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_97305 = ((__global + double *) mem_param_121967)[gtid_97208 * + k2p2zq_73023 + + gtid_97216]; + double defunc_1_f_res_97306 = x_97304 * x_97305; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + defunc_1_f_res_97306; + } + } else { + ((__local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_97299; + double x_97300; + double x_127414; + double x_127415; + bool ltid_in_bounds_127417; + + ltid_in_bounds_127417 = slt64(sext_i32_i64(local_tid_127405), + k2p2zq_73023 * + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402)); + + int32_t skip_threads_127418; + + // read input for in-block scan + { + if (ltid_in_bounds_127417) { + x_97300 = ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)]; + if ((local_tid_127405 - squot32(local_tid_127405, 32) * + 32) == 0) { + x_97299 = x_97300; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127418 = 1; + while (slt32(skip_threads_127418, 32)) { + if (sle32(skip_threads_127418, local_tid_127405 - + squot32(local_tid_127405, 32) * 32) && + ltid_in_bounds_127417) { + // read operands + { + x_97299 = ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405) - + sext_i32_i64(skip_threads_127418)]; + } + // perform operation + { + bool inactive_127419 = + slt64(srem64(sext_i32_i64(local_tid_127405), + k2p2zq_73023), + sext_i32_i64(local_tid_127405) - + sext_i32_i64(local_tid_127405 - + skip_threads_127418)); + + if (inactive_127419) { + x_97299 = x_97300; + } + if (!inactive_127419) { + double defunc_1_op_res_97301 = x_97299 + + 
x_97300; + + x_97299 = defunc_1_op_res_97301; + } + } + } + if (sle32(wave_sizze_127407, skip_threads_127418)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127418, local_tid_127405 - + squot32(local_tid_127405, 32) * 32) && + ltid_in_bounds_127417) { + // write result + { + ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + x_97299; + x_97300 = x_97299; + } + } + if (sle32(wave_sizze_127407, skip_threads_127418)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127418 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127405 - squot32(local_tid_127405, 32) * + 32) == 31 && ltid_in_bounds_127417) { + ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(squot32(local_tid_127405, + 32))] = + x_97299; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127420; + + // read input for in-block scan + { + if (squot32(local_tid_127405, 32) == 0 && + ltid_in_bounds_127417) { + x_127415 = ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)]; + if ((local_tid_127405 - squot32(local_tid_127405, + 32) * 32) == 0) { + x_127414 = x_127415; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127420 = 1; + while (slt32(skip_threads_127420, 32)) { + if (sle32(skip_threads_127420, local_tid_127405 - + squot32(local_tid_127405, 32) * 32) && + (squot32(local_tid_127405, 32) == 0 && + ltid_in_bounds_127417)) { + // read operands + { + x_127414 = ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405) - + sext_i32_i64(skip_threads_127420)]; + } + // perform operation + { + bool inactive_127421 = + slt64(srem64(sext_i32_i64(local_tid_127405 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_127405 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127405 - + 
skip_threads_127420) * + 32 + 32 - 1)); + + if (inactive_127421) { + x_127414 = x_127415; + } + if (!inactive_127421) { + double defunc_1_op_res_127416 = + x_127414 + x_127415; + + x_127414 = defunc_1_op_res_127416; + } + } + } + if (sle32(wave_sizze_127407, skip_threads_127420)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127420, local_tid_127405 - + squot32(local_tid_127405, 32) * 32) && + (squot32(local_tid_127405, 32) == 0 && + ltid_in_bounds_127417)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + x_127414; + x_127415 = x_127414; + } + } + if (sle32(wave_sizze_127407, skip_threads_127420)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127420 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127405, 32) == 0 || + !ltid_in_bounds_127417)) { + // read operands + { + x_97300 = x_97299; + x_97299 = ((__local + double *) red_arr_mem_127409)[sext_i32_i64(squot32(local_tid_127405, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127422 = + slt64(srem64(sext_i32_i64(local_tid_127405), + k2p2zq_73023), + sext_i32_i64(local_tid_127405) - + sext_i32_i64(squot32(local_tid_127405, + 32) * 32 - 1)); + + if (inactive_127422) { + x_97299 = x_97300; + } + if (!inactive_127422) { + double defunc_1_op_res_97301 = x_97299 + + x_97300; + + x_97299 = defunc_1_op_res_97301; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + x_97299; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127405, 32) == 0) { + ((__local + double *) red_arr_mem_127409)[sext_i32_i64(local_tid_127405)] = + x_97300; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127413) * + 
squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402) + + sext_i32_i64(local_tid_127405), m_73008) && + slt64(sext_i32_i64(local_tid_127405), + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402))) { + ((__global + double *) mem_122677)[sext_i32_i64(virt_group_id_127413) * + squot64(segred_group_sizze_97295, + segment_sizze_nonzzero_127402) + + sext_i32_i64(local_tid_127405)] = + ((__local + double *) red_arr_mem_127409)[(sext_i32_i64(local_tid_127405) + + (int64_t) 1) * + segment_sizze_nonzzero_127402 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97295 +} +__kernel void mainMagnitudezisegred_small_97246(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_127342_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t r_73698, + int64_t num_groups_97265, + int64_t segment_sizze_nonzzero_127335, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_122665, + __global + unsigned char *mem_122668, + __global + unsigned char *mem_122671) +{ + #define segred_group_sizze_97264 (mainMagnitudezisegred_group_sizze_97240) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_127342_backing_0 = + (__local volatile + char *) red_arr_mem_127342_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_127337; + int32_t local_tid_127338; + int64_t group_sizze_127341; + int32_t wave_sizze_127340; + int32_t group_tid_127339; + + global_tid_127337 = get_global_id(0); + local_tid_127338 = get_local_id(0); + group_sizze_127341 = get_local_size(0); + wave_sizze_127340 = LOCKSTEP_WIDTH; + group_tid_127339 = get_group_id(0); + + int32_t phys_tid_97246; + + phys_tid_97246 = global_tid_127337; + + __local char *red_arr_mem_127342; + + red_arr_mem_127342 
= (__local char *) red_arr_mem_127342_backing_0; + + int32_t phys_group_id_127344; + + phys_group_id_127344 = get_group_id(0); + for (int32_t i_127345 = 0; i_127345 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335))) - + phys_group_id_127344, sext_i64_i32(num_groups_97265)); + i_127345++) { + int32_t virt_group_id_127346 = phys_group_id_127344 + i_127345 * + sext_i64_i32(num_groups_97265); + int64_t gtid_97237 = squot64(sext_i32_i64(local_tid_127338), + segment_sizze_nonzzero_127335) + + sext_i32_i64(virt_group_id_127346) * + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335); + int64_t gtid_97245 = srem64(sext_i32_i64(local_tid_127338), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (slt64(gtid_97237, + m_73008) && + slt64(sext_i32_i64(local_tid_127338), + k2p2zq_73023 * + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335)))) { + double x_97275 = ((__global double *) mem_120246)[gtid_97245 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97237 * + defunc_2_reduce_res_73132 + + r_73698]; + double defunc_0_f_res_97276; + double redout_119829 = 0.0; + + for (int64_t i_119830 = 0; i_119830 < k2p2zq_73023; + i_119830++) { + double x_97280 = ((__global double *) mem_120246)[i_119830 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_97237 * + defunc_2_reduce_res_73132 + + r_73698]; + double x_97281 = ((__global double *) mem_122665)[i_119830 * + (k2p2zq_73023 * + m_73008) + + gtid_97237 * + k2p2zq_73023 + + gtid_97245]; + double defunc_1_f_res_97282 = x_97280 * x_97281; + double defunc_1_op_res_97279 = defunc_1_f_res_97282 + + redout_119829; + double redout_tmp_127347 = defunc_1_op_res_97279; + + redout_119829 = redout_tmp_127347; + } + defunc_0_f_res_97276 = redout_119829; + + double defunc_1_f_res_97283 = x_97275 * defunc_0_f_res_97276; + + // save map-out results + { + ((__global double *) mem_122671)[gtid_97237 
* k2p2zq_73023 + + gtid_97245] = + defunc_0_f_res_97276; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + defunc_1_f_res_97283; + } + } else { + ((__local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_97269; + double x_97270; + double x_127348; + double x_127349; + bool ltid_in_bounds_127351; + + ltid_in_bounds_127351 = slt64(sext_i32_i64(local_tid_127338), + k2p2zq_73023 * + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335)); + + int32_t skip_threads_127352; + + // read input for in-block scan + { + if (ltid_in_bounds_127351) { + x_97270 = ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)]; + if ((local_tid_127338 - squot32(local_tid_127338, 32) * + 32) == 0) { + x_97269 = x_97270; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127352 = 1; + while (slt32(skip_threads_127352, 32)) { + if (sle32(skip_threads_127352, local_tid_127338 - + squot32(local_tid_127338, 32) * 32) && + ltid_in_bounds_127351) { + // read operands + { + x_97269 = ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338) - + sext_i32_i64(skip_threads_127352)]; + } + // perform operation + { + bool inactive_127353 = + slt64(srem64(sext_i32_i64(local_tid_127338), + k2p2zq_73023), + sext_i32_i64(local_tid_127338) - + sext_i32_i64(local_tid_127338 - + skip_threads_127352)); + + if (inactive_127353) { + x_97269 = x_97270; + } + if (!inactive_127353) { + double defunc_1_op_res_97271 = x_97269 + + x_97270; + + x_97269 = defunc_1_op_res_97271; + } + } + } + if (sle32(wave_sizze_127340, skip_threads_127352)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127352, local_tid_127338 - + squot32(local_tid_127338, 32) * 32) && + 
ltid_in_bounds_127351) { + // write result + { + ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + x_97269; + x_97270 = x_97269; + } + } + if (sle32(wave_sizze_127340, skip_threads_127352)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127352 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_127338 - squot32(local_tid_127338, 32) * + 32) == 31 && ltid_in_bounds_127351) { + ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(squot32(local_tid_127338, + 32))] = + x_97269; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_127354; + + // read input for in-block scan + { + if (squot32(local_tid_127338, 32) == 0 && + ltid_in_bounds_127351) { + x_127349 = ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)]; + if ((local_tid_127338 - squot32(local_tid_127338, + 32) * 32) == 0) { + x_127348 = x_127349; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_127354 = 1; + while (slt32(skip_threads_127354, 32)) { + if (sle32(skip_threads_127354, local_tid_127338 - + squot32(local_tid_127338, 32) * 32) && + (squot32(local_tid_127338, 32) == 0 && + ltid_in_bounds_127351)) { + // read operands + { + x_127348 = ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338) - + sext_i32_i64(skip_threads_127354)]; + } + // perform operation + { + bool inactive_127355 = + slt64(srem64(sext_i32_i64(local_tid_127338 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_127338 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_127338 - + skip_threads_127354) * + 32 + 32 - 1)); + + if (inactive_127355) { + x_127348 = x_127349; + } + if (!inactive_127355) { + double defunc_1_op_res_127350 = + x_127348 + x_127349; + + x_127348 = defunc_1_op_res_127350; + } + } + } + if 
(sle32(wave_sizze_127340, skip_threads_127354)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_127354, local_tid_127338 - + squot32(local_tid_127338, 32) * 32) && + (squot32(local_tid_127338, 32) == 0 && + ltid_in_bounds_127351)) { + // write result + { + ((volatile __local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + x_127348; + x_127349 = x_127348; + } + } + if (sle32(wave_sizze_127340, skip_threads_127354)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_127354 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_127338, 32) == 0 || + !ltid_in_bounds_127351)) { + // read operands + { + x_97270 = x_97269; + x_97269 = ((__local + double *) red_arr_mem_127342)[sext_i32_i64(squot32(local_tid_127338, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_127356 = + slt64(srem64(sext_i32_i64(local_tid_127338), + k2p2zq_73023), + sext_i32_i64(local_tid_127338) - + sext_i32_i64(squot32(local_tid_127338, + 32) * 32 - 1)); + + if (inactive_127356) { + x_97269 = x_97270; + } + if (!inactive_127356) { + double defunc_1_op_res_97271 = x_97269 + + x_97270; + + x_97269 = defunc_1_op_res_97271; + } + } + // write final result + { + ((__local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + x_97269; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_127338, 32) == 0) { + ((__local + double *) red_arr_mem_127342)[sext_i32_i64(local_tid_127338)] = + x_97270; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_127346) * + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335) + + sext_i32_i64(local_tid_127338), m_73008) && + slt64(sext_i32_i64(local_tid_127338), + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335))) { + ((__global + 
double *) mem_122668)[sext_i32_i64(virt_group_id_127346) * + squot64(segred_group_sizze_97264, + segment_sizze_nonzzero_127335) + + sext_i32_i64(local_tid_127338)] = + ((__local + double *) red_arr_mem_127342)[(sext_i32_i64(local_tid_127338) + + (int64_t) 1) * + segment_sizze_nonzzero_127335 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_97264 +} +__kernel void mainMagnitudezisegred_small_98543(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128218_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98751, + int64_t binop_x_120251, + int64_t segment_sizze_nonzzero_128211, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123778, + __global + unsigned char *mem_123907, + __global + unsigned char *mem_123910, + __global + unsigned char *mem_123944, + __global + unsigned char *mem_123948) +{ + #define segred_group_sizze_98750 (mainMagnitudezisegred_group_sizze_98537) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128218_backing_0 = + (__local volatile + char *) red_arr_mem_128218_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128213; + int32_t local_tid_128214; + int64_t group_sizze_128217; + int32_t wave_sizze_128216; + int32_t group_tid_128215; + + global_tid_128213 = get_global_id(0); + local_tid_128214 = get_local_id(0); + group_sizze_128217 = get_local_size(0); + wave_sizze_128216 = LOCKSTEP_WIDTH; + group_tid_128215 = get_group_id(0); + + int32_t phys_tid_98543; + + phys_tid_98543 = global_tid_128213; + + __local char *red_arr_mem_128218; + + red_arr_mem_128218 = (__local char *) red_arr_mem_128218_backing_0; + + int32_t phys_group_id_128220; + + phys_group_id_128220 = 
get_group_id(0); + for (int32_t i_128221 = 0; i_128221 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008 * k2p2zq_73023, + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211))) - + phys_group_id_128220, sext_i64_i32(num_groups_98751)); + i_128221++) { + int32_t virt_group_id_128222 = phys_group_id_128220 + i_128221 * + sext_i64_i32(num_groups_98751); + int64_t gtid_98532 = squot64(squot64(sext_i32_i64(local_tid_128214), + segment_sizze_nonzzero_128211) + + sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211), + k2p2zq_73023); + int64_t gtid_98533 = squot64(sext_i32_i64(local_tid_128214), + segment_sizze_nonzzero_128211) + + sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211) - + squot64(squot64(sext_i32_i64(local_tid_128214), + segment_sizze_nonzzero_128211) + + sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211), k2p2zq_73023) * + k2p2zq_73023; + int64_t gtid_98542 = srem64(sext_i32_i64(local_tid_128214), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && ((slt64(gtid_98532, + m_73008) && + slt64(gtid_98533, + k2p2zq_73023)) && + slt64(sext_i32_i64(local_tid_128214), + k2p2zq_73023 * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211)))) { + double fr_98761 = ((__global double *) mem_123910)[gtid_98532]; + double x_98762 = ((__global double *) mem_123907)[gtid_98532 * + k2p2zq_73023 + + gtid_98533]; + double x_98764 = ((__global double *) mem_123907)[gtid_98532 * + k2p2zq_73023 + + gtid_98542]; + double x_98765 = ((__global + double *) mem_param_123778)[gtid_98532 * + binop_x_120251 + + gtid_98533 * + k2p2zq_73023 + + gtid_98542]; + double x_98766 = ((__global double *) mem_120246)[gtid_98542 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98532 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98767 = 
x_98762 * x_98764; + double y_98768 = x_98767 / fr_98761; + double defunc_1_f_res_98769 = x_98765 - y_98768; + double defunc_1_f_res_98770 = x_98766 * defunc_1_f_res_98769; + + // save map-out results + { + ((__global double *) mem_123948)[gtid_98532 * + (k2p2zq_73023 * + k2p2zq_73023) + + gtid_98533 * k2p2zq_73023 + + gtid_98542] = + defunc_1_f_res_98769; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + defunc_1_f_res_98770; + } + } else { + ((__local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_98755; + double x_98756; + double x_128223; + double x_128224; + bool ltid_in_bounds_128226; + + ltid_in_bounds_128226 = slt64(sext_i32_i64(local_tid_128214), + k2p2zq_73023 * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211)); + + int32_t skip_threads_128227; + + // read input for in-block scan + { + if (ltid_in_bounds_128226) { + x_98756 = ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)]; + if ((local_tid_128214 - squot32(local_tid_128214, 32) * + 32) == 0) { + x_98755 = x_98756; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128227 = 1; + while (slt32(skip_threads_128227, 32)) { + if (sle32(skip_threads_128227, local_tid_128214 - + squot32(local_tid_128214, 32) * 32) && + ltid_in_bounds_128226) { + // read operands + { + x_98755 = ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214) - + sext_i32_i64(skip_threads_128227)]; + } + // perform operation + { + bool inactive_128228 = + slt64(srem64(sext_i32_i64(local_tid_128214), + k2p2zq_73023), + sext_i32_i64(local_tid_128214) - + sext_i32_i64(local_tid_128214 - + skip_threads_128227)); + + if (inactive_128228) { + x_98755 = x_98756; + } + if (!inactive_128228) { + 
double defunc_1_op_res_98757 = x_98755 + + x_98756; + + x_98755 = defunc_1_op_res_98757; + } + } + } + if (sle32(wave_sizze_128216, skip_threads_128227)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128227, local_tid_128214 - + squot32(local_tid_128214, 32) * 32) && + ltid_in_bounds_128226) { + // write result + { + ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + x_98755; + x_98756 = x_98755; + } + } + if (sle32(wave_sizze_128216, skip_threads_128227)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128227 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128214 - squot32(local_tid_128214, 32) * + 32) == 31 && ltid_in_bounds_128226) { + ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(squot32(local_tid_128214, + 32))] = + x_98755; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128229; + + // read input for in-block scan + { + if (squot32(local_tid_128214, 32) == 0 && + ltid_in_bounds_128226) { + x_128224 = ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)]; + if ((local_tid_128214 - squot32(local_tid_128214, + 32) * 32) == 0) { + x_128223 = x_128224; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128229 = 1; + while (slt32(skip_threads_128229, 32)) { + if (sle32(skip_threads_128229, local_tid_128214 - + squot32(local_tid_128214, 32) * 32) && + (squot32(local_tid_128214, 32) == 0 && + ltid_in_bounds_128226)) { + // read operands + { + x_128223 = ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214) - + sext_i32_i64(skip_threads_128229)]; + } + // perform operation + { + bool inactive_128230 = + slt64(srem64(sext_i32_i64(local_tid_128214 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_128214 * + 32 + 32 - 1) 
- + sext_i32_i64((local_tid_128214 - + skip_threads_128229) * + 32 + 32 - 1)); + + if (inactive_128230) { + x_128223 = x_128224; + } + if (!inactive_128230) { + double defunc_1_op_res_128225 = + x_128223 + x_128224; + + x_128223 = defunc_1_op_res_128225; + } + } + } + if (sle32(wave_sizze_128216, skip_threads_128229)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128229, local_tid_128214 - + squot32(local_tid_128214, 32) * 32) && + (squot32(local_tid_128214, 32) == 0 && + ltid_in_bounds_128226)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + x_128223; + x_128224 = x_128223; + } + } + if (sle32(wave_sizze_128216, skip_threads_128229)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128229 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128214, 32) == 0 || + !ltid_in_bounds_128226)) { + // read operands + { + x_98756 = x_98755; + x_98755 = ((__local + double *) red_arr_mem_128218)[sext_i32_i64(squot32(local_tid_128214, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128231 = + slt64(srem64(sext_i32_i64(local_tid_128214), + k2p2zq_73023), + sext_i32_i64(local_tid_128214) - + sext_i32_i64(squot32(local_tid_128214, + 32) * 32 - 1)); + + if (inactive_128231) { + x_98755 = x_98756; + } + if (!inactive_128231) { + double defunc_1_op_res_98757 = x_98755 + + x_98756; + + x_98755 = defunc_1_op_res_98757; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + x_98755; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128214, 32) == 0) { + ((__local + double *) red_arr_mem_128218)[sext_i32_i64(local_tid_128214)] = + x_98756; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if 
(slt64(sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211) + + sext_i32_i64(local_tid_128214), m_73008 * k2p2zq_73023) && + slt64(sext_i32_i64(local_tid_128214), + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211))) { + ((__global + double *) mem_123944)[squot64(sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211) + + sext_i32_i64(local_tid_128214), + k2p2zq_73023) * k2p2zq_73023 + + (sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211) + + sext_i32_i64(local_tid_128214) - + squot64(sext_i32_i64(virt_group_id_128222) * + squot64(segred_group_sizze_98750, + segment_sizze_nonzzero_128211) + + sext_i32_i64(local_tid_128214), + k2p2zq_73023) * + k2p2zq_73023)] = ((__local + double *) red_arr_mem_128218)[(sext_i32_i64(local_tid_128214) + + (int64_t) 1) * + segment_sizze_nonzzero_128211 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_98750 +} +__kernel void mainMagnitudezisegred_small_98618(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128142_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98692, + int64_t segment_sizze_nonzzero_128135, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_param_123786, + __global + unsigned char *mem_123913) +{ + #define segred_group_sizze_98691 (mainMagnitudezisegred_group_sizze_98612) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128142_backing_0 = + (__local volatile + char *) red_arr_mem_128142_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128137; + int32_t local_tid_128138; + 
int64_t group_sizze_128141; + int32_t wave_sizze_128140; + int32_t group_tid_128139; + + global_tid_128137 = get_global_id(0); + local_tid_128138 = get_local_id(0); + group_sizze_128141 = get_local_size(0); + wave_sizze_128140 = LOCKSTEP_WIDTH; + group_tid_128139 = get_group_id(0); + + int32_t phys_tid_98618; + + phys_tid_98618 = global_tid_128137; + + __local char *red_arr_mem_128142; + + red_arr_mem_128142 = (__local char *) red_arr_mem_128142_backing_0; + + int32_t phys_group_id_128144; + + phys_group_id_128144 = get_group_id(0); + for (int32_t i_128145 = 0; i_128145 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135))) - + phys_group_id_128144, sext_i64_i32(num_groups_98692)); + i_128145++) { + int32_t virt_group_id_128146 = phys_group_id_128144 + i_128145 * + sext_i64_i32(num_groups_98692); + int64_t gtid_98609 = squot64(sext_i32_i64(local_tid_128138), + segment_sizze_nonzzero_128135) + + sext_i32_i64(virt_group_id_128146) * + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135); + int64_t gtid_98617 = srem64(sext_i32_i64(local_tid_128138), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (slt64(gtid_98609, + m_73008) && + slt64(sext_i32_i64(local_tid_128138), + k2p2zq_73023 * + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135)))) { + double x_98700 = ((__global double *) mem_120246)[gtid_98617 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98609 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98701 = ((__global + double *) mem_param_123786)[gtid_98609 * + k2p2zq_73023 + + gtid_98617]; + double defunc_1_f_res_98702 = x_98700 * x_98701; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + defunc_1_f_res_98702; + } + } else { + ((__local + double *) 
red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double x_98695; + double x_98696; + double x_128147; + double x_128148; + bool ltid_in_bounds_128150; + + ltid_in_bounds_128150 = slt64(sext_i32_i64(local_tid_128138), + k2p2zq_73023 * + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135)); + + int32_t skip_threads_128151; + + // read input for in-block scan + { + if (ltid_in_bounds_128150) { + x_98696 = ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)]; + if ((local_tid_128138 - squot32(local_tid_128138, 32) * + 32) == 0) { + x_98695 = x_98696; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128151 = 1; + while (slt32(skip_threads_128151, 32)) { + if (sle32(skip_threads_128151, local_tid_128138 - + squot32(local_tid_128138, 32) * 32) && + ltid_in_bounds_128150) { + // read operands + { + x_98695 = ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138) - + sext_i32_i64(skip_threads_128151)]; + } + // perform operation + { + bool inactive_128152 = + slt64(srem64(sext_i32_i64(local_tid_128138), + k2p2zq_73023), + sext_i32_i64(local_tid_128138) - + sext_i32_i64(local_tid_128138 - + skip_threads_128151)); + + if (inactive_128152) { + x_98695 = x_98696; + } + if (!inactive_128152) { + double defunc_1_op_res_98697 = x_98695 + + x_98696; + + x_98695 = defunc_1_op_res_98697; + } + } + } + if (sle32(wave_sizze_128140, skip_threads_128151)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128151, local_tid_128138 - + squot32(local_tid_128138, 32) * 32) && + ltid_in_bounds_128150) { + // write result + { + ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + x_98695; + x_98696 = x_98695; + } + } + if (sle32(wave_sizze_128140, skip_threads_128151)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128151 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128138 - squot32(local_tid_128138, 32) * + 32) == 31 && ltid_in_bounds_128150) { + ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(squot32(local_tid_128138, + 32))] = + x_98695; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128153; + + // read input for in-block scan + { + if (squot32(local_tid_128138, 32) == 0 && + ltid_in_bounds_128150) { + x_128148 = ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)]; + if ((local_tid_128138 - squot32(local_tid_128138, + 32) * 32) == 0) { + x_128147 = x_128148; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128153 = 1; + while (slt32(skip_threads_128153, 32)) { + if (sle32(skip_threads_128153, local_tid_128138 - + squot32(local_tid_128138, 32) * 32) && + (squot32(local_tid_128138, 32) == 0 && + ltid_in_bounds_128150)) { + // read operands + { + x_128147 = ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138) - + sext_i32_i64(skip_threads_128153)]; + } + // perform operation + { + bool inactive_128154 = + slt64(srem64(sext_i32_i64(local_tid_128138 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_128138 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128138 - + skip_threads_128153) * + 32 + 32 - 1)); + + if (inactive_128154) { + x_128147 = x_128148; + } + if (!inactive_128154) { + double defunc_1_op_res_128149 = + x_128147 + x_128148; + + x_128147 = defunc_1_op_res_128149; + } + } + } + if (sle32(wave_sizze_128140, skip_threads_128153)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128153, local_tid_128138 - + squot32(local_tid_128138, 32) * 32) && + (squot32(local_tid_128138, 32) == 0 && + ltid_in_bounds_128150)) 
{ + // write result + { + ((volatile __local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + x_128147; + x_128148 = x_128147; + } + } + if (sle32(wave_sizze_128140, skip_threads_128153)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128153 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128138, 32) == 0 || + !ltid_in_bounds_128150)) { + // read operands + { + x_98696 = x_98695; + x_98695 = ((__local + double *) red_arr_mem_128142)[sext_i32_i64(squot32(local_tid_128138, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128155 = + slt64(srem64(sext_i32_i64(local_tid_128138), + k2p2zq_73023), + sext_i32_i64(local_tid_128138) - + sext_i32_i64(squot32(local_tid_128138, + 32) * 32 - 1)); + + if (inactive_128155) { + x_98695 = x_98696; + } + if (!inactive_128155) { + double defunc_1_op_res_98697 = x_98695 + + x_98696; + + x_98695 = defunc_1_op_res_98697; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + x_98695; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128138, 32) == 0) { + ((__local + double *) red_arr_mem_128142)[sext_i32_i64(local_tid_128138)] = + x_98696; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128146) * + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135) + + sext_i32_i64(local_tid_128138), m_73008) && + slt64(sext_i32_i64(local_tid_128138), + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135))) { + ((__global + double *) mem_123913)[sext_i32_i64(virt_group_id_128146) * + squot64(segred_group_sizze_98691, + segment_sizze_nonzzero_128135) + + sext_i32_i64(local_tid_128138)] = + ((__local + double *) red_arr_mem_128142)[(sext_i32_i64(local_tid_128138) + + 
(int64_t) 1) * + segment_sizze_nonzzero_128135 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_98691 +} +__kernel void mainMagnitudezisegred_small_98645(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128075_backing_aligned_0, + int64_t m_73008, + int64_t k2p2zq_73023, + int64_t defunc_2_reduce_res_73132, + int64_t index_primexp_74309, + int64_t num_groups_98664, + int64_t segment_sizze_nonzzero_128068, + __global + unsigned char *mem_120246, + __global + unsigned char *mem_123901, + __global + unsigned char *mem_123904, + __global + unsigned char *mem_123907) +{ + #define segred_group_sizze_98663 (mainMagnitudezisegred_group_sizze_98639) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128075_backing_0 = + (__local volatile + char *) red_arr_mem_128075_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128070; + int32_t local_tid_128071; + int64_t group_sizze_128074; + int32_t wave_sizze_128073; + int32_t group_tid_128072; + + global_tid_128070 = get_global_id(0); + local_tid_128071 = get_local_id(0); + group_sizze_128074 = get_local_size(0); + wave_sizze_128073 = LOCKSTEP_WIDTH; + group_tid_128072 = get_group_id(0); + + int32_t phys_tid_98645; + + phys_tid_98645 = global_tid_128070; + + __local char *red_arr_mem_128075; + + red_arr_mem_128075 = (__local char *) red_arr_mem_128075_backing_0; + + int32_t phys_group_id_128077; + + phys_group_id_128077 = get_group_id(0); + for (int32_t i_128078 = 0; i_128078 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068))) - + phys_group_id_128077, sext_i64_i32(num_groups_98664)); + i_128078++) { + int32_t virt_group_id_128079 = phys_group_id_128077 + i_128078 * + sext_i64_i32(num_groups_98664); + int64_t 
gtid_98636 = squot64(sext_i32_i64(local_tid_128071), + segment_sizze_nonzzero_128068) + + sext_i32_i64(virt_group_id_128079) * + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068); + int64_t gtid_98644 = srem64(sext_i32_i64(local_tid_128071), + k2p2zq_73023); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, k2p2zq_73023) && (slt64(gtid_98636, + m_73008) && + slt64(sext_i32_i64(local_tid_128071), + k2p2zq_73023 * + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068)))) { + double x_98674 = ((__global double *) mem_120246)[gtid_98644 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98636 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double defunc_0_f_res_98675; + double redout_119889 = 0.0; + + for (int64_t i_119890 = 0; i_119890 < k2p2zq_73023; + i_119890++) { + double x_98679 = ((__global double *) mem_120246)[i_119890 * + (defunc_2_reduce_res_73132 * + m_73008) + + gtid_98636 * + defunc_2_reduce_res_73132 + + index_primexp_74309]; + double x_98680 = ((__global double *) mem_123901)[i_119890 * + (k2p2zq_73023 * + m_73008) + + gtid_98636 * + k2p2zq_73023 + + gtid_98644]; + double defunc_1_f_res_98681 = x_98679 * x_98680; + double defunc_1_op_res_98678 = defunc_1_f_res_98681 + + redout_119889; + double redout_tmp_128080 = defunc_1_op_res_98678; + + redout_119889 = redout_tmp_128080; + } + defunc_0_f_res_98675 = redout_119889; + + double defunc_1_f_res_98682 = x_98674 * defunc_0_f_res_98675; + + // save map-out results + { + ((__global double *) mem_123907)[gtid_98636 * k2p2zq_73023 + + gtid_98644] = + defunc_0_f_res_98675; + } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + defunc_1_f_res_98682; + } + } else { + ((__local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, k2p2zq_73023)) { + // perform segmented scan to imitate reduction + { + double 
x_98668; + double x_98669; + double x_128081; + double x_128082; + bool ltid_in_bounds_128084; + + ltid_in_bounds_128084 = slt64(sext_i32_i64(local_tid_128071), + k2p2zq_73023 * + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068)); + + int32_t skip_threads_128085; + + // read input for in-block scan + { + if (ltid_in_bounds_128084) { + x_98669 = ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)]; + if ((local_tid_128071 - squot32(local_tid_128071, 32) * + 32) == 0) { + x_98668 = x_98669; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128085 = 1; + while (slt32(skip_threads_128085, 32)) { + if (sle32(skip_threads_128085, local_tid_128071 - + squot32(local_tid_128071, 32) * 32) && + ltid_in_bounds_128084) { + // read operands + { + x_98668 = ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071) - + sext_i32_i64(skip_threads_128085)]; + } + // perform operation + { + bool inactive_128086 = + slt64(srem64(sext_i32_i64(local_tid_128071), + k2p2zq_73023), + sext_i32_i64(local_tid_128071) - + sext_i32_i64(local_tid_128071 - + skip_threads_128085)); + + if (inactive_128086) { + x_98668 = x_98669; + } + if (!inactive_128086) { + double defunc_1_op_res_98670 = x_98668 + + x_98669; + + x_98668 = defunc_1_op_res_98670; + } + } + } + if (sle32(wave_sizze_128073, skip_threads_128085)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128085, local_tid_128071 - + squot32(local_tid_128071, 32) * 32) && + ltid_in_bounds_128084) { + // write result + { + ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + x_98668; + x_98669 = x_98668; + } + } + if (sle32(wave_sizze_128073, skip_threads_128085)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128085 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128071 - squot32(local_tid_128071, 32) * + 32) 
== 31 && ltid_in_bounds_128084) { + ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(squot32(local_tid_128071, + 32))] = + x_98668; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128087; + + // read input for in-block scan + { + if (squot32(local_tid_128071, 32) == 0 && + ltid_in_bounds_128084) { + x_128082 = ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)]; + if ((local_tid_128071 - squot32(local_tid_128071, + 32) * 32) == 0) { + x_128081 = x_128082; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128087 = 1; + while (slt32(skip_threads_128087, 32)) { + if (sle32(skip_threads_128087, local_tid_128071 - + squot32(local_tid_128071, 32) * 32) && + (squot32(local_tid_128071, 32) == 0 && + ltid_in_bounds_128084)) { + // read operands + { + x_128081 = ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071) - + sext_i32_i64(skip_threads_128087)]; + } + // perform operation + { + bool inactive_128088 = + slt64(srem64(sext_i32_i64(local_tid_128071 * + 32 + 32 - 1), + k2p2zq_73023), + sext_i32_i64(local_tid_128071 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128071 - + skip_threads_128087) * + 32 + 32 - 1)); + + if (inactive_128088) { + x_128081 = x_128082; + } + if (!inactive_128088) { + double defunc_1_op_res_128083 = + x_128081 + x_128082; + + x_128081 = defunc_1_op_res_128083; + } + } + } + if (sle32(wave_sizze_128073, skip_threads_128087)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128087, local_tid_128071 - + squot32(local_tid_128071, 32) * 32) && + (squot32(local_tid_128071, 32) == 0 && + ltid_in_bounds_128084)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + x_128081; + x_128082 = x_128081; + } + } + if (sle32(wave_sizze_128073, skip_threads_128087)) { + 
barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128087 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128071, 32) == 0 || + !ltid_in_bounds_128084)) { + // read operands + { + x_98669 = x_98668; + x_98668 = ((__local + double *) red_arr_mem_128075)[sext_i32_i64(squot32(local_tid_128071, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128089 = + slt64(srem64(sext_i32_i64(local_tid_128071), + k2p2zq_73023), + sext_i32_i64(local_tid_128071) - + sext_i32_i64(squot32(local_tid_128071, + 32) * 32 - 1)); + + if (inactive_128089) { + x_98668 = x_98669; + } + if (!inactive_128089) { + double defunc_1_op_res_98670 = x_98668 + + x_98669; + + x_98668 = defunc_1_op_res_98670; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + x_98668; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128071, 32) == 0) { + ((__local + double *) red_arr_mem_128075)[sext_i32_i64(local_tid_128071)] = + x_98669; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128079) * + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068) + + sext_i32_i64(local_tid_128071), m_73008) && + slt64(sext_i32_i64(local_tid_128071), + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068))) { + ((__global + double *) mem_123904)[sext_i32_i64(virt_group_id_128079) * + squot64(segred_group_sizze_98663, + segment_sizze_nonzzero_128068) + + sext_i32_i64(local_tid_128071)] = + ((__local + double *) red_arr_mem_128075)[(sext_i32_i64(local_tid_128071) + + (int64_t) 1) * + segment_sizze_nonzzero_128068 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef 
segred_group_sizze_98663 +} +__kernel void mainMagnitudezisegred_small_99002(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128410_backing_aligned_0, + int64_t m_73008, + int64_t num_recresids_padded_73681, + int64_t num_groups_99084, + int64_t segment_sizze_nonzzero_128403, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124051, + __global + unsigned char *mem_124054) +{ + #define segred_group_sizze_99083 (mainMagnitudezisegred_group_sizze_98996) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128410_backing_0 = + (__local volatile + char *) red_arr_mem_128410_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128405; + int32_t local_tid_128406; + int64_t group_sizze_128409; + int32_t wave_sizze_128408; + int32_t group_tid_128407; + + global_tid_128405 = get_global_id(0); + local_tid_128406 = get_local_id(0); + group_sizze_128409 = get_local_size(0); + wave_sizze_128408 = LOCKSTEP_WIDTH; + group_tid_128407 = get_group_id(0); + + int32_t phys_tid_99002; + + phys_tid_99002 = global_tid_128405; + + __local char *red_arr_mem_128410; + + red_arr_mem_128410 = (__local char *) red_arr_mem_128410_backing_0; + + int32_t phys_group_id_128412; + + phys_group_id_128412 = get_group_id(0); + for (int32_t i_128413 = 0; i_128413 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403))) - + phys_group_id_128412, sext_i64_i32(num_groups_99084)); + i_128413++) { + int32_t virt_group_id_128414 = phys_group_id_128412 + i_128413 * + sext_i64_i32(num_groups_99084); + int64_t gtid_98993 = squot64(sext_i32_i64(local_tid_128406), + segment_sizze_nonzzero_128403) + + sext_i32_i64(virt_group_id_128414) * + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403); + int64_t gtid_99001 = srem64(sext_i32_i64(local_tid_128406), + num_recresids_padded_73681); + + // 
apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_73681) && + (slt64(gtid_98993, m_73008) && + slt64(sext_i32_i64(local_tid_128406), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403)))) { + double x_99092 = ((__global double *) mem_124045)[gtid_98993 * + num_recresids_padded_73681 + + gtid_99001]; + bool isnan_res_99093; + + isnan_res_99093 = futrts_isnan64(x_99092); + + double defunc_0_f_res_99094; + + if (isnan_res_99093) { + defunc_0_f_res_99094 = 0.0; + } else { + double x_mean_99091 = ((__global + double *) mem_124051)[gtid_98993]; + double x_99095 = x_99092 - x_mean_99091; + double defunc_0_f_res_f_res_99096 = fpow64(x_99095, 2.0); + + defunc_0_f_res_99094 = defunc_0_f_res_f_res_99096; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + defunc_0_f_res_99094; + } + } else { + ((__local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_73681)) { + // perform segmented scan to imitate reduction + { + double x_99087; + double x_99088; + double x_128415; + double x_128416; + bool ltid_in_bounds_128418; + + ltid_in_bounds_128418 = slt64(sext_i32_i64(local_tid_128406), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403)); + + int32_t skip_threads_128419; + + // read input for in-block scan + { + if (ltid_in_bounds_128418) { + x_99088 = ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)]; + if ((local_tid_128406 - squot32(local_tid_128406, 32) * + 32) == 0) { + x_99087 = x_99088; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128419 = 1; + while (slt32(skip_threads_128419, 32)) { + if (sle32(skip_threads_128419, local_tid_128406 - + squot32(local_tid_128406, 32) * 32) && 
+ ltid_in_bounds_128418) { + // read operands + { + x_99087 = ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406) - + sext_i32_i64(skip_threads_128419)]; + } + // perform operation + { + bool inactive_128420 = + slt64(srem64(sext_i32_i64(local_tid_128406), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128406) - + sext_i32_i64(local_tid_128406 - + skip_threads_128419)); + + if (inactive_128420) { + x_99087 = x_99088; + } + if (!inactive_128420) { + double defunc_1_op_res_99089 = x_99087 + + x_99088; + + x_99087 = defunc_1_op_res_99089; + } + } + } + if (sle32(wave_sizze_128408, skip_threads_128419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128419, local_tid_128406 - + squot32(local_tid_128406, 32) * 32) && + ltid_in_bounds_128418) { + // write result + { + ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + x_99087; + x_99088 = x_99087; + } + } + if (sle32(wave_sizze_128408, skip_threads_128419)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128419 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128406 - squot32(local_tid_128406, 32) * + 32) == 31 && ltid_in_bounds_128418) { + ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(squot32(local_tid_128406, + 32))] = + x_99087; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128421; + + // read input for in-block scan + { + if (squot32(local_tid_128406, 32) == 0 && + ltid_in_bounds_128418) { + x_128416 = ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)]; + if ((local_tid_128406 - squot32(local_tid_128406, + 32) * 32) == 0) { + x_128415 = x_128416; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128421 = 1; + while (slt32(skip_threads_128421, 32)) { + if 
(sle32(skip_threads_128421, local_tid_128406 - + squot32(local_tid_128406, 32) * 32) && + (squot32(local_tid_128406, 32) == 0 && + ltid_in_bounds_128418)) { + // read operands + { + x_128415 = ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406) - + sext_i32_i64(skip_threads_128421)]; + } + // perform operation + { + bool inactive_128422 = + slt64(srem64(sext_i32_i64(local_tid_128406 * + 32 + 32 - 1), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128406 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128406 - + skip_threads_128421) * + 32 + 32 - 1)); + + if (inactive_128422) { + x_128415 = x_128416; + } + if (!inactive_128422) { + double defunc_1_op_res_128417 = + x_128415 + x_128416; + + x_128415 = defunc_1_op_res_128417; + } + } + } + if (sle32(wave_sizze_128408, skip_threads_128421)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128421, local_tid_128406 - + squot32(local_tid_128406, 32) * 32) && + (squot32(local_tid_128406, 32) == 0 && + ltid_in_bounds_128418)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + x_128415; + x_128416 = x_128415; + } + } + if (sle32(wave_sizze_128408, skip_threads_128421)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128421 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128406, 32) == 0 || + !ltid_in_bounds_128418)) { + // read operands + { + x_99088 = x_99087; + x_99087 = ((__local + double *) red_arr_mem_128410)[sext_i32_i64(squot32(local_tid_128406, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128423 = + slt64(srem64(sext_i32_i64(local_tid_128406), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128406) - + sext_i32_i64(squot32(local_tid_128406, + 32) * 32 - 1)); + + if (inactive_128423) { + x_99087 = x_99088; + } + if (!inactive_128423) { + double defunc_1_op_res_99089 = x_99087 + + x_99088; + 
+ x_99087 = defunc_1_op_res_99089; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + x_99087; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128406, 32) == 0) { + ((__local + double *) red_arr_mem_128410)[sext_i32_i64(local_tid_128406)] = + x_99088; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128414) * + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403) + + sext_i32_i64(local_tid_128406), m_73008) && + slt64(sext_i32_i64(local_tid_128406), + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403))) { + ((__global + double *) mem_124054)[sext_i32_i64(virt_group_id_128414) * + squot64(segred_group_sizze_99083, + segment_sizze_nonzzero_128403) + + sext_i32_i64(local_tid_128406)] = + ((__local + double *) red_arr_mem_128410)[(sext_i32_i64(local_tid_128406) + + (int64_t) 1) * + segment_sizze_nonzzero_128403 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99083 +} +__kernel void mainMagnitudezisegred_small_99032(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128335_backing_aligned_0, + int64_t m_73008, + int64_t num_recresids_padded_73681, + int64_t num_groups_99062, + int64_t segment_sizze_nonzzero_128328, + __global + unsigned char *mem_124045, + __global + unsigned char *mem_124048) +{ + #define segred_group_sizze_99061 (mainMagnitudezisegred_group_sizze_99026) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128335_backing_0 = + (__local volatile + char *) red_arr_mem_128335_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128330; + 
int32_t local_tid_128331; + int64_t group_sizze_128334; + int32_t wave_sizze_128333; + int32_t group_tid_128332; + + global_tid_128330 = get_global_id(0); + local_tid_128331 = get_local_id(0); + group_sizze_128334 = get_local_size(0); + wave_sizze_128333 = LOCKSTEP_WIDTH; + group_tid_128332 = get_group_id(0); + + int32_t phys_tid_99032; + + phys_tid_99032 = global_tid_128330; + + __local char *red_arr_mem_128335; + + red_arr_mem_128335 = (__local char *) red_arr_mem_128335_backing_0; + + int32_t phys_group_id_128337; + + phys_group_id_128337 = get_group_id(0); + for (int32_t i_128338 = 0; i_128338 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328))) - + phys_group_id_128337, sext_i64_i32(num_groups_99062)); + i_128338++) { + int32_t virt_group_id_128339 = phys_group_id_128337 + i_128338 * + sext_i64_i32(num_groups_99062); + int64_t gtid_99023 = squot64(sext_i32_i64(local_tid_128331), + segment_sizze_nonzzero_128328) + + sext_i32_i64(virt_group_id_128339) * + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328); + int64_t gtid_99031 = srem64(sext_i32_i64(local_tid_128331), + num_recresids_padded_73681); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_73681) && + (slt64(gtid_99023, m_73008) && + slt64(sext_i32_i64(local_tid_128331), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328)))) { + double x_99073 = ((__global double *) mem_124045)[gtid_99023 * + num_recresids_padded_73681 + + gtid_99031]; + + // save map-out results + { } + // save results to be reduced + { + ((__local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + x_99073; + } + } else { + ((__local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + 0.0; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_73681)) { + // perform segmented scan to imitate reduction + { + 
double x_99065; + double x_99066; + double x_128340; + double x_128341; + bool ltid_in_bounds_128347; + + ltid_in_bounds_128347 = slt64(sext_i32_i64(local_tid_128331), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328)); + + int32_t skip_threads_128348; + + // read input for in-block scan + { + if (ltid_in_bounds_128347) { + x_99066 = ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)]; + if ((local_tid_128331 - squot32(local_tid_128331, 32) * + 32) == 0) { + x_99065 = x_99066; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128348 = 1; + while (slt32(skip_threads_128348, 32)) { + if (sle32(skip_threads_128348, local_tid_128331 - + squot32(local_tid_128331, 32) * 32) && + ltid_in_bounds_128347) { + // read operands + { + x_99065 = ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331) - + sext_i32_i64(skip_threads_128348)]; + } + // perform operation + { + bool inactive_128349 = + slt64(srem64(sext_i32_i64(local_tid_128331), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128331) - + sext_i32_i64(local_tid_128331 - + skip_threads_128348)); + + if (inactive_128349) { + x_99065 = x_99066; + } + if (!inactive_128349) { + bool isnan_res_99067; + + isnan_res_99067 = futrts_isnan64(x_99065); + + double defunc_1_op_res_99068; + + if (isnan_res_99067) { + defunc_1_op_res_99068 = x_99066; + } else { + bool isnan_res_99069; + + isnan_res_99069 = + futrts_isnan64(x_99066); + + double defunc_1_op_res_f_res_99070; + + if (isnan_res_99069) { + defunc_1_op_res_f_res_99070 = + x_99065; + } else { + double + defunc_1_op_res_f_res_f_res_99071 = + x_99065 + x_99066; + + defunc_1_op_res_f_res_99070 = + defunc_1_op_res_f_res_f_res_99071; + } + defunc_1_op_res_99068 = + defunc_1_op_res_f_res_99070; + } + x_99065 = defunc_1_op_res_99068; + } + } + } + if (sle32(wave_sizze_128333, skip_threads_128348)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + 
if (sle32(skip_threads_128348, local_tid_128331 - + squot32(local_tid_128331, 32) * 32) && + ltid_in_bounds_128347) { + // write result + { + ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + x_99065; + x_99066 = x_99065; + } + } + if (sle32(wave_sizze_128333, skip_threads_128348)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128348 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128331 - squot32(local_tid_128331, 32) * + 32) == 31 && ltid_in_bounds_128347) { + ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(squot32(local_tid_128331, + 32))] = + x_99065; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128350; + + // read input for in-block scan + { + if (squot32(local_tid_128331, 32) == 0 && + ltid_in_bounds_128347) { + x_128341 = ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)]; + if ((local_tid_128331 - squot32(local_tid_128331, + 32) * 32) == 0) { + x_128340 = x_128341; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128350 = 1; + while (slt32(skip_threads_128350, 32)) { + if (sle32(skip_threads_128350, local_tid_128331 - + squot32(local_tid_128331, 32) * 32) && + (squot32(local_tid_128331, 32) == 0 && + ltid_in_bounds_128347)) { + // read operands + { + x_128340 = ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331) - + sext_i32_i64(skip_threads_128350)]; + } + // perform operation + { + bool inactive_128351 = + slt64(srem64(sext_i32_i64(local_tid_128331 * + 32 + 32 - 1), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128331 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128331 - + skip_threads_128350) * + 32 + 32 - 1)); + + if (inactive_128351) { + x_128340 = x_128341; + } + if (!inactive_128351) { + bool 
isnan_res_128342; + + isnan_res_128342 = + futrts_isnan64(x_128340); + + double defunc_1_op_res_128343; + + if (isnan_res_128342) { + defunc_1_op_res_128343 = x_128341; + } else { + bool isnan_res_128344; + + isnan_res_128344 = + futrts_isnan64(x_128341); + + double defunc_1_op_res_f_res_128345; + + if (isnan_res_128344) { + defunc_1_op_res_f_res_128345 = + x_128340; + } else { + double + defunc_1_op_res_f_res_f_res_128346 + = x_128340 + x_128341; + + defunc_1_op_res_f_res_128345 = + defunc_1_op_res_f_res_f_res_128346; + } + defunc_1_op_res_128343 = + defunc_1_op_res_f_res_128345; + } + x_128340 = defunc_1_op_res_128343; + } + } + } + if (sle32(wave_sizze_128333, skip_threads_128350)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128350, local_tid_128331 - + squot32(local_tid_128331, 32) * 32) && + (squot32(local_tid_128331, 32) == 0 && + ltid_in_bounds_128347)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + x_128340; + x_128341 = x_128340; + } + } + if (sle32(wave_sizze_128333, skip_threads_128350)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128350 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128331, 32) == 0 || + !ltid_in_bounds_128347)) { + // read operands + { + x_99066 = x_99065; + x_99065 = ((__local + double *) red_arr_mem_128335)[sext_i32_i64(squot32(local_tid_128331, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128352 = + slt64(srem64(sext_i32_i64(local_tid_128331), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128331) - + sext_i32_i64(squot32(local_tid_128331, + 32) * 32 - 1)); + + if (inactive_128352) { + x_99065 = x_99066; + } + if (!inactive_128352) { + bool isnan_res_99067; + + isnan_res_99067 = futrts_isnan64(x_99065); + + double defunc_1_op_res_99068; + + if (isnan_res_99067) { + defunc_1_op_res_99068 = x_99066; + } else { + bool 
isnan_res_99069; + + isnan_res_99069 = futrts_isnan64(x_99066); + + double defunc_1_op_res_f_res_99070; + + if (isnan_res_99069) { + defunc_1_op_res_f_res_99070 = x_99065; + } else { + double defunc_1_op_res_f_res_f_res_99071 + = x_99065 + x_99066; + + defunc_1_op_res_f_res_99070 = + defunc_1_op_res_f_res_f_res_99071; + } + defunc_1_op_res_99068 = + defunc_1_op_res_f_res_99070; + } + x_99065 = defunc_1_op_res_99068; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + x_99065; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128331, 32) == 0) { + ((__local + double *) red_arr_mem_128335)[sext_i32_i64(local_tid_128331)] = + x_99066; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128339) * + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328) + + sext_i32_i64(local_tid_128331), m_73008) && + slt64(sext_i32_i64(local_tid_128331), + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328))) { + ((__global + double *) mem_124048)[sext_i32_i64(virt_group_id_128339) * + squot64(segred_group_sizze_99061, + segment_sizze_nonzzero_128328) + + sext_i32_i64(local_tid_128331)] = + ((__local + double *) red_arr_mem_128335)[(sext_i32_i64(local_tid_128331) + + (int64_t) 1) * + segment_sizze_nonzzero_128328 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99061 +} +__kernel void mainMagnitudezisegred_small_99667(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128640_backing_aligned_0, + int64_t m_73008, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int64_t num_groups_99944, + int64_t segment_sizze_nonzzero_128633, + __global + unsigned char 
*defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124078, + __global + unsigned char *mem_124130) +{ + #define segred_group_sizze_99943 (mainMagnitudezisegred_group_sizze_99661) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128640_backing_0 = + (__local volatile + char *) red_arr_mem_128640_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128635; + int32_t local_tid_128636; + int64_t group_sizze_128639; + int32_t wave_sizze_128638; + int32_t group_tid_128637; + + global_tid_128635 = get_global_id(0); + local_tid_128636 = get_local_id(0); + group_sizze_128639 = get_local_size(0); + wave_sizze_128638 = LOCKSTEP_WIDTH; + group_tid_128637 = get_group_id(0); + + int32_t phys_tid_99667; + + phys_tid_99667 = global_tid_128635; + + __local char *red_arr_mem_128640; + + red_arr_mem_128640 = (__local char *) red_arr_mem_128640_backing_0; + + int32_t phys_group_id_128642; + + phys_group_id_128642 = get_group_id(0); + for (int32_t i_128643 = 0; i_128643 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633))) - + phys_group_id_128642, sext_i64_i32(num_groups_99944)); + i_128643++) { + int32_t virt_group_id_128644 = phys_group_id_128642 + i_128643 * + sext_i64_i32(num_groups_99944); + int64_t gtid_99658 = squot64(sext_i32_i64(local_tid_128636), + segment_sizze_nonzzero_128633) + + sext_i32_i64(virt_group_id_128644) * + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633); + int64_t gtid_99666 = srem64(sext_i32_i64(local_tid_128636), + num_recresids_padded_73681); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_73681) && + (slt64(gtid_99658, m_73008) && + slt64(sext_i32_i64(local_tid_128636), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633)))) { + int64_t slice_115288 = (int64_t) 1 + 
gtid_99666; + double x_99953 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99658 * + Nmk_74408 + + slice_115288]; + double x_99954 = ((__global double *) mem_124078)[gtid_99658 * + Nmk_74408 + + slice_115288]; + double abs_res_99955 = fabs(x_99953); + bool cond_99956 = x_99954 < abs_res_99955; + int64_t defunc_2_f_res_99957; + + if (cond_99956) { + defunc_2_f_res_99957 = gtid_99666; + } else { + defunc_2_f_res_99957 = (int64_t) 9223372036854775807; + } + // save map-out results + { } + // save results to be reduced + { + ((__local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + defunc_2_f_res_99957; + } + } else { + ((__local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + (int64_t) 9223372036854775807; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_73681)) { + // perform segmented scan to imitate reduction + { + int64_t x_99947; + int64_t x_99948; + int64_t x_128645; + int64_t x_128646; + bool ltid_in_bounds_128648; + + ltid_in_bounds_128648 = slt64(sext_i32_i64(local_tid_128636), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633)); + + int32_t skip_threads_128649; + + // read input for in-block scan + { + if (ltid_in_bounds_128648) { + x_99948 = ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)]; + if ((local_tid_128636 - squot32(local_tid_128636, 32) * + 32) == 0) { + x_99947 = x_99948; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128649 = 1; + while (slt32(skip_threads_128649, 32)) { + if (sle32(skip_threads_128649, local_tid_128636 - + squot32(local_tid_128636, 32) * 32) && + ltid_in_bounds_128648) { + // read operands + { + x_99947 = ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636) - + sext_i32_i64(skip_threads_128649)]; + } + // perform operation + { + bool inactive_128650 = + 
slt64(srem64(sext_i32_i64(local_tid_128636), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128636) - + sext_i32_i64(local_tid_128636 - + skip_threads_128649)); + + if (inactive_128650) { + x_99947 = x_99948; + } + if (!inactive_128650) { + int64_t defunc_1_op_res_99949 = + smin64(x_99947, x_99948); + + x_99947 = defunc_1_op_res_99949; + } + } + } + if (sle32(wave_sizze_128638, skip_threads_128649)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128649, local_tid_128636 - + squot32(local_tid_128636, 32) * 32) && + ltid_in_bounds_128648) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + x_99947; + x_99948 = x_99947; + } + } + if (sle32(wave_sizze_128638, skip_threads_128649)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128649 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128636 - squot32(local_tid_128636, 32) * + 32) == 31 && ltid_in_bounds_128648) { + ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(squot32(local_tid_128636, + 32))] = + x_99947; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128651; + + // read input for in-block scan + { + if (squot32(local_tid_128636, 32) == 0 && + ltid_in_bounds_128648) { + x_128646 = ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)]; + if ((local_tid_128636 - squot32(local_tid_128636, + 32) * 32) == 0) { + x_128645 = x_128646; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128651 = 1; + while (slt32(skip_threads_128651, 32)) { + if (sle32(skip_threads_128651, local_tid_128636 - + squot32(local_tid_128636, 32) * 32) && + (squot32(local_tid_128636, 32) == 0 && + ltid_in_bounds_128648)) { + // read operands + { + x_128645 = ((volatile __local + int64_t *) 
red_arr_mem_128640)[sext_i32_i64(local_tid_128636) - + sext_i32_i64(skip_threads_128651)]; + } + // perform operation + { + bool inactive_128652 = + slt64(srem64(sext_i32_i64(local_tid_128636 * + 32 + 32 - 1), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128636 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128636 - + skip_threads_128651) * + 32 + 32 - 1)); + + if (inactive_128652) { + x_128645 = x_128646; + } + if (!inactive_128652) { + int64_t defunc_1_op_res_128647 = + smin64(x_128645, x_128646); + + x_128645 = defunc_1_op_res_128647; + } + } + } + if (sle32(wave_sizze_128638, skip_threads_128651)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128651, local_tid_128636 - + squot32(local_tid_128636, 32) * 32) && + (squot32(local_tid_128636, 32) == 0 && + ltid_in_bounds_128648)) { + // write result + { + ((volatile __local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + x_128645; + x_128646 = x_128645; + } + } + if (sle32(wave_sizze_128638, skip_threads_128651)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128651 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128636, 32) == 0 || + !ltid_in_bounds_128648)) { + // read operands + { + x_99948 = x_99947; + x_99947 = ((__local + int64_t *) red_arr_mem_128640)[sext_i32_i64(squot32(local_tid_128636, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128653 = + slt64(srem64(sext_i32_i64(local_tid_128636), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128636) - + sext_i32_i64(squot32(local_tid_128636, + 32) * 32 - 1)); + + if (inactive_128653) { + x_99947 = x_99948; + } + if (!inactive_128653) { + int64_t defunc_1_op_res_99949 = smin64(x_99947, + x_99948); + + x_99947 = defunc_1_op_res_99949; + } + } + // write final result + { + ((__local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + x_99947; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + 
// restore correct values for first block + { + if (squot32(local_tid_128636, 32) == 0) { + ((__local + int64_t *) red_arr_mem_128640)[sext_i32_i64(local_tid_128636)] = + x_99948; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128644) * + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633) + + sext_i32_i64(local_tid_128636), m_73008) && + slt64(sext_i32_i64(local_tid_128636), + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633))) { + ((__global + int64_t *) mem_124130)[sext_i32_i64(virt_group_id_128644) * + squot64(segred_group_sizze_99943, + segment_sizze_nonzzero_128633) + + sext_i32_i64(local_tid_128636)] = + ((__local + int64_t *) red_arr_mem_128640)[(sext_i32_i64(local_tid_128636) + + (int64_t) 1) * + segment_sizze_nonzzero_128633 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99943 +} +__kernel void mainMagnitudezisegred_small_99792(__global int *global_failure, + __local volatile + int64_t *red_arr_mem_128575_backing_aligned_0, + int64_t m_73008, + int64_t num_recresids_padded_73681, + int64_t Nmk_74408, + int64_t num_groups_99824, + int64_t segment_sizze_nonzzero_128568, + __global + unsigned char *defunc_3_map_res_mem_124068, + __global + unsigned char *mem_124121, + __global + unsigned char *mem_124124) +{ + #define segred_group_sizze_99823 (mainMagnitudezisegred_group_sizze_99786) + + const int block_dim0 = 0; + const int block_dim1 = 1; + const int block_dim2 = 2; + __local volatile char *restrict red_arr_mem_128575_backing_0 = + (__local volatile + char *) red_arr_mem_128575_backing_aligned_0; + + if (*global_failure >= 0) + return; + + int32_t global_tid_128570; + int32_t local_tid_128571; + int64_t group_sizze_128574; + int32_t wave_sizze_128573; + int32_t group_tid_128572; + + 
global_tid_128570 = get_global_id(0); + local_tid_128571 = get_local_id(0); + group_sizze_128574 = get_local_size(0); + wave_sizze_128573 = LOCKSTEP_WIDTH; + group_tid_128572 = get_group_id(0); + + int32_t phys_tid_99792; + + phys_tid_99792 = global_tid_128570; + + __local char *red_arr_mem_128575; + + red_arr_mem_128575 = (__local char *) red_arr_mem_128575_backing_0; + + int32_t phys_group_id_128577; + + phys_group_id_128577 = get_group_id(0); + for (int32_t i_128578 = 0; i_128578 < + sdiv_up32(sext_i64_i32(sdiv_up64(m_73008, + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568))) - + phys_group_id_128577, sext_i64_i32(num_groups_99824)); + i_128578++) { + int32_t virt_group_id_128579 = phys_group_id_128577 + i_128578 * + sext_i64_i32(num_groups_99824); + int64_t gtid_99783 = squot64(sext_i32_i64(local_tid_128571), + segment_sizze_nonzzero_128568) + + sext_i32_i64(virt_group_id_128579) * + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568); + int64_t gtid_99791 = srem64(sext_i32_i64(local_tid_128571), + num_recresids_padded_73681); + + // apply map function if in bounds + { + if (slt64((int64_t) 0, num_recresids_padded_73681) && + (slt64(gtid_99783, m_73008) && + slt64(sext_i32_i64(local_tid_128571), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568)))) { + double i64_res_99831 = ((__global + double *) mem_124121)[gtid_99783]; + int64_t slice_115286 = (int64_t) 1 + gtid_99791; + double x_99832 = ((__global + double *) defunc_3_map_res_mem_124068)[gtid_99783 * + Nmk_74408 + + slice_115286]; + int64_t x_99834 = mul64((int64_t) 2, gtid_99791); + int64_t i64_arg_99835 = add64((int64_t) 2, x_99834); + double i64_res_99836 = sitofp_i64_f64(i64_arg_99835); + double y_99837 = i64_res_99836 / i64_res_99831; + double lifted_div_res_99838 = 1.0 + y_99837; + double abs_arg_99839 = x_99832 / lifted_div_res_99838; + double abs_res_99840 = fabs(abs_arg_99839); + + // save map-out results + { } + // 
save results to be reduced + { + ((__local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + abs_res_99840; + } + } else { + ((__local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + -INFINITY; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + if (slt64((int64_t) 0, num_recresids_padded_73681)) { + // perform segmented scan to imitate reduction + { + double x_99827; + double x_99828; + double x_128580; + double x_128581; + bool ltid_in_bounds_128583; + + ltid_in_bounds_128583 = slt64(sext_i32_i64(local_tid_128571), + num_recresids_padded_73681 * + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568)); + + int32_t skip_threads_128584; + + // read input for in-block scan + { + if (ltid_in_bounds_128583) { + x_99828 = ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)]; + if ((local_tid_128571 - squot32(local_tid_128571, 32) * + 32) == 0) { + x_99827 = x_99828; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128584 = 1; + while (slt32(skip_threads_128584, 32)) { + if (sle32(skip_threads_128584, local_tid_128571 - + squot32(local_tid_128571, 32) * 32) && + ltid_in_bounds_128583) { + // read operands + { + x_99827 = ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571) - + sext_i32_i64(skip_threads_128584)]; + } + // perform operation + { + bool inactive_128585 = + slt64(srem64(sext_i32_i64(local_tid_128571), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128571) - + sext_i32_i64(local_tid_128571 - + skip_threads_128584)); + + if (inactive_128585) { + x_99827 = x_99828; + } + if (!inactive_128585) { + double defunc_1_op_res_99829 = + fmax64(x_99827, x_99828); + + x_99827 = defunc_1_op_res_99829; + } + } + } + if (sle32(wave_sizze_128573, skip_threads_128584)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128584, local_tid_128571 - + squot32(local_tid_128571, 32) * 32) && + ltid_in_bounds_128583) { + // write 
result + { + ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + x_99827; + x_99828 = x_99827; + } + } + if (sle32(wave_sizze_128573, skip_threads_128584)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128584 *= 2; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // last thread of block 'i' writes its result to offset 'i' + { + if ((local_tid_128571 - squot32(local_tid_128571, 32) * + 32) == 31 && ltid_in_bounds_128583) { + ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(squot32(local_tid_128571, + 32))] = + x_99827; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // scan the first block, after which offset 'i' contains carry-in for block 'i+1' + { + int32_t skip_threads_128586; + + // read input for in-block scan + { + if (squot32(local_tid_128571, 32) == 0 && + ltid_in_bounds_128583) { + x_128581 = ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)]; + if ((local_tid_128571 - squot32(local_tid_128571, + 32) * 32) == 0) { + x_128580 = x_128581; + } + } + } + // in-block scan (hopefully no barriers needed) + { + skip_threads_128586 = 1; + while (slt32(skip_threads_128586, 32)) { + if (sle32(skip_threads_128586, local_tid_128571 - + squot32(local_tid_128571, 32) * 32) && + (squot32(local_tid_128571, 32) == 0 && + ltid_in_bounds_128583)) { + // read operands + { + x_128580 = ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571) - + sext_i32_i64(skip_threads_128586)]; + } + // perform operation + { + bool inactive_128587 = + slt64(srem64(sext_i32_i64(local_tid_128571 * + 32 + 32 - 1), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128571 * + 32 + 32 - 1) - + sext_i32_i64((local_tid_128571 - + skip_threads_128586) * + 32 + 32 - 1)); + + if (inactive_128587) { + x_128580 = x_128581; + } + if (!inactive_128587) { + double defunc_1_op_res_128582 = + fmax64(x_128580, x_128581); + + x_128580 = defunc_1_op_res_128582; + } + } + } + if (sle32(wave_sizze_128573, 
skip_threads_128586)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + if (sle32(skip_threads_128586, local_tid_128571 - + squot32(local_tid_128571, 32) * 32) && + (squot32(local_tid_128571, 32) == 0 && + ltid_in_bounds_128583)) { + // write result + { + ((volatile __local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + x_128580; + x_128581 = x_128580; + } + } + if (sle32(wave_sizze_128573, skip_threads_128586)) { + barrier(CLK_LOCAL_MEM_FENCE); + } + skip_threads_128586 *= 2; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // carry-in for every block except the first + { + if (!(squot32(local_tid_128571, 32) == 0 || + !ltid_in_bounds_128583)) { + // read operands + { + x_99828 = x_99827; + x_99827 = ((__local + double *) red_arr_mem_128575)[sext_i32_i64(squot32(local_tid_128571, + 32)) - + (int64_t) 1]; + } + // perform operation + { + bool inactive_128588 = + slt64(srem64(sext_i32_i64(local_tid_128571), + num_recresids_padded_73681), + sext_i32_i64(local_tid_128571) - + sext_i32_i64(squot32(local_tid_128571, + 32) * 32 - 1)); + + if (inactive_128588) { + x_99827 = x_99828; + } + if (!inactive_128588) { + double defunc_1_op_res_99829 = fmax64(x_99827, + x_99828); + + x_99827 = defunc_1_op_res_99829; + } + } + // write final result + { + ((__local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + x_99827; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // restore correct values for first block + { + if (squot32(local_tid_128571, 32) == 0) { + ((__local + double *) red_arr_mem_128575)[sext_i32_i64(local_tid_128571)] = + x_99828; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + // save final values of segments + { + if (slt64(sext_i32_i64(virt_group_id_128579) * + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568) + + sext_i32_i64(local_tid_128571), m_73008) && + slt64(sext_i32_i64(local_tid_128571), + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568))) { + ((__global + double 
*) mem_124124)[sext_i32_i64(virt_group_id_128579) * + squot64(segred_group_sizze_99823, + segment_sizze_nonzzero_128568) + + sext_i32_i64(local_tid_128571)] = + ((__local + double *) red_arr_mem_128575)[(sext_i32_i64(local_tid_128571) + + (int64_t) 1) * + segment_sizze_nonzzero_128568 - + (int64_t) 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE); + } + + error_1: + return; + #undef segred_group_sizze_99823 +} +""" +# Start of values.py. + +# Hacky parser/reader/writer for values written in Futhark syntax. +# Used for reading stdin when compiling standalone programs with the +# Python code generator. + +import numpy as np +import string +import struct +import sys + +class ReaderInput: + def __init__(self, f): + self.f = f + self.lookahead_buffer = [] + + def get_char(self): if len(self.lookahead_buffer) == 0: return self.f.read(1) else: @@ -50180,7 +153319,8 @@ def mk_bin_scalar_reader(t): def bin_reader(f): fmt = FUTHARK_PRIMTYPES[t]['bin_format'] size = FUTHARK_PRIMTYPES[t]['size'] - return struct.unpack('<' + fmt, f.get_chars(size))[0] + tf = FUTHARK_PRIMTYPES[t]['numpy_type'] + return tf(struct.unpack('<' + fmt, f.get_chars(size))[0]) return bin_reader read_bin_i8 = mk_bin_scalar_reader('i8') @@ -50287,7 +153427,7 @@ def read_is_binary(f): 'bin_reader': read_bin_bool, 'str_reader': read_str_bool, 'bin_format': 'b', - 'numpy_type': np.bool } + 'numpy_type': bool } } def read_bin_read_type(f): @@ -50345,18 +153485,18 @@ def read_array(f, expected_type, rank): shape = [] elem_count = 1 for i in range(rank): - bin_size = read_bin_u64(f) + bin_size = read_bin_i64(f) elem_count *= bin_size shape.append(bin_size) bin_fmt = FUTHARK_PRIMTYPES[bin_type_enum]['bin_format'] # We first read the expected number of types into a bytestring, - # then use np.fromstring. This is because np.fromfile does not + # then use np.frombuffer. 
This is because np.fromfile does not # work on things that are insufficiently file-like, like a network # stream. bytes = f.get_chars(elem_count * FUTHARK_PRIMTYPES[expected_type]['size']) - arr = np.fromstring(bytes, dtype=FUTHARK_PRIMTYPES[bin_type_enum]['numpy_type']) + arr = np.frombuffer(bytes, dtype=FUTHARK_PRIMTYPES[bin_type_enum]['numpy_type']) arr.shape = shape return arr @@ -50375,12 +153515,11 @@ def read_value(type_desc, reader=input_reader): if m: dims = int(len(m.group(1))/2) basetype = m.group(2) - assert basetype in FUTHARK_PRIMTYPES, "Unknown type: {}".format(type_desc) - if dims > 0: - return read_array(reader, basetype, dims) - else: - return read_scalar(reader, basetype) - return (dims, basetype) + assert m and basetype in FUTHARK_PRIMTYPES, "Unknown type: {}".format(type_desc) + if dims > 0: + return read_array(reader, basetype, dims) + else: + return read_scalar(reader, basetype) def end_of_input(entry, f=input_reader): skip_spaces(f) @@ -50473,9 +153612,9 @@ def construct_binary_value(v): bytes[3:7] = type_strs[t] for i in range(len(shape)): - bytes[7+i*8:7+(i+1)*8] = np.int64(shape[i]).tostring() + bytes[7+i*8:7+(i+1)*8] = np.int64(shape[i]).tobytes() - bytes[7+len(shape)*8:] = np.ascontiguousarray(v).tostring() + bytes[7+len(shape)*8:] = np.ascontiguousarray(v).tobytes() return bytes @@ -50708,7 +153847,7 @@ def sext_T_i64(x): return np.int64(x) def itob_T_bool(x): - return np.bool(x) + return bool(x) def btoi_bool_i8(x): return np.int8(x) @@ -51123,6 +154262,7 @@ def futhark_fma64(a, b, c): import sys import time +import shlex # For string splitting class Server: def __init__(self, ctx): @@ -51137,8460 +154277,25537 @@ def _get_arg(self, args, i): if i < len(args): return args[i] else: - raise self.Failure('Insufficient command args') - - def _get_entry_point(self, entry): - if entry in self._ctx.entry_points: - return self._ctx.entry_points[entry] + raise self.Failure('Insufficient command args') + + def _get_entry_point(self, entry): + 
if entry in self._ctx.entry_points: + return self._ctx.entry_points[entry] + else: + raise self.Failure('Unknown entry point: %s' % entry) + + def _check_var(self, vname): + if not vname in self._vars: + raise self.Failure('Unknown variable: %s' % vname) + + def _get_var(self, vname): + self._check_var(vname) + return self._vars[vname] + + def _cmd_inputs(self, args): + entry = self._get_arg(args, 0) + for t in self._get_entry_point(entry)[0]: + print(t) + + def _cmd_outputs(self, args): + entry = self._get_arg(args, 0) + for t in self._get_entry_point(entry)[1]: + print(t) + + def _cmd_dummy(self, args): + pass + + def _cmd_free(self, args): + for vname in args: + self._check_var(vname) + del self._vars[vname] + + def _cmd_call(self, args): + entry = self._get_entry_point(self._get_arg(args, 0)) + num_ins = len(entry[0]) + num_outs = len(entry[1]) + exp_len = 1 + num_outs + num_ins + + if len(args) != exp_len: + raise self.Failure('Invalid argument count, expected %d' % exp_len) + + out_vnames = args[1:num_outs+1] + + for out_vname in out_vnames: + if out_vname in self._vars: + raise self.Failure('Variable already exists: %s' % out_vname) + + in_vnames = args[1+num_outs:] + ins = [ self._get_var(in_vname) for in_vname in in_vnames ] + + try: + (runtime, vals) = getattr(self._ctx, args[0])(*ins) + except Exception as e: + raise self.Failure(str(e)) + + print('runtime: %d' % runtime) + + if num_outs == 1: + self._vars[out_vnames[0]] = vals + else: + for (out_vname, val) in zip(out_vnames, vals): + self._vars[out_vname] = val + + def _store_val(self, f, value): + # In case we are using the PyOpenCL backend, we first + # need to convert OpenCL arrays to ordinary NumPy + # arrays. We do this in a nasty way. + if isinstance(value, opaque): + for component in value.data: + self._store_val(f, component) + elif isinstance(value, np.number) or isinstance(value, bool) or isinstance(value, np.bool_) or isinstance(value, np.ndarray): + # Ordinary NumPy value. 
+ f.write(construct_binary_value(value)) + else: + # Assuming PyOpenCL array. + f.write(construct_binary_value(value.get())) + + def _cmd_store(self, args): + fname = self._get_arg(args, 0) + + with open(fname, 'wb') as f: + for i in range(1, len(args)): + self._store_val(f, self._get_var(args[i])) + + def _restore_val(self, reader, typename): + if typename in self._ctx.opaques: + vs = [] + for t in self._ctx.opaques[typename]: + vs += [read_value(t, reader)] + return opaque(typename, *vs) + else: + return read_value(typename, reader) + + def _cmd_restore(self, args): + if len(args) % 2 == 0: + raise self.Failure('Invalid argument count') + + fname = args[0] + args = args[1:] + + with open(fname, 'rb') as f: + reader = ReaderInput(f) + while args != []: + vname = args[0] + typename = args[1] + args = args[2:] + + if vname in self._vars: + raise self.Failure('Variable already exists: %s' % vname) + + try: + self._vars[vname] = self._restore_val(reader, typename) + except ValueError: + raise self.Failure('Failed to restore variable %s.\n' + 'Possibly malformed data in %s.\n' + % (vname, fname)) + + skip_spaces(reader) + if reader.get_char() != b'': + raise self.Failure('Expected EOF after reading values') + + _commands = { 'inputs': _cmd_inputs, + 'outputs': _cmd_outputs, + 'call': _cmd_call, + 'restore': _cmd_restore, + 'store': _cmd_store, + 'free': _cmd_free, + 'clear': _cmd_dummy, + 'pause_profiling': _cmd_dummy, + 'unpause_profiling': _cmd_dummy, + 'report': _cmd_dummy + } + + def _process_line(self, line): + words = shlex.split(line) + if words == []: + raise self.Failure('Empty line') + else: + cmd = words[0] + args = words[1:] + if cmd in self._commands: + self._commands[cmd](self, args) + else: + raise self.Failure('Unknown command: %s' % cmd) + + def run(self): + while True: + print('%%% OK', flush=True) + line = sys.stdin.readline() + if line == '': + return + try: + self._process_line(line) + except self.Failure as e: + print('%%% FAILURE') + print(e.msg) 
+ +# End of server.py +class bfastfinal: + entry_points = {"convertToFloat": (["i16", "[][][]i16"], ["[][][]f64"]), + "main": (["i64", "i64", "i64", "f64", "f64", "f64", "f64", + "i64", "f64", "[]i64", "[][]f64"], ["[]i64", + "[]i64", + "[]f64", + "[]i64"]), + "mainDetailed": (["i64", "i64", "i64", "f64", "f64", "f64", + "f64", "i64", "f64", "[]i64", "[][]f64"], + ["[]f64", "[]i64", "[]i64", "[]f64", + "[][]f64", "[][]f64", "[]f64", "[]i64", + "[]f64", "[]f64", "[][]f64", "[][]f64", + "[]i64"]), "mainMagnitude": (["i64", "i64", + "i64", "f64", + "f64", "f64", + "f64", "i64", + "f64", + "[]i64", + "[][]f64"], + ["[]i64", + "[]i64", + "[]f64", + "[]f64", + "[]i64"]), + "reshapeTransp": (["[][][]f64"], ["[][]f64"])} + opaques = {} + def __init__(self, command_queue=None, interactive=False, + platform_pref=preferred_platform, device_pref=preferred_device, + default_group_size=default_group_size, + default_num_groups=default_num_groups, + default_tile_size=default_tile_size, + default_reg_tile_size=default_reg_tile_size, + default_threshold=default_threshold, sizes=sizes): + size_heuristics=[("NVIDIA CUDA", cl.device_type.GPU, "lockstep_width", + lambda device: np.int32(32)), ("AMD Accelerated Parallel Processing", + cl.device_type.GPU, "lockstep_width", + lambda device: np.int32(32)), ("", + cl.device_type.GPU, + "lockstep_width", + lambda device: np.int32(1)), + ("", cl.device_type.GPU, "num_groups", + lambda device: (np.int32(4) * device.get_info(getattr(cl.device_info, + "MAX_COMPUTE_UNITS")))), + ("", cl.device_type.GPU, "group_size", lambda device: np.int32(256)), ("", + cl.device_type.GPU, + "tile_size", + lambda device: np.int32(32)), + ("", cl.device_type.GPU, "reg_tile_size", lambda device: np.int32(2)), ("", + cl.device_type.GPU, + "threshold", + lambda device: np.int32(32768)), + ("", cl.device_type.CPU, "lockstep_width", lambda device: np.int32(1)), ("", + cl.device_type.CPU, + "num_groups", + lambda device: device.get_info(getattr(cl.device_info, + 
"MAX_COMPUTE_UNITS"))), + ("", cl.device_type.CPU, "group_size", lambda device: np.int32(32)), ("", + cl.device_type.CPU, + "tile_size", + lambda device: np.int32(4)), + ("", cl.device_type.CPU, "reg_tile_size", lambda device: np.int32(1)), ("", + cl.device_type.CPU, + "threshold", + lambda device: device.get_info(getattr(cl.device_info, + "MAX_COMPUTE_UNITS")))] + self.global_failure_args_max = 5 + self.failure_msgs=["Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 recresid.fut:95:38-44\n #1 recresid.fut:95:12-88\n #2 recresid.fut:94:5-96:18\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:200:5-74\n #7 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n 
#2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 
bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 
mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 
lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n 
#9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape 
[{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape 
[{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 
/prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape 
[{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 
recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index 
[{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n 
#4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 
bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 
recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 
recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 
bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 
lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 
mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 
recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, 
{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 
/prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index 
[{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n 
#4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 
bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] 
out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 
/prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:200:5-74\n #8 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:200:5-74\n #8 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:200:5-74\n #8 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 
bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n 
#3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 
bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:117:37-57\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:200:5-74\n #3 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 
bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 recresid.fut:95:38-44\n #1 recresid.fut:95:12-88\n #2 recresid.fut:94:5-96:18\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:185:3-72\n #7 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 
/prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 
bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 
recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n 
#10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 
bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 
bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 
lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 
mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of 
shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of 
shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 
bfastfinal.fut:181:1-185:72\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape 
[{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 
recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] 
out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 
/prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 
bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 
/prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 
lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 
recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n 
#10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n 
#2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 
bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of 
shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 
recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 
mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:185:3-72\n #8 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:185:3-72\n #8 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:185:3-72\n #8 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape 
[{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 
bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n 
#2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:117:37-57\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:185:3-72\n #3 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:127:54-68\n #1 bfastfinal.fut:127:13-130:32\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:128:29-43\n #1 bfastfinal.fut:127:13-130:32\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:8:17-21\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:127:13-132:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:125:20-142:9\n #6 bfastfinal.fut:185:3-72\n #7 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:9:20-24\n #1 
lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:127:13-132:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:125:20-142:9\n #6 bfastfinal.fut:185:3-72\n #7 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:140:39-43\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:140:47-51\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-42\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 
bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:172:29-79\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:172:29-79\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 
bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:172:29-79\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 recresid.fut:95:38-44\n #1 recresid.fut:95:12-88\n #2 recresid.fut:94:5-96:18\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:192:5-74\n #7 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 
bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 
/prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Range {}..<{} is invalid.\n-> #0 
lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] 
out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 
lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for 
array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index 
[{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 
/prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 
bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 
/prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 
recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 
recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 
bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of 
shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n 
#6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for 
array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 
/prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n 
#7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 
lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:73:23-29\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:75:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:76:25-33\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:88:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:90:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of 
shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:91:15-93:43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:65:27-32\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:38-43\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:69:23-70:27\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:71:19-72:23\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 
/prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:35-41\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:82:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:84:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 
bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:37-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:85:19-86:39\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:46-54\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:61:35-42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:98:34-40\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 
recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:101:42-47\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:107:46-51\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Range {}..<{} is invalid.\n-> #0 lib/github.com/nhey/lm/linpack.fut:112:58-64\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:117:40-45\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 
bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:39-44\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:116:48-53\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:50-55\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:121:63-68\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:122:37-44\n #1 
lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:124:54-59\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:131:65-73\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:132:44-133:62\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:134:24-136:42\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 
mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}:{}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}:{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 
bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:10:10-17\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:12:33-38\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:23-28\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:16:32-37\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:34-39\n #1 
lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 lib/github.com/nhey/lm/linpack.fut:22:47-52\n #1 lib/github.com/nhey/lm/lm.fut:91:15-44\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:192:5-74\n #8 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:192:5-74\n #8 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 mroc.fut:36:58-63\n #1 mroc.fut:36:25-78\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:192:5-74\n #8 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 
helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:69:45-50\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds 
for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:55:50-63\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:56:37-40\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:38-57\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:58:61-80\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of 
shape [{}].\n-> #0 bfastfinal.fut:97:51-60\n #1 bfastfinal.fut:97:25-79\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:94:5-102:22\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:111:43-65\n #1 bfastfinal.fut:110:27-113:43\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:117:37-57\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:192:5-74\n #3 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:127:54-68\n #1 bfastfinal.fut:127:13-130:32\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:128:29-43\n #1 bfastfinal.fut:127:13-130:32\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 
bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:8:17-21\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:127:13-132:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:125:20-142:9\n #6 bfastfinal.fut:192:5-74\n #7 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:9:20-24\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:127:13-132:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:125:20-142:9\n #6 bfastfinal.fut:192:5-74\n #7 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:140:39-43\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 
bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:140:47-51\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-42\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-141:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:38-52\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:152:56-74\n #1 bfastfinal.fut:150:22-153:41\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n", + "Index [{}] out of bounds for array of shape 
[{}].\n-> #0 helpers.fut:6:27-44\n #1 bfastfinal.fut:169:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:147:38-175:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n"] + program = initialise_opencl_object(self, + program_src=fut_opencl_src, + command_queue=command_queue, + interactive=interactive, + platform_pref=platform_pref, + device_pref=device_pref, + default_group_size=default_group_size, + default_num_groups=default_num_groups, + default_tile_size=default_tile_size, + default_reg_tile_size=default_reg_tile_size, + default_threshold=default_threshold, + size_heuristics=size_heuristics, + required_types=["i16", "i32", "i64", "f64", "bool", "unit"], + user_sizes=sizes, + all_sizes={"builtin#iota_i64.group_size_126547": {"class": "group_size", "value": None}, + "builtin#replicate_f64.group_size_126478": {"class": "group_size", + "value": None}, + "builtin#replicate_i64.group_size_126487": {"class": "group_size", + "value": None}, + "convertToFloat.segmap_group_size_77189": {"class": "group_size", + "value": None}, + "main.Rx_116320": {"class": "reg_tile_size", "value": None}, + "main.Rx_117443": {"class": "reg_tile_size", "value": None}, + "main.Rx_118370": {"class": "reg_tile_size", "value": None}, + "main.Rx_119111": {"class": "reg_tile_size", "value": None}, + "main.Ry_116321": {"class": "reg_tile_size", "value": None}, + "main.Ry_117444": {"class": "reg_tile_size", "value": None}, + "main.Ry_118371": {"class": "reg_tile_size", "value": None}, + "main.Ry_119112": {"class": "reg_tile_size", "value": None}, + "main.Tk_116317": {"class": "tile_size", "value": None}, + "main.Tk_117440": {"class": "tile_size", "value": None}, + "main.Tk_118367": {"class": "tile_size", "value": None}, + "main.Tk_119108": {"class": "tile_size", "value": None}, + "main.Tx_116318": {"class": "tile_size", "value": None}, + "main.Tx_117441": {"class": "tile_size", "value": None}, + "main.Tx_118222": {"class": "tile_size", "value": None}, + 
"main.Tx_118368": {"class": "tile_size", "value": None}, + "main.Tx_119109": {"class": "tile_size", "value": None}, + "main.Ty_116319": {"class": "tile_size", "value": None}, + "main.Ty_117442": {"class": "tile_size", "value": None}, + "main.Ty_118223": {"class": "tile_size", "value": None}, + "main.Ty_118369": {"class": "tile_size", "value": None}, + "main.Ty_119110": {"class": "tile_size", "value": None}, + "main.group_size_126386": {"class": "group_size", "value": None}, + "main.group_size_126391": {"class": "group_size", "value": None}, + "main.group_size_126469": {"class": "group_size", "value": None}, + "main.group_size_126565": {"class": "group_size", "value": None}, + "main.group_size_126570": {"class": "group_size", "value": None}, + "main.group_size_126575": {"class": "group_size", "value": None}, + "main.group_size_126685": {"class": "group_size", "value": None}, + "main.group_size_126789": {"class": "group_size", "value": None}, + "main.group_size_126873": {"class": "group_size", "value": None}, + "main.group_size_127137": {"class": "group_size", "value": None}, + "main.group_size_127142": {"class": "group_size", "value": None}, + "main.group_size_127210": {"class": "group_size", "value": None}, + "main.group_size_127215": {"class": "group_size", "value": None}, + "main.group_size_127220": {"class": "group_size", "value": None}, + "main.group_size_127469": {"class": "group_size", "value": None}, + "main.group_size_127573": {"class": "group_size", "value": None}, + "main.group_size_127657": {"class": "group_size", "value": None}, + "main.group_size_127921": {"class": "group_size", "value": None}, + "main.group_size_127926": {"class": "group_size", "value": None}, + "main.group_size_129315": {"class": "group_size", "value": None}, + "main.segmap_group_size_102691": {"class": "group_size", "value": None}, + "main.segmap_group_size_102791": {"class": "group_size", "value": None}, + "main.segmap_group_size_102884": {"class": "group_size", "value": None}, + 
"main.segmap_group_size_103008": {"class": "group_size", "value": None}, + "main.segmap_group_size_103286": {"class": "group_size", "value": None}, + "main.segmap_group_size_103422": {"class": "group_size", "value": None}, + "main.segmap_group_size_103552": {"class": "group_size", "value": None}, + "main.segmap_group_size_104475": {"class": "group_size", "value": None}, + "main.segmap_group_size_104498": {"class": "group_size", "value": None}, + "main.segmap_group_size_104545": {"class": "group_size", "value": None}, + "main.segmap_group_size_104702": {"class": "group_size", "value": None}, + "main.segmap_group_size_104744": {"class": "group_size", "value": None}, + "main.segmap_group_size_104807": {"class": "group_size", "value": None}, + "main.segmap_group_size_105022": {"class": "group_size", "value": None}, + "main.segmap_group_size_105112": {"class": "group_size", "value": None}, + "main.segmap_group_size_105123": {"class": "group_size", "value": None}, + "main.segmap_group_size_105450": {"class": "group_size", "value": None}, + "main.segmap_group_size_105484": {"class": "group_size", "value": None}, + "main.segmap_group_size_105528": {"class": "group_size", "value": None}, + "main.segmap_group_size_105816": {"class": "group_size", "value": None}, + "main.segmap_group_size_105861": {"class": "group_size", "value": None}, + "main.segmap_group_size_105877": {"class": "group_size", "value": None}, + "main.segmap_group_size_105885": {"class": "group_size", "value": None}, + "main.segmap_group_size_106903": {"class": "group_size", "value": None}, + "main.segmap_group_size_107041": {"class": "group_size", "value": None}, + "main.segmap_group_size_108138": {"class": "group_size", "value": None}, + "main.segmap_group_size_108211": {"class": "group_size", "value": None}, + "main.segmap_group_size_108234": {"class": "group_size", "value": None}, + "main.segmap_group_size_108281": {"class": "group_size", "value": None}, + "main.segmap_group_size_108438": {"class": 
"group_size", "value": None}, + "main.segmap_group_size_108492": {"class": "group_size", "value": None}, + "main.segmap_group_size_108557": {"class": "group_size", "value": None}, + "main.segmap_group_size_108772": {"class": "group_size", "value": None}, + "main.segmap_group_size_108862": {"class": "group_size", "value": None}, + "main.segmap_group_size_108873": {"class": "group_size", "value": None}, + "main.segmap_group_size_109201": {"class": "group_size", "value": None}, + "main.segmap_group_size_109235": {"class": "group_size", "value": None}, + "main.segmap_group_size_109292": {"class": "group_size", "value": None}, + "main.segmap_group_size_109582": {"class": "group_size", "value": None}, + "main.segmap_group_size_109627": {"class": "group_size", "value": None}, + "main.segmap_group_size_109643": {"class": "group_size", "value": None}, + "main.segmap_group_size_109651": {"class": "group_size", "value": None}, + "main.segmap_group_size_109800": {"class": "group_size", "value": None}, + "main.segmap_group_size_109831": {"class": "group_size", "value": None}, + "main.segmap_group_size_110909": {"class": "group_size", "value": None}, + "main.segmap_group_size_111087": {"class": "group_size", "value": None}, + "main.segmap_group_size_111125": {"class": "group_size", "value": None}, + "main.segmap_group_size_111201": {"class": "group_size", "value": None}, + "main.segmap_group_size_111232": {"class": "group_size", "value": None}, + "main.segmap_group_size_111407": {"class": "group_size", "value": None}, + "main.segmap_group_size_111583": {"class": "group_size", "value": None}, + "main.segmap_group_size_111618": {"class": "group_size", "value": None}, + "main.segmap_group_size_111646": {"class": "group_size", "value": None}, + "main.segmap_group_size_111749": {"class": "group_size", "value": None}, + "main.segmap_group_size_111780": {"class": "group_size", "value": None}, + "main.segmap_group_size_111831": {"class": "group_size", "value": None}, + 
"main.segmap_group_size_112244": {"class": "group_size", "value": None}, + "main.segmap_group_size_112284": {"class": "group_size", "value": None}, + "main.segmap_group_size_112411": {"class": "group_size", "value": None}, + "main.segmap_group_size_112590": {"class": "group_size", "value": None}, + "main.segmap_group_size_112628": {"class": "group_size", "value": None}, + "main.segmap_group_size_112675": {"class": "group_size", "value": None}, + "main.segmap_group_size_113046": {"class": "group_size", "value": None}, + "main.segmap_group_size_113156": {"class": "group_size", "value": None}, + "main.segmap_group_size_113176": {"class": "group_size", "value": None}, + "main.segmap_group_size_113265": {"class": "group_size", "value": None}, + "main.segmap_group_size_113349": {"class": "group_size", "value": None}, + "main.segmap_group_size_113557": {"class": "group_size", "value": None}, + "main.segmap_group_size_113698": {"class": "group_size", "value": None}, + "main.segmap_group_size_113830": {"class": "group_size", "value": None}, + "main.segmap_group_size_114009": {"class": "group_size", "value": None}, + "main.segmap_group_size_114179": {"class": "group_size", "value": None}, + "main.segmap_group_size_114292": {"class": "group_size", "value": None}, + "main.segmap_group_size_114415": {"class": "group_size", "value": None}, + "main.segmap_group_size_114512": {"class": "group_size", "value": None}, + "main.segmap_group_size_114683": {"class": "group_size", "value": None}, + "main.segmap_group_size_114834": {"class": "group_size", "value": None}, + "main.segmap_num_groups_103288": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_103554": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_104746": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_104809": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_105024": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_105530": {"class": 
"num_groups", "value": None}, + "main.segmap_num_groups_105818": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_105863": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_107043": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_108494": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_108559": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_108774": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_109294": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_109584": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_109629": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_110911": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_111089": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_111409": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_112630": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_112677": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_113559": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_113700": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_113832": {"class": "num_groups", "value": None}, + "main.segmap_num_groups_114836": {"class": "num_groups", "value": None}, + "main.segred_group_size_102911": {"class": "group_size", "value": None}, + "main.segred_group_size_103151": {"class": "group_size", "value": None}, + "main.segred_group_size_104858": {"class": "group_size", "value": None}, + "main.segred_group_size_105136": {"class": "group_size", "value": None}, + "main.segred_group_size_105893": {"class": "group_size", "value": None}, + "main.segred_group_size_108608": {"class": "group_size", "value": None}, + "main.segred_group_size_108886": {"class": "group_size", "value": None}, + "main.segred_group_size_109659": {"class": "group_size", "value": None}, + 
"main.segred_group_size_109812": {"class": "group_size", "value": None}, + "main.segred_group_size_109841": {"class": "group_size", "value": None}, + "main.segred_group_size_110859": {"class": "group_size", "value": None}, + "main.segred_group_size_111138": {"class": "group_size", "value": None}, + "main.segred_group_size_111213": {"class": "group_size", "value": None}, + "main.segred_group_size_111240": {"class": "group_size", "value": None}, + "main.segred_group_size_111597": {"class": "group_size", "value": None}, + "main.segred_group_size_111627": {"class": "group_size", "value": None}, + "main.segred_group_size_112262": {"class": "group_size", "value": None}, + "main.segred_group_size_112387": {"class": "group_size", "value": None}, + "main.segred_group_size_112735": {"class": "group_size", "value": None}, + "main.segred_group_size_113619": {"class": "group_size", "value": None}, + "main.segred_group_size_113756": {"class": "group_size", "value": None}, + "main.segred_group_size_113886": {"class": "group_size", "value": None}, + "main.segred_group_size_114307": {"class": "group_size", "value": None}, + "main.segred_group_size_114331": {"class": "group_size", "value": None}, + "main.segred_group_size_114401": {"class": "group_size", "value": None}, + "main.segred_group_size_114461": {"class": "group_size", "value": None}, + "main.segred_group_size_114733": {"class": "group_size", "value": None}, + "main.segred_num_groups_102913": {"class": "num_groups", "value": None}, + "main.segred_num_groups_103153": {"class": "num_groups", "value": None}, + "main.segred_num_groups_104860": {"class": "num_groups", "value": None}, + "main.segred_num_groups_105138": {"class": "num_groups", "value": None}, + "main.segred_num_groups_105895": {"class": "num_groups", "value": None}, + "main.segred_num_groups_108610": {"class": "num_groups", "value": None}, + "main.segred_num_groups_108888": {"class": "num_groups", "value": None}, + "main.segred_num_groups_109661": {"class": 
"num_groups", "value": None}, + "main.segred_num_groups_109814": {"class": "num_groups", "value": None}, + "main.segred_num_groups_109843": {"class": "num_groups", "value": None}, + "main.segred_num_groups_110861": {"class": "num_groups", "value": None}, + "main.segred_num_groups_111140": {"class": "num_groups", "value": None}, + "main.segred_num_groups_111215": {"class": "num_groups", "value": None}, + "main.segred_num_groups_111242": {"class": "num_groups", "value": None}, + "main.segred_num_groups_111599": {"class": "num_groups", "value": None}, + "main.segred_num_groups_111629": {"class": "num_groups", "value": None}, + "main.segred_num_groups_112264": {"class": "num_groups", "value": None}, + "main.segred_num_groups_112389": {"class": "num_groups", "value": None}, + "main.segred_num_groups_112737": {"class": "num_groups", "value": None}, + "main.segred_num_groups_113621": {"class": "num_groups", "value": None}, + "main.segred_num_groups_113758": {"class": "num_groups", "value": None}, + "main.segred_num_groups_113888": {"class": "num_groups", "value": None}, + "main.segred_num_groups_114309": {"class": "num_groups", "value": None}, + "main.segred_num_groups_114333": {"class": "num_groups", "value": None}, + "main.segred_num_groups_114403": {"class": "num_groups", "value": None}, + "main.segred_num_groups_114463": {"class": "num_groups", "value": None}, + "main.segred_num_groups_114735": {"class": "num_groups", "value": None}, + "main.segscan_group_size_103077": {"class": "group_size", "value": None}, + "main.segscan_group_size_111556": {"class": "group_size", "value": None}, + "main.segscan_group_size_114078": {"class": "group_size", "value": None}, + "main.segscan_group_size_114787": {"class": "group_size", "value": None}, + "main.segscan_num_groups_103079": {"class": "num_groups", "value": None}, + "main.segscan_num_groups_111558": {"class": "num_groups", "value": None}, + "main.segscan_num_groups_114080": {"class": "num_groups", "value": None}, + 
"main.segscan_num_groups_114789": {"class": "num_groups", "value": None}, + "main.suff_intra_par_1": {"class": "threshold(32, !main.suff_outer_redomap_0)", + "value": 32}, + "main.suff_intra_par_17": {"class": "threshold(32, !main.suff_outer_par_16)", + "value": 32}, + "main.suff_intra_par_20": {"class": "threshold(32, !main.suff_outer_par_19)", + "value": 32}, + "main.suff_intra_par_22": {"class": "threshold(32, !main.suff_outer_par_21)", + "value": 32}, + "main.suff_intra_par_26": {"class": "threshold(32,)", "value": 32}, + "main.suff_intra_par_27": {"class": "threshold(32, !main.suff_intra_par_26)", + "value": 32}, + "main.suff_intra_par_34": {"class": "threshold(32,)", "value": 32}, + "main.suff_intra_par_36": {"class": "threshold(32, !main.suff_outer_par_35)", + "value": 32}, + "main.suff_intra_par_38": {"class": "threshold(32,)", "value": 32}, + "main.suff_outer_par_10": {"class": "threshold(def, !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_11": {"class": "threshold(def, !main.suff_outer_par_10 !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_12": {"class": "threshold(def, !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_13": {"class": "threshold(def, !main.suff_outer_par_12 !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_14": {"class": "threshold(def, !main.suff_outer_par_13 !main.suff_outer_par_12 !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_15": {"class": "threshold(def, !main.suff_outer_par_9)", + "value": None}, + "main.suff_outer_par_16": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_18": {"class": "threshold(def, !main.suff_outer_par_16 !main.suff_intra_par_17)", + "value": None}, + "main.suff_outer_par_19": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_2": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_21": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_23": {"class": 
"threshold(def,)", "value": None}, + "main.suff_outer_par_24": {"class": "threshold(def, !main.suff_outer_par_23)", + "value": None}, + "main.suff_outer_par_25": {"class": "threshold(def, !main.suff_outer_par_24 !main.suff_outer_par_23)", + "value": None}, + "main.suff_outer_par_28": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_29": {"class": "threshold(def, !main.suff_outer_par_28)", + "value": None}, + "main.suff_outer_par_3": {"class": "threshold(def, !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_30": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_31": {"class": "threshold(def, !main.suff_outer_par_30)", + "value": None}, + "main.suff_outer_par_32": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_33": {"class": "threshold(def, !main.suff_outer_par_32)", + "value": None}, + "main.suff_outer_par_35": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_37": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_par_4": {"class": "threshold(def, !main.suff_outer_par_3 !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_5": {"class": "threshold(def, !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_6": {"class": "threshold(def, !main.suff_outer_par_5 !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_7": {"class": "threshold(def, !main.suff_outer_par_6 !main.suff_outer_par_5 !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_8": {"class": "threshold(def, !main.suff_outer_par_2)", + "value": None}, + "main.suff_outer_par_9": {"class": "threshold(def,)", "value": None}, + "main.suff_outer_redomap_0": {"class": "threshold(def,)", "value": None}, + "main.tile_size_115655": {"class": "tile_size", "value": None}, + "main.tile_size_116017": {"class": "tile_size", "value": None}, + "main.tile_size_116778": {"class": "tile_size", "value": None}, + "main.tile_size_117140": {"class": "tile_size", "value": 
None}, + "main.tile_size_118834": {"class": "tile_size", "value": None}, + "mainDetailed.Rx_116320": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Rx_117443": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Rx_118370": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Rx_119111": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Ry_116321": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Ry_117444": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Ry_118371": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Ry_119112": {"class": "reg_tile_size", "value": None}, + "mainDetailed.Tk_116317": {"class": "tile_size", "value": None}, + "mainDetailed.Tk_117440": {"class": "tile_size", "value": None}, + "mainDetailed.Tk_118367": {"class": "tile_size", "value": None}, + "mainDetailed.Tk_119108": {"class": "tile_size", "value": None}, + "mainDetailed.Tx_116318": {"class": "tile_size", "value": None}, + "mainDetailed.Tx_117441": {"class": "tile_size", "value": None}, + "mainDetailed.Tx_118222": {"class": "tile_size", "value": None}, + "mainDetailed.Tx_118368": {"class": "tile_size", "value": None}, + "mainDetailed.Tx_119109": {"class": "tile_size", "value": None}, + "mainDetailed.Ty_116319": {"class": "tile_size", "value": None}, + "mainDetailed.Ty_117442": {"class": "tile_size", "value": None}, + "mainDetailed.Ty_118223": {"class": "tile_size", "value": None}, + "mainDetailed.Ty_118369": {"class": "tile_size", "value": None}, + "mainDetailed.Ty_119110": {"class": "tile_size", "value": None}, + "mainDetailed.group_size_126398": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126403": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126481": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126577": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126582": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126587": 
{"class": "group_size", "value": None}, + "mainDetailed.group_size_126697": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126801": {"class": "group_size", "value": None}, + "mainDetailed.group_size_126885": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127149": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127154": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127222": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127227": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127232": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127481": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127585": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127669": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127933": {"class": "group_size", "value": None}, + "mainDetailed.group_size_127938": {"class": "group_size", "value": None}, + "mainDetailed.group_size_129327": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77258": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77358": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77451": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77575": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77853": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_77989": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_78119": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79042": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79065": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79112": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79269": {"class": "group_size", "value": 
None}, + "mainDetailed.segmap_group_size_79311": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79374": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79589": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79679": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_79690": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80017": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80051": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80095": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80383": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80428": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80444": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_80452": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_81470": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_81608": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_82705": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_82778": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_82801": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_82848": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83005": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83059": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83124": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83339": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83429": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83440": {"class": "group_size", "value": None}, + 
"mainDetailed.segmap_group_size_83768": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83802": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_83859": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84149": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84194": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84210": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84218": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84367": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_84398": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85476": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85654": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85692": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85768": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85799": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_85974": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86150": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86185": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86213": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86316": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86347": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86398": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86811": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86851": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_86978": {"class": "group_size", "value": None}, + 
"mainDetailed.segmap_group_size_87157": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87195": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87242": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87613": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87723": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87743": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87832": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_87916": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88124": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88265": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88397": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88576": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88746": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88859": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_88982": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89079": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89119": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89210": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89263": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89404": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89740": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89787": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89862": {"class": "group_size", "value": None}, + "mainDetailed.segmap_group_size_89910": {"class": "group_size", "value": None}, + 
"mainDetailed.segmap_num_groups_77855": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_78121": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_79313": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_79376": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_79591": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_80097": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_80385": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_80430": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_81610": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_83061": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_83126": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_83341": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_83861": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_84151": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_84196": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_85478": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_85656": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_85976": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_87197": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_87244": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_88126": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_88267": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_88399": {"class": "num_groups", "value": None}, + "mainDetailed.segmap_num_groups_89406": {"class": "num_groups", "value": None}, + 
"mainDetailed.segred_group_size_77478": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_77718": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_79425": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_79703": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_80460": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_83175": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_83453": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_84226": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_84379": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_84408": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_85426": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_85705": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_85780": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_85807": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_86164": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_86194": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_86829": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_86954": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_87302": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_88186": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_88323": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_88453": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_88874": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_88898": {"class": "group_size", "value": None}, + 
"mainDetailed.segred_group_size_88968": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_89028": {"class": "group_size", "value": None}, + "mainDetailed.segred_group_size_89822": {"class": "group_size", "value": None}, + "mainDetailed.segred_num_groups_77480": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_77720": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_79427": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_79705": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_80462": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_83177": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_83455": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_84228": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_84381": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_84410": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_85428": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_85707": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_85782": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_85809": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_86166": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_86196": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_86831": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_86956": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_87304": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_88188": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_88325": {"class": "num_groups", "value": None}, + 
"mainDetailed.segred_num_groups_88455": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_88876": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_88900": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_88970": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_89030": {"class": "num_groups", "value": None}, + "mainDetailed.segred_num_groups_89824": {"class": "num_groups", "value": None}, + "mainDetailed.segscan_group_size_77644": {"class": "group_size", + "value": None}, + "mainDetailed.segscan_group_size_86123": {"class": "group_size", + "value": None}, + "mainDetailed.segscan_group_size_88645": {"class": "group_size", + "value": None}, + "mainDetailed.segscan_group_size_89873": {"class": "group_size", + "value": None}, + "mainDetailed.segscan_num_groups_77646": {"class": "num_groups", + "value": None}, + "mainDetailed.segscan_num_groups_86125": {"class": "num_groups", + "value": None}, + "mainDetailed.segscan_num_groups_88647": {"class": "num_groups", + "value": None}, + "mainDetailed.segscan_num_groups_89875": {"class": "num_groups", + "value": None}, + "mainDetailed.suff_intra_par_1": {"class": "threshold(32, !mainDetailed.suff_outer_redomap_0)", + "value": 32}, + "mainDetailed.suff_intra_par_17": {"class": "threshold(32, !mainDetailed.suff_outer_par_16)", + "value": 32}, + "mainDetailed.suff_intra_par_20": {"class": "threshold(32, !mainDetailed.suff_outer_par_19)", + "value": 32}, + "mainDetailed.suff_intra_par_22": {"class": "threshold(32, !mainDetailed.suff_outer_par_21)", + "value": 32}, + "mainDetailed.suff_intra_par_26": {"class": "threshold(32,)", "value": 32}, + "mainDetailed.suff_intra_par_27": {"class": "threshold(32, !mainDetailed.suff_intra_par_26)", + "value": 32}, + "mainDetailed.suff_intra_par_34": {"class": "threshold(32,)", "value": 32}, + "mainDetailed.suff_intra_par_36": {"class": "threshold(32, !mainDetailed.suff_outer_par_35)", + "value": 32}, 
+ "mainDetailed.suff_intra_par_39": {"class": "threshold(32, !mainDetailed.suff_outer_par_38)", + "value": 32}, + "mainDetailed.suff_outer_par_10": {"class": "threshold(def, !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_11": {"class": "threshold(def, !mainDetailed.suff_outer_par_10 !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_12": {"class": "threshold(def, !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_13": {"class": "threshold(def, !mainDetailed.suff_outer_par_12 !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_14": {"class": "threshold(def, !mainDetailed.suff_outer_par_13 !mainDetailed.suff_outer_par_12 !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_15": {"class": "threshold(def, !mainDetailed.suff_outer_par_9)", + "value": None}, + "mainDetailed.suff_outer_par_16": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_18": {"class": "threshold(def, !mainDetailed.suff_outer_par_16 !mainDetailed.suff_intra_par_17)", + "value": None}, + "mainDetailed.suff_outer_par_19": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_2": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_21": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_23": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_24": {"class": "threshold(def, !mainDetailed.suff_outer_par_23)", + "value": None}, + "mainDetailed.suff_outer_par_25": {"class": "threshold(def, !mainDetailed.suff_outer_par_24 !mainDetailed.suff_outer_par_23)", + "value": None}, + "mainDetailed.suff_outer_par_28": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_29": {"class": "threshold(def, !mainDetailed.suff_outer_par_28)", + "value": None}, + "mainDetailed.suff_outer_par_3": {"class": "threshold(def, 
!mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_30": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_31": {"class": "threshold(def, !mainDetailed.suff_outer_par_30)", + "value": None}, + "mainDetailed.suff_outer_par_32": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_33": {"class": "threshold(def, !mainDetailed.suff_outer_par_32)", + "value": None}, + "mainDetailed.suff_outer_par_35": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_37": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_38": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_par_4": {"class": "threshold(def, !mainDetailed.suff_outer_par_3 !mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_5": {"class": "threshold(def, !mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_6": {"class": "threshold(def, !mainDetailed.suff_outer_par_5 !mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_7": {"class": "threshold(def, !mainDetailed.suff_outer_par_6 !mainDetailed.suff_outer_par_5 !mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_8": {"class": "threshold(def, !mainDetailed.suff_outer_par_2)", + "value": None}, + "mainDetailed.suff_outer_par_9": {"class": "threshold(def,)", "value": None}, + "mainDetailed.suff_outer_redomap_0": {"class": "threshold(def,)", + "value": None}, + "mainDetailed.tile_size_115655": {"class": "tile_size", "value": None}, + "mainDetailed.tile_size_116017": {"class": "tile_size", "value": None}, + "mainDetailed.tile_size_116778": {"class": "tile_size", "value": None}, + "mainDetailed.tile_size_117140": {"class": "tile_size", "value": None}, + "mainDetailed.tile_size_118834": {"class": "tile_size", "value": None}, + "mainMagnitude.Rx_116320": {"class": "reg_tile_size", "value": None}, + 
"mainMagnitude.Rx_117443": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Rx_118370": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Rx_119111": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Ry_116321": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Ry_117444": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Ry_118371": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Ry_119112": {"class": "reg_tile_size", "value": None}, + "mainMagnitude.Tk_116317": {"class": "tile_size", "value": None}, + "mainMagnitude.Tk_117440": {"class": "tile_size", "value": None}, + "mainMagnitude.Tk_118367": {"class": "tile_size", "value": None}, + "mainMagnitude.Tk_119108": {"class": "tile_size", "value": None}, + "mainMagnitude.Tx_116318": {"class": "tile_size", "value": None}, + "mainMagnitude.Tx_117441": {"class": "tile_size", "value": None}, + "mainMagnitude.Tx_118222": {"class": "tile_size", "value": None}, + "mainMagnitude.Tx_118368": {"class": "tile_size", "value": None}, + "mainMagnitude.Tx_119109": {"class": "tile_size", "value": None}, + "mainMagnitude.Ty_116319": {"class": "tile_size", "value": None}, + "mainMagnitude.Ty_117442": {"class": "tile_size", "value": None}, + "mainMagnitude.Ty_118223": {"class": "tile_size", "value": None}, + "mainMagnitude.Ty_118369": {"class": "tile_size", "value": None}, + "mainMagnitude.Ty_119110": {"class": "tile_size", "value": None}, + "mainMagnitude.group_size_126387": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126392": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126470": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126566": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126571": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126576": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126686": {"class": "group_size", "value": None}, + 
"mainMagnitude.group_size_126790": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_126874": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127138": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127143": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127211": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127216": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127221": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127470": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127574": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127658": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127922": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_127927": {"class": "group_size", "value": None}, + "mainMagnitude.group_size_129316": {"class": "group_size", "value": None}, + "mainMagnitude.segmap_group_size_100027": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100074": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100445": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100555": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100575": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100664": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100748": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_100956": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101097": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101229": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101408": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101578": {"class": 
"group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101691": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101814": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101911": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_101951": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_102042": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_102095": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_102354": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_102505": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90090": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90190": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90283": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90407": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90685": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90821": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_90951": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_91874": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_91897": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_91944": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92101": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92143": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92206": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92421": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92511": {"class": "group_size", + "value": None}, + 
"mainMagnitude.segmap_group_size_92522": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92849": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92883": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_92927": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_93215": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_93260": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_93276": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_93284": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_94302": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_94440": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95537": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95610": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95633": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95680": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95837": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95891": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_95956": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96171": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96261": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96272": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96600": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96634": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96691": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_96981": 
{"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_97026": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_97042": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_97050": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_97199": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_97230": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98308": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98486": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98524": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98600": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98631": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98806": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_98982": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99017": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99045": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99148": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99179": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99230": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99643": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99683": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99810": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_group_size_99989": {"class": "group_size", + "value": None}, + "mainMagnitude.segmap_num_groups_100029": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_100076": {"class": "num_groups", + "value": None}, 
+ "mainMagnitude.segmap_num_groups_100958": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_101099": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_101231": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_102507": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_90687": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_90953": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_92145": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_92208": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_92423": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_92929": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_93217": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_93262": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_94442": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_95893": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_95958": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_96173": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_96693": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_96983": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_97028": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_98310": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_98488": {"class": "num_groups", + "value": None}, + "mainMagnitude.segmap_num_groups_98808": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_group_size_100134": {"class": "group_size", + "value": None}, + 
"mainMagnitude.segred_group_size_101018": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101155": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101285": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101706": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101730": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101800": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_101860": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_102404": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_90310": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_90550": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_92257": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_92535": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_93292": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_96007": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_96285": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_97058": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_97211": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_97240": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_98258": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_98537": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_98612": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_98639": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_98996": {"class": "group_size", + "value": None}, + 
"mainMagnitude.segred_group_size_99026": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_99661": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_group_size_99786": {"class": "group_size", + "value": None}, + "mainMagnitude.segred_num_groups_100136": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101020": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101157": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101287": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101708": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101732": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101802": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_101862": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_102406": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_90312": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_90552": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_92259": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_92537": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_93294": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_96009": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_96287": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_97060": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_97213": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_97242": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_98260": {"class": "num_groups", + "value": None}, + 
"mainMagnitude.segred_num_groups_98539": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_98614": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_98641": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_98998": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_99028": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_99663": {"class": "num_groups", + "value": None}, + "mainMagnitude.segred_num_groups_99788": {"class": "num_groups", + "value": None}, + "mainMagnitude.segscan_group_size_101477": {"class": "group_size", + "value": None}, + "mainMagnitude.segscan_group_size_102458": {"class": "group_size", + "value": None}, + "mainMagnitude.segscan_group_size_90476": {"class": "group_size", + "value": None}, + "mainMagnitude.segscan_group_size_98955": {"class": "group_size", + "value": None}, + "mainMagnitude.segscan_num_groups_101479": {"class": "num_groups", + "value": None}, + "mainMagnitude.segscan_num_groups_102460": {"class": "num_groups", + "value": None}, + "mainMagnitude.segscan_num_groups_90478": {"class": "num_groups", + "value": None}, + "mainMagnitude.segscan_num_groups_98957": {"class": "num_groups", + "value": None}, + "mainMagnitude.suff_intra_par_1": {"class": "threshold(32, !mainMagnitude.suff_outer_redomap_0)", + "value": 32}, + "mainMagnitude.suff_intra_par_17": {"class": "threshold(32, !mainMagnitude.suff_outer_par_16)", + "value": 32}, + "mainMagnitude.suff_intra_par_20": {"class": "threshold(32, !mainMagnitude.suff_outer_par_19)", + "value": 32}, + "mainMagnitude.suff_intra_par_22": {"class": "threshold(32, !mainMagnitude.suff_outer_par_21)", + "value": 32}, + "mainMagnitude.suff_intra_par_26": {"class": "threshold(32,)", "value": 32}, + "mainMagnitude.suff_intra_par_27": {"class": "threshold(32, !mainMagnitude.suff_intra_par_26)", + "value": 32}, + "mainMagnitude.suff_intra_par_34": {"class": 
"threshold(32,)", "value": 32}, + "mainMagnitude.suff_intra_par_36": {"class": "threshold(32, !mainMagnitude.suff_outer_par_35)", + "value": 32}, + "mainMagnitude.suff_intra_par_38": {"class": "threshold(32,)", "value": 32}, + "mainMagnitude.suff_outer_par_10": {"class": "threshold(def, !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_11": {"class": "threshold(def, !mainMagnitude.suff_outer_par_10 !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_12": {"class": "threshold(def, !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_13": {"class": "threshold(def, !mainMagnitude.suff_outer_par_12 !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_14": {"class": "threshold(def, !mainMagnitude.suff_outer_par_13 !mainMagnitude.suff_outer_par_12 !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_15": {"class": "threshold(def, !mainMagnitude.suff_outer_par_9)", + "value": None}, + "mainMagnitude.suff_outer_par_16": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_18": {"class": "threshold(def, !mainMagnitude.suff_outer_par_16 !mainMagnitude.suff_intra_par_17)", + "value": None}, + "mainMagnitude.suff_outer_par_19": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_2": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_21": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_23": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_24": {"class": "threshold(def, !mainMagnitude.suff_outer_par_23)", + "value": None}, + "mainMagnitude.suff_outer_par_25": {"class": "threshold(def, !mainMagnitude.suff_outer_par_24 !mainMagnitude.suff_outer_par_23)", + "value": None}, + "mainMagnitude.suff_outer_par_28": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_29": 
{"class": "threshold(def, !mainMagnitude.suff_outer_par_28)", + "value": None}, + "mainMagnitude.suff_outer_par_3": {"class": "threshold(def, !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_30": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_31": {"class": "threshold(def, !mainMagnitude.suff_outer_par_30)", + "value": None}, + "mainMagnitude.suff_outer_par_32": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_33": {"class": "threshold(def, !mainMagnitude.suff_outer_par_32)", + "value": None}, + "mainMagnitude.suff_outer_par_35": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_37": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_par_4": {"class": "threshold(def, !mainMagnitude.suff_outer_par_3 !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_5": {"class": "threshold(def, !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_6": {"class": "threshold(def, !mainMagnitude.suff_outer_par_5 !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_7": {"class": "threshold(def, !mainMagnitude.suff_outer_par_6 !mainMagnitude.suff_outer_par_5 !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_8": {"class": "threshold(def, !mainMagnitude.suff_outer_par_2)", + "value": None}, + "mainMagnitude.suff_outer_par_9": {"class": "threshold(def,)", "value": None}, + "mainMagnitude.suff_outer_redomap_0": {"class": "threshold(def,)", + "value": None}, + "mainMagnitude.tile_size_115655": {"class": "tile_size", "value": None}, + "mainMagnitude.tile_size_116017": {"class": "tile_size", "value": None}, + "mainMagnitude.tile_size_116778": {"class": "tile_size", "value": None}, + "mainMagnitude.tile_size_117140": {"class": "tile_size", "value": None}, + "mainMagnitude.tile_size_118834": {"class": "tile_size", "value": 
None}}) + self.builtinzhiota_i64ziiota_i64_126544_var = program.builtinzhiota_i64ziiota_i64_126544 + self.builtinzhreplicate_f64zireplicate_126475_var = program.builtinzhreplicate_f64zireplicate_126475 + self.builtinzhreplicate_i64zireplicate_126484_var = program.builtinzhreplicate_i64zireplicate_126484 + self.convertToFloatzisegmap_77185_var = program.convertToFloatzisegmap_77185 + self.gpu_map_transpose_f64_var = program.gpu_map_transpose_f64 + self.gpu_map_transpose_f64_low_height_var = program.gpu_map_transpose_f64_low_height + self.gpu_map_transpose_f64_low_width_var = program.gpu_map_transpose_f64_low_width + self.gpu_map_transpose_f64_small_var = program.gpu_map_transpose_f64_small + self.gpu_map_transpose_i64_var = program.gpu_map_transpose_i64 + self.gpu_map_transpose_i64_low_height_var = program.gpu_map_transpose_i64_low_height + self.gpu_map_transpose_i64_low_width_var = program.gpu_map_transpose_i64_low_width + self.gpu_map_transpose_i64_small_var = program.gpu_map_transpose_i64_small + self.mainzicopy_126383_var = program.mainzicopy_126383 + self.mainzicopy_126388_var = program.mainzicopy_126388 + self.mainzicopy_126466_var = program.mainzicopy_126466 + self.mainzicopy_126562_var = program.mainzicopy_126562 + self.mainzicopy_126567_var = program.mainzicopy_126567 + self.mainzicopy_126572_var = program.mainzicopy_126572 + self.mainzicopy_126682_var = program.mainzicopy_126682 + self.mainzicopy_126786_var = program.mainzicopy_126786 + self.mainzicopy_126870_var = program.mainzicopy_126870 + self.mainzicopy_127134_var = program.mainzicopy_127134 + self.mainzicopy_127139_var = program.mainzicopy_127139 + self.mainzicopy_127207_var = program.mainzicopy_127207 + self.mainzicopy_127212_var = program.mainzicopy_127212 + self.mainzicopy_127217_var = program.mainzicopy_127217 + self.mainzicopy_127466_var = program.mainzicopy_127466 + self.mainzicopy_127570_var = program.mainzicopy_127570 + self.mainzicopy_127654_var = program.mainzicopy_127654 + 
self.mainzicopy_127918_var = program.mainzicopy_127918 + self.mainzicopy_127923_var = program.mainzicopy_127923 + self.mainzicopy_129312_var = program.mainzicopy_129312 + self.mainziscan_stage1_103083_var = program.mainziscan_stage1_103083 + self.mainziscan_stage1_111562_var = program.mainziscan_stage1_111562 + self.mainziscan_stage1_114084_var = program.mainziscan_stage1_114084 + self.mainziscan_stage1_114793_var = program.mainziscan_stage1_114793 + self.mainziscan_stage2_103083_var = program.mainziscan_stage2_103083 + self.mainziscan_stage2_111562_var = program.mainziscan_stage2_111562 + self.mainziscan_stage2_114084_var = program.mainziscan_stage2_114084 + self.mainziscan_stage2_114793_var = program.mainziscan_stage2_114793 + self.mainziscan_stage3_103083_var = program.mainziscan_stage3_103083 + self.mainziscan_stage3_111562_var = program.mainziscan_stage3_111562 + self.mainziscan_stage3_114084_var = program.mainziscan_stage3_114084 + self.mainziscan_stage3_114793_var = program.mainziscan_stage3_114793 + self.mainzisegmap_102688_var = program.mainzisegmap_102688 + self.mainzisegmap_102788_var = program.mainzisegmap_102788 + self.mainzisegmap_102881_var = program.mainzisegmap_102881 + self.mainzisegmap_103005_var = program.mainzisegmap_103005 + self.mainzisegmap_103283_var = program.mainzisegmap_103283 + self.mainzisegmap_103419_var = program.mainzisegmap_103419 + self.mainzisegmap_103550_var = program.mainzisegmap_103550 + self.mainzisegmap_104472_var = program.mainzisegmap_104472 + self.mainzisegmap_104494_var = program.mainzisegmap_104494 + self.mainzisegmap_104542_var = program.mainzisegmap_104542 + self.mainzisegmap_104699_var = program.mainzisegmap_104699 + self.mainzisegmap_104742_var = program.mainzisegmap_104742 + self.mainzisegmap_104804_var = program.mainzisegmap_104804 + self.mainzisegmap_105020_var = program.mainzisegmap_105020 + self.mainzisegmap_105108_var = program.mainzisegmap_105108 + self.mainzisegmap_105120_var = program.mainzisegmap_105120 + 
self.mainzisegmap_105446_var = program.mainzisegmap_105446 + self.mainzisegmap_105481_var = program.mainzisegmap_105481 + self.mainzisegmap_105526_var = program.mainzisegmap_105526 + self.mainzisegmap_105814_var = program.mainzisegmap_105814 + self.mainzisegmap_105859_var = program.mainzisegmap_105859 + self.mainzisegmap_105874_var = program.mainzisegmap_105874 + self.mainzisegmap_105883_var = program.mainzisegmap_105883 + self.mainzisegmap_106900_var = program.mainzisegmap_106900 + self.mainzisegmap_107039_var = program.mainzisegmap_107039 + self.mainzisegmap_108136_var = program.mainzisegmap_108136 + self.mainzisegmap_108208_var = program.mainzisegmap_108208 + self.mainzisegmap_108230_var = program.mainzisegmap_108230 + self.mainzisegmap_108278_var = program.mainzisegmap_108278 + self.mainzisegmap_108435_var = program.mainzisegmap_108435 + self.mainzisegmap_108490_var = program.mainzisegmap_108490 + self.mainzisegmap_108554_var = program.mainzisegmap_108554 + self.mainzisegmap_108770_var = program.mainzisegmap_108770 + self.mainzisegmap_108858_var = program.mainzisegmap_108858 + self.mainzisegmap_108870_var = program.mainzisegmap_108870 + self.mainzisegmap_109197_var = program.mainzisegmap_109197 + self.mainzisegmap_109232_var = program.mainzisegmap_109232 + self.mainzisegmap_109290_var = program.mainzisegmap_109290 + self.mainzisegmap_109580_var = program.mainzisegmap_109580 + self.mainzisegmap_109625_var = program.mainzisegmap_109625 + self.mainzisegmap_109640_var = program.mainzisegmap_109640 + self.mainzisegmap_109649_var = program.mainzisegmap_109649 + self.mainzisegmap_109798_var = program.mainzisegmap_109798 + self.mainzisegmap_109829_var = program.mainzisegmap_109829 + self.mainzisegmap_110907_var = program.mainzisegmap_110907 + self.mainzisegmap_111084_var = program.mainzisegmap_111084 + self.mainzisegmap_111122_var = program.mainzisegmap_111122 + self.mainzisegmap_111199_var = program.mainzisegmap_111199 + self.mainzisegmap_111230_var = 
program.mainzisegmap_111230 + self.mainzisegmap_111405_var = program.mainzisegmap_111405 + self.mainzisegmap_111581_var = program.mainzisegmap_111581 + self.mainzisegmap_111616_var = program.mainzisegmap_111616 + self.mainzisegmap_111644_var = program.mainzisegmap_111644 + self.mainzisegmap_111746_var = program.mainzisegmap_111746 + self.mainzisegmap_111778_var = program.mainzisegmap_111778 + self.mainzisegmap_112242_var = program.mainzisegmap_112242 + self.mainzisegmap_112282_var = program.mainzisegmap_112282 + self.mainzisegmap_112409_var = program.mainzisegmap_112409 + self.mainzisegmap_112587_var = program.mainzisegmap_112587 + self.mainzisegmap_112626_var = program.mainzisegmap_112626 + self.mainzisegmap_112672_var = program.mainzisegmap_112672 + self.mainzisegmap_113042_var = program.mainzisegmap_113042 + self.mainzisegmap_113153_var = program.mainzisegmap_113153 + self.mainzisegmap_113173_var = program.mainzisegmap_113173 + self.mainzisegmap_113263_var = program.mainzisegmap_113263 + self.mainzisegmap_113346_var = program.mainzisegmap_113346 + self.mainzisegmap_113555_var = program.mainzisegmap_113555 + self.mainzisegmap_113696_var = program.mainzisegmap_113696 + self.mainzisegmap_113828_var = program.mainzisegmap_113828 + self.mainzisegmap_114006_var = program.mainzisegmap_114006 + self.mainzisegmap_114177_var = program.mainzisegmap_114177 + self.mainzisegmap_114290_var = program.mainzisegmap_114290 + self.mainzisegmap_114413_var = program.mainzisegmap_114413 + self.mainzisegmap_114510_var = program.mainzisegmap_114510 + self.mainzisegmap_114681_var = program.mainzisegmap_114681 + self.mainzisegmap_114832_var = program.mainzisegmap_114832 + self.mainzisegmap_intragroup_102969_var = program.mainzisegmap_intragroup_102969 + self.mainzisegmap_intragroup_110903_var = program.mainzisegmap_intragroup_110903 + self.mainzisegmap_intragroup_111401_var = program.mainzisegmap_intragroup_111401 + self.mainzisegmap_intragroup_111825_var = 
program.mainzisegmap_intragroup_111825 + self.mainzisegmap_intragroup_112961_var = program.mainzisegmap_intragroup_112961 + self.mainzisegmap_intragroup_113099_var = program.mainzisegmap_intragroup_113099 + self.mainzisegmap_intragroup_113961_var = program.mainzisegmap_intragroup_113961 + self.mainzisegmap_intragroup_114173_var = program.mainzisegmap_intragroup_114173 + self.mainzisegmap_intragroup_114556_var = program.mainzisegmap_intragroup_114556 + self.mainzisegmap_intragroup_115661_var = program.mainzisegmap_intragroup_115661 + self.mainzisegmap_intragroup_116023_var = program.mainzisegmap_intragroup_116023 + self.mainzisegmap_intragroup_116342_var = program.mainzisegmap_intragroup_116342 + self.mainzisegmap_intragroup_116784_var = program.mainzisegmap_intragroup_116784 + self.mainzisegmap_intragroup_117146_var = program.mainzisegmap_intragroup_117146 + self.mainzisegmap_intragroup_117465_var = program.mainzisegmap_intragroup_117465 + self.mainzisegmap_intragroup_117900_var = program.mainzisegmap_intragroup_117900 + self.mainzisegmap_intragroup_118238_var = program.mainzisegmap_intragroup_118238 + self.mainzisegmap_intragroup_118391_var = program.mainzisegmap_intragroup_118391 + self.mainzisegmap_intragroup_118840_var = program.mainzisegmap_intragroup_118840 + self.mainzisegmap_intragroup_119132_var = program.mainzisegmap_intragroup_119132 + self.mainzisegred_large_104864_var = program.mainzisegred_large_104864 + self.mainzisegred_large_105142_var = program.mainzisegred_large_105142 + self.mainzisegred_large_105899_var = program.mainzisegred_large_105899 + self.mainzisegred_large_108614_var = program.mainzisegred_large_108614 + self.mainzisegred_large_108892_var = program.mainzisegred_large_108892 + self.mainzisegred_large_109665_var = program.mainzisegred_large_109665 + self.mainzisegred_large_109818_var = program.mainzisegred_large_109818 + self.mainzisegred_large_109847_var = program.mainzisegred_large_109847 + self.mainzisegred_large_111144_var = 
program.mainzisegred_large_111144 + self.mainzisegred_large_111219_var = program.mainzisegred_large_111219 + self.mainzisegred_large_111246_var = program.mainzisegred_large_111246 + self.mainzisegred_large_111603_var = program.mainzisegred_large_111603 + self.mainzisegred_large_111633_var = program.mainzisegred_large_111633 + self.mainzisegred_large_112268_var = program.mainzisegred_large_112268 + self.mainzisegred_large_112393_var = program.mainzisegred_large_112393 + self.mainzisegred_large_112741_var = program.mainzisegred_large_112741 + self.mainzisegred_large_113625_var = program.mainzisegred_large_113625 + self.mainzisegred_large_113762_var = program.mainzisegred_large_113762 + self.mainzisegred_large_113892_var = program.mainzisegred_large_113892 + self.mainzisegred_large_114313_var = program.mainzisegred_large_114313 + self.mainzisegred_large_114337_var = program.mainzisegred_large_114337 + self.mainzisegred_large_114467_var = program.mainzisegred_large_114467 + self.mainzisegred_large_114739_var = program.mainzisegred_large_114739 + self.mainzisegred_nonseg_102922_var = program.mainzisegred_nonseg_102922 + self.mainzisegred_nonseg_103159_var = program.mainzisegred_nonseg_103159 + self.mainzisegred_nonseg_110867_var = program.mainzisegred_nonseg_110867 + self.mainzisegred_nonseg_114409_var = program.mainzisegred_nonseg_114409 + self.mainzisegred_small_104864_var = program.mainzisegred_small_104864 + self.mainzisegred_small_105142_var = program.mainzisegred_small_105142 + self.mainzisegred_small_105899_var = program.mainzisegred_small_105899 + self.mainzisegred_small_108614_var = program.mainzisegred_small_108614 + self.mainzisegred_small_108892_var = program.mainzisegred_small_108892 + self.mainzisegred_small_109665_var = program.mainzisegred_small_109665 + self.mainzisegred_small_109818_var = program.mainzisegred_small_109818 + self.mainzisegred_small_109847_var = program.mainzisegred_small_109847 + self.mainzisegred_small_111144_var = 
program.mainzisegred_small_111144 + self.mainzisegred_small_111219_var = program.mainzisegred_small_111219 + self.mainzisegred_small_111246_var = program.mainzisegred_small_111246 + self.mainzisegred_small_111603_var = program.mainzisegred_small_111603 + self.mainzisegred_small_111633_var = program.mainzisegred_small_111633 + self.mainzisegred_small_112268_var = program.mainzisegred_small_112268 + self.mainzisegred_small_112393_var = program.mainzisegred_small_112393 + self.mainzisegred_small_112741_var = program.mainzisegred_small_112741 + self.mainzisegred_small_113625_var = program.mainzisegred_small_113625 + self.mainzisegred_small_113762_var = program.mainzisegred_small_113762 + self.mainzisegred_small_113892_var = program.mainzisegred_small_113892 + self.mainzisegred_small_114313_var = program.mainzisegred_small_114313 + self.mainzisegred_small_114337_var = program.mainzisegred_small_114337 + self.mainzisegred_small_114467_var = program.mainzisegred_small_114467 + self.mainzisegred_small_114739_var = program.mainzisegred_small_114739 + self.mainDetailedzicopy_126395_var = program.mainDetailedzicopy_126395 + self.mainDetailedzicopy_126400_var = program.mainDetailedzicopy_126400 + self.mainDetailedzicopy_126478_var = program.mainDetailedzicopy_126478 + self.mainDetailedzicopy_126574_var = program.mainDetailedzicopy_126574 + self.mainDetailedzicopy_126579_var = program.mainDetailedzicopy_126579 + self.mainDetailedzicopy_126584_var = program.mainDetailedzicopy_126584 + self.mainDetailedzicopy_126694_var = program.mainDetailedzicopy_126694 + self.mainDetailedzicopy_126798_var = program.mainDetailedzicopy_126798 + self.mainDetailedzicopy_126882_var = program.mainDetailedzicopy_126882 + self.mainDetailedzicopy_127146_var = program.mainDetailedzicopy_127146 + self.mainDetailedzicopy_127151_var = program.mainDetailedzicopy_127151 + self.mainDetailedzicopy_127219_var = program.mainDetailedzicopy_127219 + self.mainDetailedzicopy_127224_var = 
program.mainDetailedzicopy_127224 + self.mainDetailedzicopy_127229_var = program.mainDetailedzicopy_127229 + self.mainDetailedzicopy_127478_var = program.mainDetailedzicopy_127478 + self.mainDetailedzicopy_127582_var = program.mainDetailedzicopy_127582 + self.mainDetailedzicopy_127666_var = program.mainDetailedzicopy_127666 + self.mainDetailedzicopy_127930_var = program.mainDetailedzicopy_127930 + self.mainDetailedzicopy_127935_var = program.mainDetailedzicopy_127935 + self.mainDetailedzicopy_129324_var = program.mainDetailedzicopy_129324 + self.mainDetailedziscan_stage1_77650_var = program.mainDetailedziscan_stage1_77650 + self.mainDetailedziscan_stage1_86129_var = program.mainDetailedziscan_stage1_86129 + self.mainDetailedziscan_stage1_88651_var = program.mainDetailedziscan_stage1_88651 + self.mainDetailedziscan_stage1_89879_var = program.mainDetailedziscan_stage1_89879 + self.mainDetailedziscan_stage2_77650_var = program.mainDetailedziscan_stage2_77650 + self.mainDetailedziscan_stage2_86129_var = program.mainDetailedziscan_stage2_86129 + self.mainDetailedziscan_stage2_88651_var = program.mainDetailedziscan_stage2_88651 + self.mainDetailedziscan_stage2_89879_var = program.mainDetailedziscan_stage2_89879 + self.mainDetailedziscan_stage3_77650_var = program.mainDetailedziscan_stage3_77650 + self.mainDetailedziscan_stage3_86129_var = program.mainDetailedziscan_stage3_86129 + self.mainDetailedziscan_stage3_88651_var = program.mainDetailedziscan_stage3_88651 + self.mainDetailedziscan_stage3_89879_var = program.mainDetailedziscan_stage3_89879 + self.mainDetailedzisegmap_77255_var = program.mainDetailedzisegmap_77255 + self.mainDetailedzisegmap_77355_var = program.mainDetailedzisegmap_77355 + self.mainDetailedzisegmap_77448_var = program.mainDetailedzisegmap_77448 + self.mainDetailedzisegmap_77572_var = program.mainDetailedzisegmap_77572 + self.mainDetailedzisegmap_77850_var = program.mainDetailedzisegmap_77850 + self.mainDetailedzisegmap_77986_var = 
program.mainDetailedzisegmap_77986 + self.mainDetailedzisegmap_78117_var = program.mainDetailedzisegmap_78117 + self.mainDetailedzisegmap_79039_var = program.mainDetailedzisegmap_79039 + self.mainDetailedzisegmap_79061_var = program.mainDetailedzisegmap_79061 + self.mainDetailedzisegmap_79109_var = program.mainDetailedzisegmap_79109 + self.mainDetailedzisegmap_79266_var = program.mainDetailedzisegmap_79266 + self.mainDetailedzisegmap_79309_var = program.mainDetailedzisegmap_79309 + self.mainDetailedzisegmap_79371_var = program.mainDetailedzisegmap_79371 + self.mainDetailedzisegmap_79587_var = program.mainDetailedzisegmap_79587 + self.mainDetailedzisegmap_79675_var = program.mainDetailedzisegmap_79675 + self.mainDetailedzisegmap_79687_var = program.mainDetailedzisegmap_79687 + self.mainDetailedzisegmap_80013_var = program.mainDetailedzisegmap_80013 + self.mainDetailedzisegmap_80048_var = program.mainDetailedzisegmap_80048 + self.mainDetailedzisegmap_80093_var = program.mainDetailedzisegmap_80093 + self.mainDetailedzisegmap_80381_var = program.mainDetailedzisegmap_80381 + self.mainDetailedzisegmap_80426_var = program.mainDetailedzisegmap_80426 + self.mainDetailedzisegmap_80441_var = program.mainDetailedzisegmap_80441 + self.mainDetailedzisegmap_80450_var = program.mainDetailedzisegmap_80450 + self.mainDetailedzisegmap_81467_var = program.mainDetailedzisegmap_81467 + self.mainDetailedzisegmap_81606_var = program.mainDetailedzisegmap_81606 + self.mainDetailedzisegmap_82703_var = program.mainDetailedzisegmap_82703 + self.mainDetailedzisegmap_82775_var = program.mainDetailedzisegmap_82775 + self.mainDetailedzisegmap_82797_var = program.mainDetailedzisegmap_82797 + self.mainDetailedzisegmap_82845_var = program.mainDetailedzisegmap_82845 + self.mainDetailedzisegmap_83002_var = program.mainDetailedzisegmap_83002 + self.mainDetailedzisegmap_83057_var = program.mainDetailedzisegmap_83057 + self.mainDetailedzisegmap_83121_var = program.mainDetailedzisegmap_83121 + 
self.mainDetailedzisegmap_83337_var = program.mainDetailedzisegmap_83337 + self.mainDetailedzisegmap_83425_var = program.mainDetailedzisegmap_83425 + self.mainDetailedzisegmap_83437_var = program.mainDetailedzisegmap_83437 + self.mainDetailedzisegmap_83764_var = program.mainDetailedzisegmap_83764 + self.mainDetailedzisegmap_83799_var = program.mainDetailedzisegmap_83799 + self.mainDetailedzisegmap_83857_var = program.mainDetailedzisegmap_83857 + self.mainDetailedzisegmap_84147_var = program.mainDetailedzisegmap_84147 + self.mainDetailedzisegmap_84192_var = program.mainDetailedzisegmap_84192 + self.mainDetailedzisegmap_84207_var = program.mainDetailedzisegmap_84207 + self.mainDetailedzisegmap_84216_var = program.mainDetailedzisegmap_84216 + self.mainDetailedzisegmap_84365_var = program.mainDetailedzisegmap_84365 + self.mainDetailedzisegmap_84396_var = program.mainDetailedzisegmap_84396 + self.mainDetailedzisegmap_85474_var = program.mainDetailedzisegmap_85474 + self.mainDetailedzisegmap_85651_var = program.mainDetailedzisegmap_85651 + self.mainDetailedzisegmap_85689_var = program.mainDetailedzisegmap_85689 + self.mainDetailedzisegmap_85766_var = program.mainDetailedzisegmap_85766 + self.mainDetailedzisegmap_85797_var = program.mainDetailedzisegmap_85797 + self.mainDetailedzisegmap_85972_var = program.mainDetailedzisegmap_85972 + self.mainDetailedzisegmap_86148_var = program.mainDetailedzisegmap_86148 + self.mainDetailedzisegmap_86183_var = program.mainDetailedzisegmap_86183 + self.mainDetailedzisegmap_86211_var = program.mainDetailedzisegmap_86211 + self.mainDetailedzisegmap_86313_var = program.mainDetailedzisegmap_86313 + self.mainDetailedzisegmap_86345_var = program.mainDetailedzisegmap_86345 + self.mainDetailedzisegmap_86809_var = program.mainDetailedzisegmap_86809 + self.mainDetailedzisegmap_86849_var = program.mainDetailedzisegmap_86849 + self.mainDetailedzisegmap_86976_var = program.mainDetailedzisegmap_86976 + self.mainDetailedzisegmap_87154_var = 
program.mainDetailedzisegmap_87154 + self.mainDetailedzisegmap_87193_var = program.mainDetailedzisegmap_87193 + self.mainDetailedzisegmap_87239_var = program.mainDetailedzisegmap_87239 + self.mainDetailedzisegmap_87609_var = program.mainDetailedzisegmap_87609 + self.mainDetailedzisegmap_87720_var = program.mainDetailedzisegmap_87720 + self.mainDetailedzisegmap_87740_var = program.mainDetailedzisegmap_87740 + self.mainDetailedzisegmap_87830_var = program.mainDetailedzisegmap_87830 + self.mainDetailedzisegmap_87913_var = program.mainDetailedzisegmap_87913 + self.mainDetailedzisegmap_88122_var = program.mainDetailedzisegmap_88122 + self.mainDetailedzisegmap_88263_var = program.mainDetailedzisegmap_88263 + self.mainDetailedzisegmap_88395_var = program.mainDetailedzisegmap_88395 + self.mainDetailedzisegmap_88573_var = program.mainDetailedzisegmap_88573 + self.mainDetailedzisegmap_88744_var = program.mainDetailedzisegmap_88744 + self.mainDetailedzisegmap_88857_var = program.mainDetailedzisegmap_88857 + self.mainDetailedzisegmap_88980_var = program.mainDetailedzisegmap_88980 + self.mainDetailedzisegmap_89077_var = program.mainDetailedzisegmap_89077 + self.mainDetailedzisegmap_89117_var = program.mainDetailedzisegmap_89117 + self.mainDetailedzisegmap_89207_var = program.mainDetailedzisegmap_89207 + self.mainDetailedzisegmap_89261_var = program.mainDetailedzisegmap_89261 + self.mainDetailedzisegmap_89402_var = program.mainDetailedzisegmap_89402 + self.mainDetailedzisegmap_89737_var = program.mainDetailedzisegmap_89737 + self.mainDetailedzisegmap_89785_var = program.mainDetailedzisegmap_89785 + self.mainDetailedzisegmap_89860_var = program.mainDetailedzisegmap_89860 + self.mainDetailedzisegmap_89908_var = program.mainDetailedzisegmap_89908 + self.mainDetailedzisegmap_intragroup_115661_var = program.mainDetailedzisegmap_intragroup_115661 + self.mainDetailedzisegmap_intragroup_116023_var = program.mainDetailedzisegmap_intragroup_116023 + 
self.mainDetailedzisegmap_intragroup_116342_var = program.mainDetailedzisegmap_intragroup_116342 + self.mainDetailedzisegmap_intragroup_116784_var = program.mainDetailedzisegmap_intragroup_116784 + self.mainDetailedzisegmap_intragroup_117146_var = program.mainDetailedzisegmap_intragroup_117146 + self.mainDetailedzisegmap_intragroup_117465_var = program.mainDetailedzisegmap_intragroup_117465 + self.mainDetailedzisegmap_intragroup_117900_var = program.mainDetailedzisegmap_intragroup_117900 + self.mainDetailedzisegmap_intragroup_118238_var = program.mainDetailedzisegmap_intragroup_118238 + self.mainDetailedzisegmap_intragroup_118391_var = program.mainDetailedzisegmap_intragroup_118391 + self.mainDetailedzisegmap_intragroup_118840_var = program.mainDetailedzisegmap_intragroup_118840 + self.mainDetailedzisegmap_intragroup_119132_var = program.mainDetailedzisegmap_intragroup_119132 + self.mainDetailedzisegmap_intragroup_77536_var = program.mainDetailedzisegmap_intragroup_77536 + self.mainDetailedzisegmap_intragroup_85470_var = program.mainDetailedzisegmap_intragroup_85470 + self.mainDetailedzisegmap_intragroup_85968_var = program.mainDetailedzisegmap_intragroup_85968 + self.mainDetailedzisegmap_intragroup_86392_var = program.mainDetailedzisegmap_intragroup_86392 + self.mainDetailedzisegmap_intragroup_87528_var = program.mainDetailedzisegmap_intragroup_87528 + self.mainDetailedzisegmap_intragroup_87666_var = program.mainDetailedzisegmap_intragroup_87666 + self.mainDetailedzisegmap_intragroup_88528_var = program.mainDetailedzisegmap_intragroup_88528 + self.mainDetailedzisegmap_intragroup_88740_var = program.mainDetailedzisegmap_intragroup_88740 + self.mainDetailedzisegmap_intragroup_89398_var = program.mainDetailedzisegmap_intragroup_89398 + self.mainDetailedzisegred_large_79431_var = program.mainDetailedzisegred_large_79431 + self.mainDetailedzisegred_large_79709_var = program.mainDetailedzisegred_large_79709 + self.mainDetailedzisegred_large_80466_var = 
program.mainDetailedzisegred_large_80466 + self.mainDetailedzisegred_large_83181_var = program.mainDetailedzisegred_large_83181 + self.mainDetailedzisegred_large_83459_var = program.mainDetailedzisegred_large_83459 + self.mainDetailedzisegred_large_84232_var = program.mainDetailedzisegred_large_84232 + self.mainDetailedzisegred_large_84385_var = program.mainDetailedzisegred_large_84385 + self.mainDetailedzisegred_large_84414_var = program.mainDetailedzisegred_large_84414 + self.mainDetailedzisegred_large_85711_var = program.mainDetailedzisegred_large_85711 + self.mainDetailedzisegred_large_85786_var = program.mainDetailedzisegred_large_85786 + self.mainDetailedzisegred_large_85813_var = program.mainDetailedzisegred_large_85813 + self.mainDetailedzisegred_large_86170_var = program.mainDetailedzisegred_large_86170 + self.mainDetailedzisegred_large_86200_var = program.mainDetailedzisegred_large_86200 + self.mainDetailedzisegred_large_86835_var = program.mainDetailedzisegred_large_86835 + self.mainDetailedzisegred_large_86960_var = program.mainDetailedzisegred_large_86960 + self.mainDetailedzisegred_large_87308_var = program.mainDetailedzisegred_large_87308 + self.mainDetailedzisegred_large_88192_var = program.mainDetailedzisegred_large_88192 + self.mainDetailedzisegred_large_88329_var = program.mainDetailedzisegred_large_88329 + self.mainDetailedzisegred_large_88459_var = program.mainDetailedzisegred_large_88459 + self.mainDetailedzisegred_large_88880_var = program.mainDetailedzisegred_large_88880 + self.mainDetailedzisegred_large_88904_var = program.mainDetailedzisegred_large_88904 + self.mainDetailedzisegred_large_89034_var = program.mainDetailedzisegred_large_89034 + self.mainDetailedzisegred_large_89828_var = program.mainDetailedzisegred_large_89828 + self.mainDetailedzisegred_nonseg_77489_var = program.mainDetailedzisegred_nonseg_77489 + self.mainDetailedzisegred_nonseg_77726_var = program.mainDetailedzisegred_nonseg_77726 + 
self.mainDetailedzisegred_nonseg_85434_var = program.mainDetailedzisegred_nonseg_85434 + self.mainDetailedzisegred_nonseg_88976_var = program.mainDetailedzisegred_nonseg_88976 + self.mainDetailedzisegred_small_79431_var = program.mainDetailedzisegred_small_79431 + self.mainDetailedzisegred_small_79709_var = program.mainDetailedzisegred_small_79709 + self.mainDetailedzisegred_small_80466_var = program.mainDetailedzisegred_small_80466 + self.mainDetailedzisegred_small_83181_var = program.mainDetailedzisegred_small_83181 + self.mainDetailedzisegred_small_83459_var = program.mainDetailedzisegred_small_83459 + self.mainDetailedzisegred_small_84232_var = program.mainDetailedzisegred_small_84232 + self.mainDetailedzisegred_small_84385_var = program.mainDetailedzisegred_small_84385 + self.mainDetailedzisegred_small_84414_var = program.mainDetailedzisegred_small_84414 + self.mainDetailedzisegred_small_85711_var = program.mainDetailedzisegred_small_85711 + self.mainDetailedzisegred_small_85786_var = program.mainDetailedzisegred_small_85786 + self.mainDetailedzisegred_small_85813_var = program.mainDetailedzisegred_small_85813 + self.mainDetailedzisegred_small_86170_var = program.mainDetailedzisegred_small_86170 + self.mainDetailedzisegred_small_86200_var = program.mainDetailedzisegred_small_86200 + self.mainDetailedzisegred_small_86835_var = program.mainDetailedzisegred_small_86835 + self.mainDetailedzisegred_small_86960_var = program.mainDetailedzisegred_small_86960 + self.mainDetailedzisegred_small_87308_var = program.mainDetailedzisegred_small_87308 + self.mainDetailedzisegred_small_88192_var = program.mainDetailedzisegred_small_88192 + self.mainDetailedzisegred_small_88329_var = program.mainDetailedzisegred_small_88329 + self.mainDetailedzisegred_small_88459_var = program.mainDetailedzisegred_small_88459 + self.mainDetailedzisegred_small_88880_var = program.mainDetailedzisegred_small_88880 + self.mainDetailedzisegred_small_88904_var = 
program.mainDetailedzisegred_small_88904 + self.mainDetailedzisegred_small_89034_var = program.mainDetailedzisegred_small_89034 + self.mainDetailedzisegred_small_89828_var = program.mainDetailedzisegred_small_89828 + self.mainMagnitudezicopy_126384_var = program.mainMagnitudezicopy_126384 + self.mainMagnitudezicopy_126389_var = program.mainMagnitudezicopy_126389 + self.mainMagnitudezicopy_126467_var = program.mainMagnitudezicopy_126467 + self.mainMagnitudezicopy_126563_var = program.mainMagnitudezicopy_126563 + self.mainMagnitudezicopy_126568_var = program.mainMagnitudezicopy_126568 + self.mainMagnitudezicopy_126573_var = program.mainMagnitudezicopy_126573 + self.mainMagnitudezicopy_126683_var = program.mainMagnitudezicopy_126683 + self.mainMagnitudezicopy_126787_var = program.mainMagnitudezicopy_126787 + self.mainMagnitudezicopy_126871_var = program.mainMagnitudezicopy_126871 + self.mainMagnitudezicopy_127135_var = program.mainMagnitudezicopy_127135 + self.mainMagnitudezicopy_127140_var = program.mainMagnitudezicopy_127140 + self.mainMagnitudezicopy_127208_var = program.mainMagnitudezicopy_127208 + self.mainMagnitudezicopy_127213_var = program.mainMagnitudezicopy_127213 + self.mainMagnitudezicopy_127218_var = program.mainMagnitudezicopy_127218 + self.mainMagnitudezicopy_127467_var = program.mainMagnitudezicopy_127467 + self.mainMagnitudezicopy_127571_var = program.mainMagnitudezicopy_127571 + self.mainMagnitudezicopy_127655_var = program.mainMagnitudezicopy_127655 + self.mainMagnitudezicopy_127919_var = program.mainMagnitudezicopy_127919 + self.mainMagnitudezicopy_127924_var = program.mainMagnitudezicopy_127924 + self.mainMagnitudezicopy_129313_var = program.mainMagnitudezicopy_129313 + self.mainMagnitudeziscan_stage1_101483_var = program.mainMagnitudeziscan_stage1_101483 + self.mainMagnitudeziscan_stage1_102464_var = program.mainMagnitudeziscan_stage1_102464 + self.mainMagnitudeziscan_stage1_90482_var = program.mainMagnitudeziscan_stage1_90482 + 
self.mainMagnitudeziscan_stage1_98961_var = program.mainMagnitudeziscan_stage1_98961 + self.mainMagnitudeziscan_stage2_101483_var = program.mainMagnitudeziscan_stage2_101483 + self.mainMagnitudeziscan_stage2_102464_var = program.mainMagnitudeziscan_stage2_102464 + self.mainMagnitudeziscan_stage2_90482_var = program.mainMagnitudeziscan_stage2_90482 + self.mainMagnitudeziscan_stage2_98961_var = program.mainMagnitudeziscan_stage2_98961 + self.mainMagnitudeziscan_stage3_101483_var = program.mainMagnitudeziscan_stage3_101483 + self.mainMagnitudeziscan_stage3_102464_var = program.mainMagnitudeziscan_stage3_102464 + self.mainMagnitudeziscan_stage3_90482_var = program.mainMagnitudeziscan_stage3_90482 + self.mainMagnitudeziscan_stage3_98961_var = program.mainMagnitudeziscan_stage3_98961 + self.mainMagnitudezisegmap_100025_var = program.mainMagnitudezisegmap_100025 + self.mainMagnitudezisegmap_100071_var = program.mainMagnitudezisegmap_100071 + self.mainMagnitudezisegmap_100441_var = program.mainMagnitudezisegmap_100441 + self.mainMagnitudezisegmap_100552_var = program.mainMagnitudezisegmap_100552 + self.mainMagnitudezisegmap_100572_var = program.mainMagnitudezisegmap_100572 + self.mainMagnitudezisegmap_100662_var = program.mainMagnitudezisegmap_100662 + self.mainMagnitudezisegmap_100745_var = program.mainMagnitudezisegmap_100745 + self.mainMagnitudezisegmap_100954_var = program.mainMagnitudezisegmap_100954 + self.mainMagnitudezisegmap_101095_var = program.mainMagnitudezisegmap_101095 + self.mainMagnitudezisegmap_101227_var = program.mainMagnitudezisegmap_101227 + self.mainMagnitudezisegmap_101405_var = program.mainMagnitudezisegmap_101405 + self.mainMagnitudezisegmap_101576_var = program.mainMagnitudezisegmap_101576 + self.mainMagnitudezisegmap_101689_var = program.mainMagnitudezisegmap_101689 + self.mainMagnitudezisegmap_101812_var = program.mainMagnitudezisegmap_101812 + self.mainMagnitudezisegmap_101909_var = program.mainMagnitudezisegmap_101909 + 
self.mainMagnitudezisegmap_101949_var = program.mainMagnitudezisegmap_101949 + self.mainMagnitudezisegmap_102039_var = program.mainMagnitudezisegmap_102039 + self.mainMagnitudezisegmap_102093_var = program.mainMagnitudezisegmap_102093 + self.mainMagnitudezisegmap_102352_var = program.mainMagnitudezisegmap_102352 + self.mainMagnitudezisegmap_102503_var = program.mainMagnitudezisegmap_102503 + self.mainMagnitudezisegmap_90087_var = program.mainMagnitudezisegmap_90087 + self.mainMagnitudezisegmap_90187_var = program.mainMagnitudezisegmap_90187 + self.mainMagnitudezisegmap_90280_var = program.mainMagnitudezisegmap_90280 + self.mainMagnitudezisegmap_90404_var = program.mainMagnitudezisegmap_90404 + self.mainMagnitudezisegmap_90682_var = program.mainMagnitudezisegmap_90682 + self.mainMagnitudezisegmap_90818_var = program.mainMagnitudezisegmap_90818 + self.mainMagnitudezisegmap_90949_var = program.mainMagnitudezisegmap_90949 + self.mainMagnitudezisegmap_91871_var = program.mainMagnitudezisegmap_91871 + self.mainMagnitudezisegmap_91893_var = program.mainMagnitudezisegmap_91893 + self.mainMagnitudezisegmap_91941_var = program.mainMagnitudezisegmap_91941 + self.mainMagnitudezisegmap_92098_var = program.mainMagnitudezisegmap_92098 + self.mainMagnitudezisegmap_92141_var = program.mainMagnitudezisegmap_92141 + self.mainMagnitudezisegmap_92203_var = program.mainMagnitudezisegmap_92203 + self.mainMagnitudezisegmap_92419_var = program.mainMagnitudezisegmap_92419 + self.mainMagnitudezisegmap_92507_var = program.mainMagnitudezisegmap_92507 + self.mainMagnitudezisegmap_92519_var = program.mainMagnitudezisegmap_92519 + self.mainMagnitudezisegmap_92845_var = program.mainMagnitudezisegmap_92845 + self.mainMagnitudezisegmap_92880_var = program.mainMagnitudezisegmap_92880 + self.mainMagnitudezisegmap_92925_var = program.mainMagnitudezisegmap_92925 + self.mainMagnitudezisegmap_93213_var = program.mainMagnitudezisegmap_93213 + self.mainMagnitudezisegmap_93258_var = 
program.mainMagnitudezisegmap_93258 + self.mainMagnitudezisegmap_93273_var = program.mainMagnitudezisegmap_93273 + self.mainMagnitudezisegmap_93282_var = program.mainMagnitudezisegmap_93282 + self.mainMagnitudezisegmap_94299_var = program.mainMagnitudezisegmap_94299 + self.mainMagnitudezisegmap_94438_var = program.mainMagnitudezisegmap_94438 + self.mainMagnitudezisegmap_95535_var = program.mainMagnitudezisegmap_95535 + self.mainMagnitudezisegmap_95607_var = program.mainMagnitudezisegmap_95607 + self.mainMagnitudezisegmap_95629_var = program.mainMagnitudezisegmap_95629 + self.mainMagnitudezisegmap_95677_var = program.mainMagnitudezisegmap_95677 + self.mainMagnitudezisegmap_95834_var = program.mainMagnitudezisegmap_95834 + self.mainMagnitudezisegmap_95889_var = program.mainMagnitudezisegmap_95889 + self.mainMagnitudezisegmap_95953_var = program.mainMagnitudezisegmap_95953 + self.mainMagnitudezisegmap_96169_var = program.mainMagnitudezisegmap_96169 + self.mainMagnitudezisegmap_96257_var = program.mainMagnitudezisegmap_96257 + self.mainMagnitudezisegmap_96269_var = program.mainMagnitudezisegmap_96269 + self.mainMagnitudezisegmap_96596_var = program.mainMagnitudezisegmap_96596 + self.mainMagnitudezisegmap_96631_var = program.mainMagnitudezisegmap_96631 + self.mainMagnitudezisegmap_96689_var = program.mainMagnitudezisegmap_96689 + self.mainMagnitudezisegmap_96979_var = program.mainMagnitudezisegmap_96979 + self.mainMagnitudezisegmap_97024_var = program.mainMagnitudezisegmap_97024 + self.mainMagnitudezisegmap_97039_var = program.mainMagnitudezisegmap_97039 + self.mainMagnitudezisegmap_97048_var = program.mainMagnitudezisegmap_97048 + self.mainMagnitudezisegmap_97197_var = program.mainMagnitudezisegmap_97197 + self.mainMagnitudezisegmap_97228_var = program.mainMagnitudezisegmap_97228 + self.mainMagnitudezisegmap_98306_var = program.mainMagnitudezisegmap_98306 + self.mainMagnitudezisegmap_98483_var = program.mainMagnitudezisegmap_98483 + self.mainMagnitudezisegmap_98521_var 
= program.mainMagnitudezisegmap_98521 + self.mainMagnitudezisegmap_98598_var = program.mainMagnitudezisegmap_98598 + self.mainMagnitudezisegmap_98629_var = program.mainMagnitudezisegmap_98629 + self.mainMagnitudezisegmap_98804_var = program.mainMagnitudezisegmap_98804 + self.mainMagnitudezisegmap_98980_var = program.mainMagnitudezisegmap_98980 + self.mainMagnitudezisegmap_99015_var = program.mainMagnitudezisegmap_99015 + self.mainMagnitudezisegmap_99043_var = program.mainMagnitudezisegmap_99043 + self.mainMagnitudezisegmap_99145_var = program.mainMagnitudezisegmap_99145 + self.mainMagnitudezisegmap_99177_var = program.mainMagnitudezisegmap_99177 + self.mainMagnitudezisegmap_99641_var = program.mainMagnitudezisegmap_99641 + self.mainMagnitudezisegmap_99681_var = program.mainMagnitudezisegmap_99681 + self.mainMagnitudezisegmap_99808_var = program.mainMagnitudezisegmap_99808 + self.mainMagnitudezisegmap_99986_var = program.mainMagnitudezisegmap_99986 + self.mainMagnitudezisegmap_intragroup_100360_var = program.mainMagnitudezisegmap_intragroup_100360 + self.mainMagnitudezisegmap_intragroup_100498_var = program.mainMagnitudezisegmap_intragroup_100498 + self.mainMagnitudezisegmap_intragroup_101360_var = program.mainMagnitudezisegmap_intragroup_101360 + self.mainMagnitudezisegmap_intragroup_101572_var = program.mainMagnitudezisegmap_intragroup_101572 + self.mainMagnitudezisegmap_intragroup_102227_var = program.mainMagnitudezisegmap_intragroup_102227 + self.mainMagnitudezisegmap_intragroup_115661_var = program.mainMagnitudezisegmap_intragroup_115661 + self.mainMagnitudezisegmap_intragroup_116023_var = program.mainMagnitudezisegmap_intragroup_116023 + self.mainMagnitudezisegmap_intragroup_116342_var = program.mainMagnitudezisegmap_intragroup_116342 + self.mainMagnitudezisegmap_intragroup_116784_var = program.mainMagnitudezisegmap_intragroup_116784 + self.mainMagnitudezisegmap_intragroup_117146_var = program.mainMagnitudezisegmap_intragroup_117146 + 
self.mainMagnitudezisegmap_intragroup_117465_var = program.mainMagnitudezisegmap_intragroup_117465 + self.mainMagnitudezisegmap_intragroup_117900_var = program.mainMagnitudezisegmap_intragroup_117900 + self.mainMagnitudezisegmap_intragroup_118238_var = program.mainMagnitudezisegmap_intragroup_118238 + self.mainMagnitudezisegmap_intragroup_118391_var = program.mainMagnitudezisegmap_intragroup_118391 + self.mainMagnitudezisegmap_intragroup_118840_var = program.mainMagnitudezisegmap_intragroup_118840 + self.mainMagnitudezisegmap_intragroup_119132_var = program.mainMagnitudezisegmap_intragroup_119132 + self.mainMagnitudezisegmap_intragroup_90368_var = program.mainMagnitudezisegmap_intragroup_90368 + self.mainMagnitudezisegmap_intragroup_98302_var = program.mainMagnitudezisegmap_intragroup_98302 + self.mainMagnitudezisegmap_intragroup_98800_var = program.mainMagnitudezisegmap_intragroup_98800 + self.mainMagnitudezisegmap_intragroup_99224_var = program.mainMagnitudezisegmap_intragroup_99224 + self.mainMagnitudezisegred_large_100140_var = program.mainMagnitudezisegred_large_100140 + self.mainMagnitudezisegred_large_101024_var = program.mainMagnitudezisegred_large_101024 + self.mainMagnitudezisegred_large_101161_var = program.mainMagnitudezisegred_large_101161 + self.mainMagnitudezisegred_large_101291_var = program.mainMagnitudezisegred_large_101291 + self.mainMagnitudezisegred_large_101712_var = program.mainMagnitudezisegred_large_101712 + self.mainMagnitudezisegred_large_101736_var = program.mainMagnitudezisegred_large_101736 + self.mainMagnitudezisegred_large_101866_var = program.mainMagnitudezisegred_large_101866 + self.mainMagnitudezisegred_large_102410_var = program.mainMagnitudezisegred_large_102410 + self.mainMagnitudezisegred_large_92263_var = program.mainMagnitudezisegred_large_92263 + self.mainMagnitudezisegred_large_92541_var = program.mainMagnitudezisegred_large_92541 + self.mainMagnitudezisegred_large_93298_var = program.mainMagnitudezisegred_large_93298 + 
self.mainMagnitudezisegred_large_96013_var = program.mainMagnitudezisegred_large_96013 + self.mainMagnitudezisegred_large_96291_var = program.mainMagnitudezisegred_large_96291 + self.mainMagnitudezisegred_large_97064_var = program.mainMagnitudezisegred_large_97064 + self.mainMagnitudezisegred_large_97217_var = program.mainMagnitudezisegred_large_97217 + self.mainMagnitudezisegred_large_97246_var = program.mainMagnitudezisegred_large_97246 + self.mainMagnitudezisegred_large_98543_var = program.mainMagnitudezisegred_large_98543 + self.mainMagnitudezisegred_large_98618_var = program.mainMagnitudezisegred_large_98618 + self.mainMagnitudezisegred_large_98645_var = program.mainMagnitudezisegred_large_98645 + self.mainMagnitudezisegred_large_99002_var = program.mainMagnitudezisegred_large_99002 + self.mainMagnitudezisegred_large_99032_var = program.mainMagnitudezisegred_large_99032 + self.mainMagnitudezisegred_large_99667_var = program.mainMagnitudezisegred_large_99667 + self.mainMagnitudezisegred_large_99792_var = program.mainMagnitudezisegred_large_99792 + self.mainMagnitudezisegred_nonseg_101808_var = program.mainMagnitudezisegred_nonseg_101808 + self.mainMagnitudezisegred_nonseg_90321_var = program.mainMagnitudezisegred_nonseg_90321 + self.mainMagnitudezisegred_nonseg_90558_var = program.mainMagnitudezisegred_nonseg_90558 + self.mainMagnitudezisegred_nonseg_98266_var = program.mainMagnitudezisegred_nonseg_98266 + self.mainMagnitudezisegred_small_100140_var = program.mainMagnitudezisegred_small_100140 + self.mainMagnitudezisegred_small_101024_var = program.mainMagnitudezisegred_small_101024 + self.mainMagnitudezisegred_small_101161_var = program.mainMagnitudezisegred_small_101161 + self.mainMagnitudezisegred_small_101291_var = program.mainMagnitudezisegred_small_101291 + self.mainMagnitudezisegred_small_101712_var = program.mainMagnitudezisegred_small_101712 + self.mainMagnitudezisegred_small_101736_var = program.mainMagnitudezisegred_small_101736 + 
self.mainMagnitudezisegred_small_101866_var = program.mainMagnitudezisegred_small_101866 + self.mainMagnitudezisegred_small_102410_var = program.mainMagnitudezisegred_small_102410 + self.mainMagnitudezisegred_small_92263_var = program.mainMagnitudezisegred_small_92263 + self.mainMagnitudezisegred_small_92541_var = program.mainMagnitudezisegred_small_92541 + self.mainMagnitudezisegred_small_93298_var = program.mainMagnitudezisegred_small_93298 + self.mainMagnitudezisegred_small_96013_var = program.mainMagnitudezisegred_small_96013 + self.mainMagnitudezisegred_small_96291_var = program.mainMagnitudezisegred_small_96291 + self.mainMagnitudezisegred_small_97064_var = program.mainMagnitudezisegred_small_97064 + self.mainMagnitudezisegred_small_97217_var = program.mainMagnitudezisegred_small_97217 + self.mainMagnitudezisegred_small_97246_var = program.mainMagnitudezisegred_small_97246 + self.mainMagnitudezisegred_small_98543_var = program.mainMagnitudezisegred_small_98543 + self.mainMagnitudezisegred_small_98618_var = program.mainMagnitudezisegred_small_98618 + self.mainMagnitudezisegred_small_98645_var = program.mainMagnitudezisegred_small_98645 + self.mainMagnitudezisegred_small_99002_var = program.mainMagnitudezisegred_small_99002 + self.mainMagnitudezisegred_small_99032_var = program.mainMagnitudezisegred_small_99032 + self.mainMagnitudezisegred_small_99667_var = program.mainMagnitudezisegred_small_99667 + self.mainMagnitudezisegred_small_99792_var = program.mainMagnitudezisegred_small_99792 + self.constants = {} + mainzicounter_mem_126344 = np.array([np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0)], dtype=np.int32) + static_mem_129837 = opencl_alloc(self, 40, "static_mem_129837") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129837, + normaliseArray(mainzicounter_mem_126344), + is_blocking=synchronous) + self.mainzicounter_mem_126344 = static_mem_129837 + 
mainzicounter_mem_126495 = np.array([np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0)], dtype=np.int32) + static_mem_129839 = opencl_alloc(self, 40, "static_mem_129839") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129839, + normaliseArray(mainzicounter_mem_126495), + is_blocking=synchronous) + self.mainzicounter_mem_126495 = static_mem_129839 + mainzicounter_mem_126731 = np.zeros(10240, dtype=np.int32) + static_mem_129841 = opencl_alloc(self, 40960, "static_mem_129841") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129841, + normaliseArray(mainzicounter_mem_126731), + is_blocking=synchronous) + self.mainzicounter_mem_126731 = static_mem_129841 + mainzicounter_mem_126956 = np.zeros(10240, dtype=np.int32) + static_mem_129844 = opencl_alloc(self, 40960, "static_mem_129844") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129844, + normaliseArray(mainzicounter_mem_126956), + is_blocking=synchronous) + self.mainzicounter_mem_126956 = static_mem_129844 + mainzicounter_mem_127100 = np.zeros(10240, dtype=np.int32) + static_mem_129847 = opencl_alloc(self, 40960, "static_mem_129847") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129847, + normaliseArray(mainzicounter_mem_127100), + is_blocking=synchronous) + self.mainzicounter_mem_127100 = static_mem_129847 + mainzicounter_mem_127363 = np.zeros(10240, dtype=np.int32) + static_mem_129848 = opencl_alloc(self, 40960, "static_mem_129848") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129848, + normaliseArray(mainzicounter_mem_127363), + is_blocking=synchronous) + self.mainzicounter_mem_127363 = static_mem_129848 + mainzicounter_mem_127429 = np.zeros(10240, dtype=np.int32) + static_mem_129849 = opencl_alloc(self, 40960, "static_mem_129849") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129849, + normaliseArray(mainzicounter_mem_127429), + is_blocking=synchronous) + 
self.mainzicounter_mem_127429 = static_mem_129849 + mainzicounter_mem_127515 = np.zeros(10240, dtype=np.int32) + static_mem_129850 = opencl_alloc(self, 40960, "static_mem_129850") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129850, + normaliseArray(mainzicounter_mem_127515), + is_blocking=synchronous) + self.mainzicounter_mem_127515 = static_mem_129850 + mainzicounter_mem_127740 = np.zeros(10240, dtype=np.int32) + static_mem_129853 = opencl_alloc(self, 40960, "static_mem_129853") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129853, + normaliseArray(mainzicounter_mem_127740), + is_blocking=synchronous) + self.mainzicounter_mem_127740 = static_mem_129853 + mainzicounter_mem_127884 = np.zeros(10240, dtype=np.int32) + static_mem_129856 = opencl_alloc(self, 40960, "static_mem_129856") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129856, + normaliseArray(mainzicounter_mem_127884), + is_blocking=synchronous) + self.mainzicounter_mem_127884 = static_mem_129856 + mainzicounter_mem_127974 = np.array([np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0)], dtype=np.int32) + static_mem_129857 = opencl_alloc(self, 40, "static_mem_129857") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129857, + normaliseArray(mainzicounter_mem_127974), + is_blocking=synchronous) + self.mainzicounter_mem_127974 = static_mem_129857 + mainzicounter_mem_128096 = np.zeros(10240, dtype=np.int32) + static_mem_129859 = opencl_alloc(self, 40960, "static_mem_129859") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129859, + normaliseArray(mainzicounter_mem_128096), + is_blocking=synchronous) + self.mainzicounter_mem_128096 = static_mem_129859 + mainzicounter_mem_128162 = np.zeros(10240, dtype=np.int32) + static_mem_129860 = opencl_alloc(self, 40960, "static_mem_129860") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129860, + 
normaliseArray(mainzicounter_mem_128162), + is_blocking=synchronous) + self.mainzicounter_mem_128162 = static_mem_129860 + mainzicounter_mem_128238 = np.zeros(10240, dtype=np.int32) + static_mem_129861 = opencl_alloc(self, 40960, "static_mem_129861") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129861, + normaliseArray(mainzicounter_mem_128238), + is_blocking=synchronous) + self.mainzicounter_mem_128238 = static_mem_129861 + mainzicounter_mem_128359 = np.zeros(10240, dtype=np.int32) + static_mem_129864 = opencl_alloc(self, 40960, "static_mem_129864") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129864, + normaliseArray(mainzicounter_mem_128359), + is_blocking=synchronous) + self.mainzicounter_mem_128359 = static_mem_129864 + mainzicounter_mem_128430 = np.zeros(10240, dtype=np.int32) + static_mem_129865 = opencl_alloc(self, 40960, "static_mem_129865") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129865, + normaliseArray(mainzicounter_mem_128430), + is_blocking=synchronous) + self.mainzicounter_mem_128430 = static_mem_129865 + mainzicounter_mem_128595 = np.zeros(10240, dtype=np.int32) + static_mem_129866 = opencl_alloc(self, 40960, "static_mem_129866") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129866, + normaliseArray(mainzicounter_mem_128595), + is_blocking=synchronous) + self.mainzicounter_mem_128595 = static_mem_129866 + mainzicounter_mem_128660 = np.zeros(10240, dtype=np.int32) + static_mem_129867 = opencl_alloc(self, 40960, "static_mem_129867") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129867, + normaliseArray(mainzicounter_mem_128660), + is_blocking=synchronous) + self.mainzicounter_mem_128660 = static_mem_129867 + mainzicounter_mem_128794 = np.zeros(10240, dtype=np.int32) + static_mem_129868 = opencl_alloc(self, 40960, "static_mem_129868") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129868, + normaliseArray(mainzicounter_mem_128794), + is_blocking=synchronous) + 
self.mainzicounter_mem_128794 = static_mem_129868 + mainzicounter_mem_128984 = np.zeros(10240, dtype=np.int32) + static_mem_129871 = opencl_alloc(self, 40960, "static_mem_129871") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129871, + normaliseArray(mainzicounter_mem_128984), + is_blocking=synchronous) + self.mainzicounter_mem_128984 = static_mem_129871 + mainzicounter_mem_129072 = np.zeros(10240, dtype=np.int32) + static_mem_129872 = opencl_alloc(self, 40960, "static_mem_129872") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129872, + normaliseArray(mainzicounter_mem_129072), + is_blocking=synchronous) + self.mainzicounter_mem_129072 = static_mem_129872 + mainzicounter_mem_129204 = np.zeros(10240, dtype=np.int32) + static_mem_129873 = opencl_alloc(self, 40960, "static_mem_129873") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129873, + normaliseArray(mainzicounter_mem_129204), + is_blocking=synchronous) + self.mainzicounter_mem_129204 = static_mem_129873 + mainzicounter_mem_129374 = np.zeros(10240, dtype=np.int32) + static_mem_129874 = opencl_alloc(self, 40960, "static_mem_129874") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129874, + normaliseArray(mainzicounter_mem_129374), + is_blocking=synchronous) + self.mainzicounter_mem_129374 = static_mem_129874 + mainzicounter_mem_129434 = np.zeros(10240, dtype=np.int32) + static_mem_129875 = opencl_alloc(self, 40960, "static_mem_129875") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129875, + normaliseArray(mainzicounter_mem_129434), + is_blocking=synchronous) + self.mainzicounter_mem_129434 = static_mem_129875 + mainzicounter_mem_129474 = np.array([np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0), np.int32(0), np.int32(0), + np.int32(0)], dtype=np.int32) + static_mem_129876 = opencl_alloc(self, 40, "static_mem_129876") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129876, + 
normaliseArray(mainzicounter_mem_129474), + is_blocking=synchronous) + self.mainzicounter_mem_129474 = static_mem_129876 + mainzicounter_mem_129539 = np.zeros(10240, dtype=np.int32) + static_mem_129878 = opencl_alloc(self, 40960, "static_mem_129878") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129878, + normaliseArray(mainzicounter_mem_129539), + is_blocking=synchronous) + self.mainzicounter_mem_129539 = static_mem_129878 + mainzicounter_mem_129716 = np.zeros(10240, dtype=np.int32) + static_mem_129880 = opencl_alloc(self, 40960, "static_mem_129880") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129880, + normaliseArray(mainzicounter_mem_129716), + is_blocking=synchronous) + self.mainzicounter_mem_129716 = static_mem_129880 + mainDetailedzicounter_mem_126356 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129881 = opencl_alloc(self, 40, "static_mem_129881") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129881, + normaliseArray(mainDetailedzicounter_mem_126356), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_126356 = static_mem_129881 + mainDetailedzicounter_mem_126507 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129883 = opencl_alloc(self, 40, "static_mem_129883") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129883, + normaliseArray(mainDetailedzicounter_mem_126507), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_126507 = static_mem_129883 + mainDetailedzicounter_mem_126743 = np.zeros(10240, dtype=np.int32) + static_mem_129885 = opencl_alloc(self, 40960, "static_mem_129885") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129885, + normaliseArray(mainDetailedzicounter_mem_126743), + is_blocking=synchronous) + 
self.mainDetailedzicounter_mem_126743 = static_mem_129885 + mainDetailedzicounter_mem_126968 = np.zeros(10240, dtype=np.int32) + static_mem_129888 = opencl_alloc(self, 40960, "static_mem_129888") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129888, + normaliseArray(mainDetailedzicounter_mem_126968), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_126968 = static_mem_129888 + mainDetailedzicounter_mem_127112 = np.zeros(10240, dtype=np.int32) + static_mem_129891 = opencl_alloc(self, 40960, "static_mem_129891") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129891, + normaliseArray(mainDetailedzicounter_mem_127112), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127112 = static_mem_129891 + mainDetailedzicounter_mem_127375 = np.zeros(10240, dtype=np.int32) + static_mem_129892 = opencl_alloc(self, 40960, "static_mem_129892") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129892, + normaliseArray(mainDetailedzicounter_mem_127375), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127375 = static_mem_129892 + mainDetailedzicounter_mem_127441 = np.zeros(10240, dtype=np.int32) + static_mem_129893 = opencl_alloc(self, 40960, "static_mem_129893") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129893, + normaliseArray(mainDetailedzicounter_mem_127441), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127441 = static_mem_129893 + mainDetailedzicounter_mem_127527 = np.zeros(10240, dtype=np.int32) + static_mem_129894 = opencl_alloc(self, 40960, "static_mem_129894") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129894, + normaliseArray(mainDetailedzicounter_mem_127527), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127527 = static_mem_129894 + mainDetailedzicounter_mem_127752 = np.zeros(10240, dtype=np.int32) + static_mem_129897 = opencl_alloc(self, 40960, "static_mem_129897") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129897, + 
normaliseArray(mainDetailedzicounter_mem_127752), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127752 = static_mem_129897 + mainDetailedzicounter_mem_127896 = np.zeros(10240, dtype=np.int32) + static_mem_129900 = opencl_alloc(self, 40960, "static_mem_129900") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129900, + normaliseArray(mainDetailedzicounter_mem_127896), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127896 = static_mem_129900 + mainDetailedzicounter_mem_127986 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129901 = opencl_alloc(self, 40, "static_mem_129901") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129901, + normaliseArray(mainDetailedzicounter_mem_127986), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_127986 = static_mem_129901 + mainDetailedzicounter_mem_128108 = np.zeros(10240, dtype=np.int32) + static_mem_129903 = opencl_alloc(self, 40960, "static_mem_129903") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129903, + normaliseArray(mainDetailedzicounter_mem_128108), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128108 = static_mem_129903 + mainDetailedzicounter_mem_128174 = np.zeros(10240, dtype=np.int32) + static_mem_129904 = opencl_alloc(self, 40960, "static_mem_129904") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129904, + normaliseArray(mainDetailedzicounter_mem_128174), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128174 = static_mem_129904 + mainDetailedzicounter_mem_128250 = np.zeros(10240, dtype=np.int32) + static_mem_129905 = opencl_alloc(self, 40960, "static_mem_129905") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129905, + normaliseArray(mainDetailedzicounter_mem_128250), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128250 = static_mem_129905 + 
mainDetailedzicounter_mem_128371 = np.zeros(10240, dtype=np.int32) + static_mem_129908 = opencl_alloc(self, 40960, "static_mem_129908") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129908, + normaliseArray(mainDetailedzicounter_mem_128371), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128371 = static_mem_129908 + mainDetailedzicounter_mem_128442 = np.zeros(10240, dtype=np.int32) + static_mem_129909 = opencl_alloc(self, 40960, "static_mem_129909") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129909, + normaliseArray(mainDetailedzicounter_mem_128442), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128442 = static_mem_129909 + mainDetailedzicounter_mem_128607 = np.zeros(10240, dtype=np.int32) + static_mem_129910 = opencl_alloc(self, 40960, "static_mem_129910") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129910, + normaliseArray(mainDetailedzicounter_mem_128607), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128607 = static_mem_129910 + mainDetailedzicounter_mem_128672 = np.zeros(10240, dtype=np.int32) + static_mem_129911 = opencl_alloc(self, 40960, "static_mem_129911") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129911, + normaliseArray(mainDetailedzicounter_mem_128672), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128672 = static_mem_129911 + mainDetailedzicounter_mem_128806 = np.zeros(10240, dtype=np.int32) + static_mem_129912 = opencl_alloc(self, 40960, "static_mem_129912") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129912, + normaliseArray(mainDetailedzicounter_mem_128806), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_128806 = static_mem_129912 + mainDetailedzicounter_mem_128996 = np.zeros(10240, dtype=np.int32) + static_mem_129915 = opencl_alloc(self, 40960, "static_mem_129915") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129915, + normaliseArray(mainDetailedzicounter_mem_128996), + 
is_blocking=synchronous) + self.mainDetailedzicounter_mem_128996 = static_mem_129915 + mainDetailedzicounter_mem_129084 = np.zeros(10240, dtype=np.int32) + static_mem_129916 = opencl_alloc(self, 40960, "static_mem_129916") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129916, + normaliseArray(mainDetailedzicounter_mem_129084), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129084 = static_mem_129916 + mainDetailedzicounter_mem_129216 = np.zeros(10240, dtype=np.int32) + static_mem_129917 = opencl_alloc(self, 40960, "static_mem_129917") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129917, + normaliseArray(mainDetailedzicounter_mem_129216), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129216 = static_mem_129917 + mainDetailedzicounter_mem_129386 = np.zeros(10240, dtype=np.int32) + static_mem_129918 = opencl_alloc(self, 40960, "static_mem_129918") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129918, + normaliseArray(mainDetailedzicounter_mem_129386), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129386 = static_mem_129918 + mainDetailedzicounter_mem_129446 = np.zeros(10240, dtype=np.int32) + static_mem_129919 = opencl_alloc(self, 40960, "static_mem_129919") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129919, + normaliseArray(mainDetailedzicounter_mem_129446), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129446 = static_mem_129919 + mainDetailedzicounter_mem_129486 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129920 = opencl_alloc(self, 40, "static_mem_129920") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129920, + normaliseArray(mainDetailedzicounter_mem_129486), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129486 = static_mem_129920 + mainDetailedzicounter_mem_129551 = np.zeros(10240, 
dtype=np.int32) + static_mem_129922 = opencl_alloc(self, 40960, "static_mem_129922") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129922, + normaliseArray(mainDetailedzicounter_mem_129551), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129551 = static_mem_129922 + mainDetailedzicounter_mem_129774 = np.zeros(10240, dtype=np.int32) + static_mem_129924 = opencl_alloc(self, 40960, "static_mem_129924") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129924, + normaliseArray(mainDetailedzicounter_mem_129774), + is_blocking=synchronous) + self.mainDetailedzicounter_mem_129774 = static_mem_129924 + mainMagnitudezicounter_mem_126345 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129925 = opencl_alloc(self, 40, "static_mem_129925") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129925, + normaliseArray(mainMagnitudezicounter_mem_126345), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_126345 = static_mem_129925 + mainMagnitudezicounter_mem_126496 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129927 = opencl_alloc(self, 40, "static_mem_129927") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129927, + normaliseArray(mainMagnitudezicounter_mem_126496), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_126496 = static_mem_129927 + mainMagnitudezicounter_mem_126732 = np.zeros(10240, dtype=np.int32) + static_mem_129929 = opencl_alloc(self, 40960, "static_mem_129929") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129929, + normaliseArray(mainMagnitudezicounter_mem_126732), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_126732 = static_mem_129929 + mainMagnitudezicounter_mem_126957 = np.zeros(10240, 
dtype=np.int32) + static_mem_129932 = opencl_alloc(self, 40960, "static_mem_129932") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129932, + normaliseArray(mainMagnitudezicounter_mem_126957), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_126957 = static_mem_129932 + mainMagnitudezicounter_mem_127101 = np.zeros(10240, dtype=np.int32) + static_mem_129935 = opencl_alloc(self, 40960, "static_mem_129935") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129935, + normaliseArray(mainMagnitudezicounter_mem_127101), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127101 = static_mem_129935 + mainMagnitudezicounter_mem_127364 = np.zeros(10240, dtype=np.int32) + static_mem_129936 = opencl_alloc(self, 40960, "static_mem_129936") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129936, + normaliseArray(mainMagnitudezicounter_mem_127364), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127364 = static_mem_129936 + mainMagnitudezicounter_mem_127430 = np.zeros(10240, dtype=np.int32) + static_mem_129937 = opencl_alloc(self, 40960, "static_mem_129937") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129937, + normaliseArray(mainMagnitudezicounter_mem_127430), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127430 = static_mem_129937 + mainMagnitudezicounter_mem_127516 = np.zeros(10240, dtype=np.int32) + static_mem_129938 = opencl_alloc(self, 40960, "static_mem_129938") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129938, + normaliseArray(mainMagnitudezicounter_mem_127516), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127516 = static_mem_129938 + mainMagnitudezicounter_mem_127741 = np.zeros(10240, dtype=np.int32) + static_mem_129941 = opencl_alloc(self, 40960, "static_mem_129941") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129941, + normaliseArray(mainMagnitudezicounter_mem_127741), + is_blocking=synchronous) + 
self.mainMagnitudezicounter_mem_127741 = static_mem_129941 + mainMagnitudezicounter_mem_127885 = np.zeros(10240, dtype=np.int32) + static_mem_129944 = opencl_alloc(self, 40960, "static_mem_129944") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129944, + normaliseArray(mainMagnitudezicounter_mem_127885), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127885 = static_mem_129944 + mainMagnitudezicounter_mem_127975 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129945 = opencl_alloc(self, 40, "static_mem_129945") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129945, + normaliseArray(mainMagnitudezicounter_mem_127975), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_127975 = static_mem_129945 + mainMagnitudezicounter_mem_128097 = np.zeros(10240, dtype=np.int32) + static_mem_129947 = opencl_alloc(self, 40960, "static_mem_129947") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129947, + normaliseArray(mainMagnitudezicounter_mem_128097), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128097 = static_mem_129947 + mainMagnitudezicounter_mem_128163 = np.zeros(10240, dtype=np.int32) + static_mem_129948 = opencl_alloc(self, 40960, "static_mem_129948") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129948, + normaliseArray(mainMagnitudezicounter_mem_128163), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128163 = static_mem_129948 + mainMagnitudezicounter_mem_128239 = np.zeros(10240, dtype=np.int32) + static_mem_129949 = opencl_alloc(self, 40960, "static_mem_129949") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129949, + normaliseArray(mainMagnitudezicounter_mem_128239), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128239 = static_mem_129949 + mainMagnitudezicounter_mem_128360 = np.zeros(10240, dtype=np.int32) + 
static_mem_129952 = opencl_alloc(self, 40960, "static_mem_129952") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129952, + normaliseArray(mainMagnitudezicounter_mem_128360), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128360 = static_mem_129952 + mainMagnitudezicounter_mem_128431 = np.zeros(10240, dtype=np.int32) + static_mem_129953 = opencl_alloc(self, 40960, "static_mem_129953") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129953, + normaliseArray(mainMagnitudezicounter_mem_128431), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128431 = static_mem_129953 + mainMagnitudezicounter_mem_128596 = np.zeros(10240, dtype=np.int32) + static_mem_129954 = opencl_alloc(self, 40960, "static_mem_129954") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129954, + normaliseArray(mainMagnitudezicounter_mem_128596), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128596 = static_mem_129954 + mainMagnitudezicounter_mem_128661 = np.zeros(10240, dtype=np.int32) + static_mem_129955 = opencl_alloc(self, 40960, "static_mem_129955") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129955, + normaliseArray(mainMagnitudezicounter_mem_128661), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128661 = static_mem_129955 + mainMagnitudezicounter_mem_128795 = np.zeros(10240, dtype=np.int32) + static_mem_129956 = opencl_alloc(self, 40960, "static_mem_129956") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129956, + normaliseArray(mainMagnitudezicounter_mem_128795), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128795 = static_mem_129956 + mainMagnitudezicounter_mem_128985 = np.zeros(10240, dtype=np.int32) + static_mem_129959 = opencl_alloc(self, 40960, "static_mem_129959") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129959, + normaliseArray(mainMagnitudezicounter_mem_128985), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_128985 = 
static_mem_129959 + mainMagnitudezicounter_mem_129073 = np.zeros(10240, dtype=np.int32) + static_mem_129960 = opencl_alloc(self, 40960, "static_mem_129960") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129960, + normaliseArray(mainMagnitudezicounter_mem_129073), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129073 = static_mem_129960 + mainMagnitudezicounter_mem_129205 = np.zeros(10240, dtype=np.int32) + static_mem_129961 = opencl_alloc(self, 40960, "static_mem_129961") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129961, + normaliseArray(mainMagnitudezicounter_mem_129205), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129205 = static_mem_129961 + mainMagnitudezicounter_mem_129375 = np.zeros(10240, dtype=np.int32) + static_mem_129962 = opencl_alloc(self, 40960, "static_mem_129962") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129962, + normaliseArray(mainMagnitudezicounter_mem_129375), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129375 = static_mem_129962 + mainMagnitudezicounter_mem_129435 = np.zeros(10240, dtype=np.int32) + static_mem_129963 = opencl_alloc(self, 40960, "static_mem_129963") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129963, + normaliseArray(mainMagnitudezicounter_mem_129435), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129435 = static_mem_129963 + mainMagnitudezicounter_mem_129475 = np.array([np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0), + np.int32(0), np.int32(0)], + dtype=np.int32) + static_mem_129964 = opencl_alloc(self, 40, "static_mem_129964") + if (40 != 0): + cl.enqueue_copy(self.queue, static_mem_129964, + normaliseArray(mainMagnitudezicounter_mem_129475), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129475 = static_mem_129964 + mainMagnitudezicounter_mem_129540 = np.zeros(10240, dtype=np.int32) + static_mem_129966 = opencl_alloc(self, 
40960, "static_mem_129966") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129966, + normaliseArray(mainMagnitudezicounter_mem_129540), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129540 = static_mem_129966 + mainMagnitudezicounter_mem_129737 = np.zeros(10240, dtype=np.int32) + static_mem_129968 = opencl_alloc(self, 40960, "static_mem_129968") + if (40960 != 0): + cl.enqueue_copy(self.queue, static_mem_129968, + normaliseArray(mainMagnitudezicounter_mem_129737), + is_blocking=synchronous) + self.mainMagnitudezicounter_mem_129737 = static_mem_129968 + def futhark_builtinzhgpu_map_transpose_f64(self, destmem_0, destoffset_1, + srcmem_2, srcoffset_3, + num_arrays_4, x_elems_5, + y_elems_6): + if ((num_arrays_4 == np.int32(0)) or ((x_elems_5 == np.int32(0)) or (y_elems_6 == np.int32(0)))): + pass + else: + muly_8 = squot32(np.int32(16), x_elems_5) + mulx_7 = squot32(np.int32(16), y_elems_6) + if ((num_arrays_4 == np.int32(1)) and ((x_elems_5 == np.int32(1)) or (y_elems_6 == np.int32(1)))): + if (sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(8))) != 0): + cl.enqueue_copy(self.queue, destmem_0, srcmem_2, + dest_offset=np.int64(sext_i32_i64(destoffset_1)), + src_offset=np.int64(sext_i32_i64(srcoffset_3)), + byte_count=np.int64(sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(8))))) + if synchronous: + sync(self) + else: + if (sle32(x_elems_5, np.int32(8)) and slt32(np.int32(16), y_elems_6)): + if ((((1 * (np.int64(sdiv_up32(x_elems_5, + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(sdiv_up32(sdiv_up32(y_elems_6, + muly_8), + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_f64_low_width_var.set_args(cl.LocalMemory(np.int64(np.int64(2176))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + 
cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_f64_low_width_var, + ((np.int64(sdiv_up32(x_elems_5, + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(sdiv_up32(sdiv_up32(y_elems_6, + muly_8), + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(16)), + np.int64(np.int32(16)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + else: + if (sle32(y_elems_6, np.int32(8)) and slt32(np.int32(16), x_elems_5)): + if ((((1 * (np.int64(sdiv_up32(sdiv_up32(x_elems_5, mulx_7), + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(sdiv_up32(y_elems_6, + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_f64_low_height_var.set_args(cl.LocalMemory(np.int64(np.int64(2176))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_f64_low_height_var, + ((np.int64(sdiv_up32(sdiv_up32(x_elems_5, + mulx_7), + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(sdiv_up32(y_elems_6, + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(16)), + np.int64(np.int32(16)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + else: + if (sle32(x_elems_5, np.int32(8)) and sle32(y_elems_6, + np.int32(8))): + if ((1 * (np.int64(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), + np.int32(256))) * np.int64(np.int32(256)))) != 0): + self.gpu_map_transpose_f64_small_var.set_args(cl.LocalMemory(np.int64(np.int64(1))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + 
self.gpu_map_transpose_f64_small_var, + ((np.int64(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), + np.int32(256))) * np.int64(np.int32(256))),), + (np.int64(np.int32(256)),)) + if synchronous: + sync(self) + else: + if ((((1 * (np.int64(sdiv_up32(x_elems_5, + np.int32(32))) * np.int64(np.int32(32)))) * (np.int64(sdiv_up32(y_elems_6, + np.int32(32))) * np.int64(np.int32(8)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_f64_var.set_args(cl.LocalMemory(np.int64(np.int64(8448))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_f64_var, + ((np.int64(sdiv_up32(x_elems_5, + np.int32(32))) * np.int64(np.int32(32))), + (np.int64(sdiv_up32(y_elems_6, + np.int32(32))) * np.int64(np.int32(8))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(32)), + np.int64(np.int32(8)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + return () + def futhark_builtinzhgpu_map_transpose_i64(self, destmem_0, destoffset_1, + srcmem_2, srcoffset_3, + num_arrays_4, x_elems_5, + y_elems_6): + if ((num_arrays_4 == np.int32(0)) or ((x_elems_5 == np.int32(0)) or (y_elems_6 == np.int32(0)))): + pass + else: + muly_8 = squot32(np.int32(16), x_elems_5) + mulx_7 = squot32(np.int32(16), y_elems_6) + if ((num_arrays_4 == np.int32(1)) and ((x_elems_5 == np.int32(1)) or (y_elems_6 == np.int32(1)))): + if (sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(8))) != 0): + cl.enqueue_copy(self.queue, destmem_0, srcmem_2, + dest_offset=np.int64(sext_i32_i64(destoffset_1)), + src_offset=np.int64(sext_i32_i64(srcoffset_3)), + byte_count=np.int64(sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(8))))) + if synchronous: + sync(self) + else: + if (sle32(x_elems_5, np.int32(8)) and slt32(np.int32(16), y_elems_6)): + if ((((1 * 
(np.int64(sdiv_up32(x_elems_5, + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(sdiv_up32(sdiv_up32(y_elems_6, + muly_8), + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_i64_low_width_var.set_args(cl.LocalMemory(np.int64(np.int64(2176))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_i64_low_width_var, + ((np.int64(sdiv_up32(x_elems_5, + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(sdiv_up32(sdiv_up32(y_elems_6, + muly_8), + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(16)), + np.int64(np.int32(16)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + else: + if (sle32(y_elems_6, np.int32(8)) and slt32(np.int32(16), x_elems_5)): + if ((((1 * (np.int64(sdiv_up32(sdiv_up32(x_elems_5, mulx_7), + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(sdiv_up32(y_elems_6, + np.int32(16))) * np.int64(np.int32(16)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_i64_low_height_var.set_args(cl.LocalMemory(np.int64(np.int64(2176))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_i64_low_height_var, + ((np.int64(sdiv_up32(sdiv_up32(x_elems_5, + mulx_7), + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(sdiv_up32(y_elems_6, + np.int32(16))) * np.int64(np.int32(16))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(16)), + np.int64(np.int32(16)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + else: + if 
(sle32(x_elems_5, np.int32(8)) and sle32(y_elems_6, + np.int32(8))): + if ((1 * (np.int64(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), + np.int32(256))) * np.int64(np.int32(256)))) != 0): + self.gpu_map_transpose_i64_small_var.set_args(cl.LocalMemory(np.int64(np.int64(1))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, + srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_i64_small_var, + ((np.int64(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), + np.int32(256))) * np.int64(np.int32(256))),), + (np.int64(np.int32(256)),)) + if synchronous: + sync(self) + else: + if ((((1 * (np.int64(sdiv_up32(x_elems_5, + np.int32(32))) * np.int64(np.int32(32)))) * (np.int64(sdiv_up32(y_elems_6, + np.int32(32))) * np.int64(np.int32(8)))) * (np.int64(num_arrays_4) * np.int64(np.int32(1)))) != 0): + self.gpu_map_transpose_i64_var.set_args(cl.LocalMemory(np.int64(np.int64(8448))), + np.int32(destoffset_1), + np.int32(srcoffset_3), + np.int32(num_arrays_4), + np.int32(x_elems_5), + np.int32(y_elems_6), + np.int32(mulx_7), + np.int32(muly_8), + destmem_0, srcmem_2) + cl.enqueue_nd_range_kernel(self.queue, + self.gpu_map_transpose_i64_var, + ((np.int64(sdiv_up32(x_elems_5, + np.int32(32))) * np.int64(np.int32(32))), + (np.int64(sdiv_up32(y_elems_6, + np.int32(32))) * np.int64(np.int32(8))), + (np.int64(num_arrays_4) * np.int64(np.int32(1)))), + (np.int64(np.int32(32)), + np.int64(np.int32(8)), + np.int64(np.int32(1)))) + if synchronous: + sync(self) + return () + def futhark_builtinzhiota_i64(self, mem_126539, n_126540, x_126541, s_126542): + group_sizze_126547 = self.sizes["builtin#iota_i64.group_size_126547"] + num_groups_126548 = sdiv_up64(n_126540, group_sizze_126547) + if ((1 * (np.int64(num_groups_126548) * np.int64(group_sizze_126547))) != 0): + self.builtinzhiota_i64ziiota_i64_126544_var.set_args(np.int32(n_126540), 
+ np.int64(x_126541), + np.int64(s_126542), + mem_126539) + cl.enqueue_nd_range_kernel(self.queue, + self.builtinzhiota_i64ziiota_i64_126544_var, + ((np.int64(num_groups_126548) * np.int64(group_sizze_126547)),), + (np.int64(group_sizze_126547),)) + if synchronous: + sync(self) + return () + def futhark_builtinzhreplicate_f64(self, mem_126471, num_elems_126472, + val_126473): + group_sizze_126478 = self.sizes["builtin#replicate_f64.group_size_126478"] + num_groups_126479 = sdiv_up64(num_elems_126472, group_sizze_126478) + if ((1 * (np.int64(num_groups_126479) * np.int64(group_sizze_126478))) != 0): + self.builtinzhreplicate_f64zireplicate_126475_var.set_args(np.int32(num_elems_126472), + np.float64(val_126473), + mem_126471) + cl.enqueue_nd_range_kernel(self.queue, + self.builtinzhreplicate_f64zireplicate_126475_var, + ((np.int64(num_groups_126479) * np.int64(group_sizze_126478)),), + (np.int64(group_sizze_126478),)) + if synchronous: + sync(self) + return () + def futhark_builtinzhreplicate_i64(self, mem_126480, num_elems_126481, + val_126482): + group_sizze_126487 = self.sizes["builtin#replicate_i64.group_size_126487"] + num_groups_126488 = sdiv_up64(num_elems_126481, group_sizze_126487) + if ((1 * (np.int64(num_groups_126488) * np.int64(group_sizze_126487))) != 0): + self.builtinzhreplicate_i64zireplicate_126484_var.set_args(np.int32(num_elems_126481), + np.int64(val_126482), + mem_126480) + cl.enqueue_nd_range_kernel(self.queue, + self.builtinzhreplicate_i64zireplicate_126484_var, + ((np.int64(num_groups_126488) * np.int64(group_sizze_126487)),), + (np.int64(group_sizze_126487),)) + if synchronous: + sync(self) + return () + def futhark_convertToFloat(self, images_mem_120107, m_70846, n_70847, p_70848, + nan_value_70849): + y_77233 = (n_70847 * p_70848) + nest_sizze_77234 = (m_70846 * y_77233) + segmap_group_sizze_77235 = self.sizes["convertToFloat.segmap_group_size_77189"] + segmap_usable_groups_77236 = sdiv_up64(nest_sizze_77234, + segmap_group_sizze_77235) + 
binop_x_120110 = (m_70846 * n_70847) + binop_x_120111 = (p_70848 * binop_x_120110) + bytes_120109 = (np.int64(8) * binop_x_120111) + mem_120112 = opencl_alloc(self, bytes_120109, "mem_120112") + if ((1 * (np.int64(segmap_usable_groups_77236) * np.int64(segmap_group_sizze_77235))) != 0): + self.convertToFloatzisegmap_77185_var.set_args(self.global_failure, + np.int64(m_70846), + np.int64(n_70847), + np.int64(p_70848), + np.int16(nan_value_70849), + images_mem_120107, + mem_120112) + cl.enqueue_nd_range_kernel(self.queue, + self.convertToFloatzisegmap_77185_var, + ((np.int64(segmap_usable_groups_77236) * np.int64(segmap_group_sizze_77235)),), + (np.int64(segmap_group_sizze_77235),)) + if synchronous: + sync(self) + out_mem_126320 = mem_120112 + return out_mem_126320 + def futhark_main(self, mappingindices_mem_120107, images_mem_120108, N_75135, + m_75136, trend_75137, k_75138, n_75139, freq_75140, + hfrac_75141, level_75142, lam_75143, hist_75144, conf_75145): + x_75148 = (np.int64(2) * k_75138) + k2p2_75149 = (np.int64(2) + x_75148) + cond_75150 = slt64(np.int64(0), trend_75137) + if cond_75150: + k2p2zq_75151 = k2p2_75149 + else: + k2p2zq_f_res_75152 = (k2p2_75149 - np.int64(1)) + k2p2zq_75151 = k2p2zq_f_res_75152 + binop_x_120111 = (N_75135 * k2p2zq_75151) + bytes_120110 = (np.int64(8) * binop_x_120111) + if cond_75150: + bounds_invalid_upwards_75154 = slt64(k2p2zq_75151, np.int64(0)) + valid_75155 = not(bounds_invalid_upwards_75154) + range_valid_c_75156 = True + assert valid_75155, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:31:10-18\n #2 bfastfinal.fut:29:17-58\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_75151, + " is invalid.")) + segmap_group_sizze_102755 = self.sizes["main.segmap_group_size_102691"] + segmap_usable_groups_102756 = sdiv_up64(binop_x_120111, + segmap_group_sizze_102755) + mem_120112 = opencl_alloc(self, 
bytes_120110, "mem_120112") + if ((1 * (np.int64(segmap_usable_groups_102756) * np.int64(segmap_group_sizze_102755))) != 0): + self.mainzisegmap_102688_var.set_args(self.global_failure, + np.int64(N_75135), + np.float64(freq_75140), + np.int64(k2p2zq_75151), + mappingindices_mem_120107, + mem_120112) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_102688_var, + ((np.int64(segmap_usable_groups_102756) * np.int64(segmap_group_sizze_102755)),), + (np.int64(segmap_group_sizze_102755),)) + if synchronous: + sync(self) + binop_p_mem_120117 = mem_120112 + else: + bounds_invalid_upwards_75179 = slt64(k2p2zq_75151, np.int64(0)) + valid_75180 = not(bounds_invalid_upwards_75179) + range_valid_c_75181 = True + assert valid_75180, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:44:10-20\n #2 bfastfinal.fut:30:17-56\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_75151, + " is invalid.")) + segmap_group_sizze_102851 = self.sizes["main.segmap_group_size_102791"] + segmap_usable_groups_102852 = sdiv_up64(binop_x_120111, + segmap_group_sizze_102851) + mem_120116 = opencl_alloc(self, bytes_120110, "mem_120116") + if ((1 * (np.int64(segmap_usable_groups_102852) * np.int64(segmap_group_sizze_102851))) != 0): + self.mainzisegmap_102788_var.set_args(self.global_failure, + np.int64(N_75135), + np.float64(freq_75140), + np.int64(k2p2zq_75151), + mappingindices_mem_120107, + mem_120116) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_102788_var, + ((np.int64(segmap_usable_groups_102852) * np.int64(segmap_group_sizze_102851)),), + (np.int64(segmap_group_sizze_102851),)) + if synchronous: + sync(self) + binop_p_mem_120117 = mem_120116 + x_75203 = (N_75135 * N_75135) + y_75204 = (np.int64(2) * N_75135) + x_75205 = (x_75203 + y_75204) + x_75206 = (np.int64(1) + x_75205) + y_75207 = (np.int64(1) + N_75135) + zzero_75208 = (y_75207 == np.int64(0)) + 
nonzzero_75209 = not(zzero_75208) + nonzzero_cert_75210 = True + assert nonzzero_75209, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:25-53\n #1 bfastfinal.fut:200:5-74\n #2 bfastfinal.fut:195:1-201:36\n" % ("division by zero",)) + x_75211 = sdiv64(x_75206, y_75207) + x_75212 = (x_75211 - N_75135) + binop_p_75213 = (x_75212 - np.int64(1)) + defunc_0_f_res_75214 = sitofp_i64_f64(binop_p_75213) + segmap_group_sizze_102902 = self.sizes["main.segmap_group_size_102884"] + segmap_usable_groups_102903 = sdiv_up64(binop_x_120111, + segmap_group_sizze_102902) + mem_120120 = opencl_alloc(self, bytes_120110, "mem_120120") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120120, np.int64(0), + binop_p_mem_120117, np.int64(0), + np.int64(1), N_75135, + k2p2zq_75151) + mem_120124 = opencl_alloc(self, bytes_120110, "mem_120124") + if ((1 * (np.int64(segmap_usable_groups_102903) * np.int64(segmap_group_sizze_102902))) != 0): + self.mainzisegmap_102881_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(k2p2zq_75151), + np.float64(defunc_0_f_res_75214), + mem_120120, mem_120124) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_102881_var, + ((np.int64(segmap_usable_groups_102903) * np.int64(segmap_group_sizze_102902)),), + (np.int64(segmap_group_sizze_102902),)) + if synchronous: + sync(self) + empty_slice_75222 = (k2p2zq_75151 == np.int64(0)) + m_75223 = (k2p2zq_75151 - np.int64(1)) + zzero_leq_i_p_m_t_s_75224 = sle64(np.int64(0), m_75223) + i_p_m_t_s_leq_w_75225 = slt64(m_75223, k2p2zq_75151) + i_lte_j_75226 = sle64(np.int64(0), k2p2zq_75151) + y_75227 = (zzero_leq_i_p_m_t_s_75224 and i_p_m_t_s_leq_w_75225) + y_75228 = (i_lte_j_75226 and y_75227) + ok_or_empty_75229 = (empty_slice_75222 or y_75228) + empty_slice_75230 = (n_75139 == np.int64(0)) + m_75231 = (n_75139 - np.int64(1)) + zzero_leq_i_p_m_t_s_75232 = sle64(np.int64(0), m_75231) + i_p_m_t_s_leq_w_75233 = slt64(m_75231, N_75135) + i_lte_j_75234 = sle64(np.int64(0), n_75139) + y_75235 = 
(zzero_leq_i_p_m_t_s_75232 and i_p_m_t_s_leq_w_75233) + y_75236 = (i_lte_j_75234 and y_75235) + ok_or_empty_75237 = (empty_slice_75230 or y_75236) + index_ok_75238 = (ok_or_empty_75229 and ok_or_empty_75237) + index_certs_75239 = True + assert index_ok_75238, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-19\n #1 bfastfinal.fut:200:5-74\n #2 bfastfinal.fut:195:1-201:36\n" % ("Index [", + np.int64(0), + ":, :", + n_75139, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + N_75135, + "].")) + index_certs_75241 = True + assert index_ok_75238, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-20\n #1 bfastfinal.fut:200:5-74\n #2 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + n_75139, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + N_75135, + "][", + k2p2zq_75151, + "].")) + empty_slice_75243 = (m_75136 == np.int64(0)) + m_75244 = (m_75136 - np.int64(1)) + zzero_leq_i_p_m_t_s_75245 = sle64(np.int64(0), m_75244) + i_p_m_t_s_leq_w_75246 = slt64(m_75244, m_75136) + i_lte_j_75247 = sle64(np.int64(0), m_75136) + y_75248 = (zzero_leq_i_p_m_t_s_75245 and i_p_m_t_s_leq_w_75246) + y_75249 = (i_lte_j_75247 and y_75248) + ok_or_empty_75250 = (empty_slice_75243 or y_75249) + index_ok_75251 = (ok_or_empty_75237 and ok_or_empty_75250) + index_certs_75252 = True + assert index_ok_75251, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-24\n #1 bfastfinal.fut:200:5-74\n #2 bfastfinal.fut:195:1-201:36\n" % ("Index [", + np.int64(0), + ":, :", + n_75139, + "] out of bounds for array of shape [", + m_75136, + "][", + N_75135, + "].")) + cond_75253 = (hist_75144 == np.int64(-1)) + y_75254 = slt64(m_75231, n_75139) + bounds_check_75255 = (zzero_leq_i_p_m_t_s_75232 and y_75254) + suff_outer_redomap_102909 = (self.sizes["main.suff_outer_redomap_0"] <= m_75136) + segred_group_sizze_102924 = self.sizes["main.segred_group_size_102911"] + max_num_groups_126339 = 
self.sizes["main.segred_num_groups_102913"] + num_groups_102925 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segred_group_sizze_102924), + sext_i32_i64(max_num_groups_126339)))) + max_group_sizze_103162 = self.max_group_size + fits_103163 = sle64(n_75139, max_group_sizze_103162) + suff_intra_par_103165 = (self.sizes["main.suff_intra_par_1"] <= n_75139) + intra_suff_and_fits_103166 = (fits_103163 and suff_intra_par_103165) + nest_sizze_103196 = (m_75136 * n_75139) + segscan_group_sizze_103197 = self.sizes["main.segscan_group_size_103077"] + max_num_groups_126340 = self.sizes["main.segscan_num_groups_103079"] + num_groups_103198 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_103196, + segscan_group_sizze_103197), + sext_i32_i64(max_num_groups_126340)))) + segmap_group_sizze_103239 = self.sizes["main.segmap_group_size_103008"] + segred_group_sizze_103255 = self.sizes["main.segred_group_size_103151"] + max_num_groups_126341 = self.sizes["main.segred_num_groups_103153"] + num_groups_103256 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segred_group_sizze_103255), + sext_i32_i64(max_num_groups_126341)))) + segmap_group_sizze_106122 = self.sizes["main.segmap_group_size_105861"] + max_num_groups_126342 = self.sizes["main.segmap_num_groups_105863"] + num_groups_106123 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_106122), + sext_i32_i64(max_num_groups_126342)))) + num_threads_115425 = (segmap_group_sizze_106122 * num_groups_106123) + y_115427 = smod_safe64(m_75136, num_threads_115425) + x_115428 = (num_threads_115425 - y_115427) + y_115429 = smod_safe64(x_115428, num_threads_115425) + segmap_group_sizze_110012 = self.sizes["main.segmap_group_size_109627"] + max_num_groups_126343 = self.sizes["main.segmap_num_groups_109629"] + num_groups_110013 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_110012), + sext_i32_i64(max_num_groups_126343)))) + 
num_threads_115503 = (segmap_group_sizze_110012 * num_groups_110013) + y_115505 = smod_safe64(m_75136, num_threads_115503) + x_115506 = (num_threads_115503 - y_115505) + y_115507 = smod_safe64(x_115506, num_threads_115503) + binop_x_120126 = (N_75135 * m_75136) + bytes_120125 = (np.int64(8) * binop_x_120126) + bytes_120173 = (np.int64(8) * m_75136) + bytes_120175 = (np.int64(8) * nest_sizze_103196) + bytes_120129 = (np.int64(8) * n_75139) + binop_x_120244 = (m_75136 * k2p2zq_75151) + bytes_120247 = (np.int64(8) * k2p2zq_75151) + binop_x_120251 = (k2p2zq_75151 * k2p2zq_75151) + bytes_120250 = (np.int64(8) * binop_x_120251) + bytes_120253 = (np.int64(8) * y_115429) + bytes_121947 = (np.int64(8) * y_115507) + bytes_121990 = (np.int64(8) * binop_x_120244) + binop_x_121994 = (np.int64(2) * m_75136) + binop_x_121995 = (k2p2zq_75151 * binop_x_121994) + bytes_121993 = (np.int64(8) * binop_x_121995) + binop_x_121999 = (k2p2zq_75151 * binop_x_120244) + bytes_121997 = (np.int64(8) * binop_x_121999) + num_threads_126097 = (segred_group_sizze_102924 * num_groups_102925) + total_sizze_126098 = (bytes_120129 * num_threads_126097) + total_sizze_126099 = (bytes_120129 * num_threads_126097) + total_sizze_126100 = (bytes_120129 * num_threads_126097) + segmap_group_sizze_103385 = self.sizes["main.segmap_group_size_103286"] + if cond_75253: + index_certs_75259 = True + assert bounds_check_75255, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 recresid.fut:89:39-59\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:200:5-74\n #7 bfastfinal.fut:195:1-201:36\n" % ("Index [", + m_75231, + "] out of bounds for array of shape [", + n_75139, + "].")) + local_memory_capacity_126525 = self.max_local_memory + if (((sle64(((np.int32(1) + srem64((np.int64(8) - srem64(np.int32(1), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * segred_group_sizze_102924) + srem64((np.int64(8) - srem64((np.int32(8) * 
segred_group_sizze_102924), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126525)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126525))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126525))) and suff_outer_redomap_102909): + mem_120127 = opencl_alloc(self, bytes_120125, "mem_120127") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120127, np.int64(0), + images_mem_120108, + np.int64(0), np.int64(1), + N_75135, m_75136) + mem_120172 = opencl_alloc(self, np.int64(8), "mem_120172") + mem_120174 = opencl_alloc(self, bytes_120173, "mem_120174") + mem_120177 = opencl_alloc(self, bytes_120175, "mem_120177") + mem_120180 = opencl_alloc(self, bytes_120175, "mem_120180") + mem_120130 = opencl_alloc(self, total_sizze_126098, "mem_120130") + mem_120144 = opencl_alloc(self, total_sizze_126099, "mem_120144") + mem_120146 = opencl_alloc(self, total_sizze_126100, "mem_120146") + mainzicounter_mem_126344 = self.mainzicounter_mem_126344 + group_res_arr_mem_126346 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_102924 * num_groups_102925)), + "group_res_arr_mem_126346") + num_threads_126348 = (num_groups_102925 * segred_group_sizze_102924) + if ((1 * (np.int64(num_groups_102925) * np.int64(segred_group_sizze_102924))) != 0): + self.mainzisegred_nonseg_102922_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_102924))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_75136), + np.int64(n_75139), + np.int64(m_75231), + np.int64(num_groups_102925), + np.int64(num_threads_126097), + np.int64(num_threads_126348), + mem_120127, mem_120130, + mem_120144, mem_120146, + mem_120172, mem_120174, + mem_120177, mem_120180, + mainzicounter_mem_126344, + group_res_arr_mem_126346) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_nonseg_102922_var, + ((np.int64(num_groups_102925) * np.int64(segred_group_sizze_102924)),), + (np.int64(segred_group_sizze_102924),)) + if 
synchronous: + sync(self) + mem_120127 = None + mem_120130 = None + mem_120144 = None + mem_120146 = None + read_res_129838 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129838, mem_120172, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_102955 = read_res_129838[0] + mem_120172 = None + mem_120220 = opencl_alloc(self, bytes_120173, "mem_120220") + if ((m_75136 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120220, mem_120174, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + mem_120174 = None + mem_120224 = opencl_alloc(self, bytes_120175, "mem_120224") + group_sizze_126386 = self.sizes["main.group_size_126386"] + num_groups_126387 = sdiv_up64((m_75136 * n_75139), group_sizze_126386) + if ((1 * (np.int64(num_groups_126387) * np.int64(group_sizze_126386))) != 0): + self.mainzicopy_126383_var.set_args(np.int64(m_75136), + np.int64(n_75139), mem_120177, + mem_120224) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126383_var, + ((np.int64(num_groups_126387) * np.int64(group_sizze_126386)),), + (np.int64(group_sizze_126386),)) + if synchronous: + sync(self) + mem_120177 = None + mem_120228 = opencl_alloc(self, bytes_120175, "mem_120228") + group_sizze_126391 = self.sizes["main.group_size_126391"] + num_groups_126392 = sdiv_up64((m_75136 * n_75139), group_sizze_126391) + if ((1 * (np.int64(num_groups_126392) * np.int64(group_sizze_126391))) != 0): + self.mainzicopy_126388_var.set_args(np.int64(m_75136), + np.int64(n_75139), mem_120180, + mem_120228) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126388_var, + ((np.int64(num_groups_126392) * np.int64(group_sizze_126391)),), + (np.int64(group_sizze_126391),)) + if synchronous: + sync(self) + mem_120180 = None + defunc_3_map_res_mem_120230 = mem_120220 + defunc_3_map_res_mem_120231 = mem_120224 + 
defunc_3_map_res_mem_120232 = mem_120228 + defunc_2_reduce_res_75260 = defunc_2_reduce_res_102955 + else: + local_memory_capacity_126494 = self.max_local_memory + if (sle64((((bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126494)) and intra_suff_and_fits_103166): + mem_120191 = opencl_alloc(self, bytes_120173, "mem_120191") + mem_120194 = opencl_alloc(self, bytes_120175, "mem_120194") + mem_120197 = opencl_alloc(self, bytes_120175, "mem_120197") + if ((1 * (np.int64(m_75136) * np.int64(n_75139))) != 0): + self.mainzisegmap_intragroup_102969_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + np.int64(N_75135), + np.int64(n_75139), + np.int64(m_75231), + images_mem_120108, + mem_120191, + mem_120194, + mem_120197) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_102969_var, + ((np.int64(m_75136) * np.int64(n_75139)),), + (np.int64(n_75139),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = mem_120191 + defunc_3_map_res_mem_120212 = mem_120194 + defunc_3_map_res_mem_120213 = mem_120197 + else: + mem_120201 = opencl_alloc(self, bytes_120175, "mem_120201") + if slt64(np.int64(0), (m_75136 * n_75139)): + stage1_max_num_groups_126410 = self.max_group_size + stage1_num_groups_126411 = smin64(stage1_max_num_groups_126410, + num_groups_103198) + num_threads_126412 = sext_i64_i32((stage1_num_groups_126411 * segscan_group_sizze_103197)) + if ((1 * (np.int64(stage1_num_groups_126411) * np.int64(segscan_group_sizze_103197))) != 0): + self.mainziscan_stage1_103083_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + 
(np.int32(8) * segscan_group_sizze_103197)))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(m_75231), + np.int32(num_threads_126412), + images_mem_120108, + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage1_103083_var, + ((np.int64(stage1_num_groups_126411) * np.int64(segscan_group_sizze_103197)),), + (np.int64(segscan_group_sizze_103197),)) + if synchronous: + sync(self) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_126411))) != 0): + self.mainziscan_stage2_103083_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_126411)))), + np.int64(m_75136), + np.int64(n_75139), + np.int64(stage1_num_groups_126411), + np.int32(num_threads_126412), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage2_103083_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_126411)),), + (np.int64(stage1_num_groups_126411),)) + if synchronous: + sync(self) + required_groups_126454 = sext_i64_i32(sdiv_up64((m_75136 * n_75139), + segscan_group_sizze_103197)) + if ((1 * (np.int64(num_groups_103198) * np.int64(segscan_group_sizze_103197))) != 0): + self.mainziscan_stage3_103083_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(n_75139), + np.int64(num_groups_103198), + np.int32(num_threads_126412), + np.int32(required_groups_126454), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage3_103083_var, + ((np.int64(num_groups_103198) * np.int64(segscan_group_sizze_103197)),), + (np.int64(segscan_group_sizze_103197),)) + if synchronous: + sync(self) + mem_120203 = opencl_alloc(self, bytes_120173, "mem_120203") + group_sizze_126469 = self.sizes["main.group_size_126469"] + num_groups_126470 = sdiv_up64(m_75136, group_sizze_126469) + if ((1 * (np.int64(num_groups_126470) * np.int64(group_sizze_126469))) != 0): + self.mainzicopy_126466_var.set_args(np.int64(m_75136), + np.int64(n_75139), + 
np.int64(m_75231), mem_120201, + mem_120203) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126466_var, + ((np.int64(num_groups_126470) * np.int64(group_sizze_126469)),), + (np.int64(group_sizze_126469),)) + if synchronous: + sync(self) + mem_120206 = opencl_alloc(self, bytes_120175, "mem_120206") + self.futhark_builtinzhreplicate_f64(mem_120206, (m_75136 * n_75139), + np.nan) + mem_120209 = opencl_alloc(self, bytes_120175, "mem_120209") + self.futhark_builtinzhreplicate_i64(mem_120209, (m_75136 * n_75139), + np.int64(0)) + segmap_usable_groups_103240 = sdiv_up64(nest_sizze_103196, + segmap_group_sizze_103239) + if ((1 * (np.int64(segmap_usable_groups_103240) * np.int64(segmap_group_sizze_103239))) != 0): + self.mainzisegmap_103005_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(m_75231), + images_mem_120108, mem_120201, + mem_120206, mem_120209) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_103005_var, + ((np.int64(segmap_usable_groups_103240) * np.int64(segmap_group_sizze_103239)),), + (np.int64(segmap_group_sizze_103239),)) + if synchronous: + sync(self) + mem_120201 = None + defunc_2_reduce_res_map_acc_mem_120211 = mem_120203 + defunc_3_map_res_mem_120212 = mem_120206 + defunc_3_map_res_mem_120213 = mem_120209 + mem_120215 = opencl_alloc(self, bytes_120173, "mem_120215") + if ((m_75136 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120215, + defunc_2_reduce_res_map_acc_mem_120211, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + mem_120218 = opencl_alloc(self, np.int64(8), "mem_120218") + mainzicounter_mem_126495 = self.mainzicounter_mem_126495 + group_res_arr_mem_126497 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_103255 * num_groups_103256)), + "group_res_arr_mem_126497") + num_threads_126499 = (num_groups_103256 * segred_group_sizze_103255) + if ((1 * 
(np.int64(num_groups_103256) * np.int64(segred_group_sizze_103255))) != 0): + self.mainzisegred_nonseg_103159_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_103255))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_75136), + np.int64(num_groups_103256), + np.int64(num_threads_126499), + defunc_2_reduce_res_map_acc_mem_120211, + mem_120218, + mainzicounter_mem_126495, + group_res_arr_mem_126497) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_nonseg_103159_var, + ((np.int64(num_groups_103256) * np.int64(segred_group_sizze_103255)),), + (np.int64(segred_group_sizze_103255),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = None + read_res_129840 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129840, mem_120218, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_103263 = read_res_129840[0] + mem_120218 = None + defunc_3_map_res_mem_120230 = mem_120215 + defunc_3_map_res_mem_120231 = defunc_3_map_res_mem_120212 + defunc_3_map_res_mem_120232 = defunc_3_map_res_mem_120213 + defunc_2_reduce_res_75260 = defunc_2_reduce_res_103263 + empty_slice_75289 = (defunc_2_reduce_res_75260 == np.int64(0)) + m_75290 = (defunc_2_reduce_res_75260 - np.int64(1)) + zzero_leq_i_p_m_t_s_75291 = sle64(np.int64(0), m_75290) + i_p_m_t_s_leq_w_75292 = slt64(m_75290, n_75139) + i_lte_j_75293 = sle64(np.int64(0), defunc_2_reduce_res_75260) + y_75294 = (zzero_leq_i_p_m_t_s_75291 and i_p_m_t_s_leq_w_75292) + y_75295 = (i_lte_j_75293 and y_75294) + ok_or_empty_75296 = (empty_slice_75289 or y_75295) + nest_sizze_103384 = (m_75136 * defunc_2_reduce_res_75260) + max_num_groups_126526 = self.sizes["main.segmap_num_groups_103288"] + num_groups_103386 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_103384, + segmap_group_sizze_103385), + sext_i32_i64(max_num_groups_126526)))) + mem_120235 = opencl_alloc(self, 
bytes_120110, "mem_120235") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120235, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_75151, + N_75135) + binop_x_120245 = (defunc_2_reduce_res_75260 * binop_x_120244) + bytes_120243 = (np.int64(8) * binop_x_120245) + mem_120246 = opencl_alloc(self, bytes_120243, "mem_120246") + num_threads_126105 = (segmap_group_sizze_103385 * num_groups_103386) + total_sizze_126106 = (bytes_120247 * num_threads_126105) + mem_120238 = opencl_alloc(self, total_sizze_126106, "mem_120238") + total_sizze_126107 = (bytes_120247 * num_threads_126105) + mem_125145 = opencl_alloc(self, total_sizze_126107, "mem_125145") + if ((1 * (np.int64(num_groups_103386) * np.int64(segmap_group_sizze_103385))) != 0): + self.mainzisegmap_103283_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(m_75231), + np.int64(defunc_2_reduce_res_75260), + np.int64(num_groups_103386), + np.int64(num_threads_126105), + defunc_3_map_res_mem_120232, + mem_120235, mem_120238, + mem_120246, mem_125145) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_103283_var, + ((np.int64(num_groups_103386) * np.int64(segmap_group_sizze_103385)),), + (np.int64(segmap_group_sizze_103385),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_120232 = None + mem_120235 = None + mem_120238 = None + mem_125145 = None + index_ok_75325 = (ok_or_empty_75250 and ok_or_empty_75296) + index_certs_75326 = True + assert index_ok_75325, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:98:15-28\n #1 mroc.fut:27:25-38\n #2 mroc.fut:77:27-61\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n" % ("Index [", + np.int64(0), + ":, :", + defunc_2_reduce_res_75260, + "] out of bounds for array of shape [", + m_75136, + "][", + n_75139, + "].")) + 
i64_res_75328 = sitofp_i64_f64(k2p2zq_75151) + tol_75329 = (np.float64(1.4901161193847656e-8) / i64_res_75328) + i_p_m_t_s_leq_w_75330 = slt64(m_75223, defunc_2_reduce_res_75260) + y_75331 = (zzero_leq_i_p_m_t_s_75224 and i_p_m_t_s_leq_w_75330) + y_75332 = (i_lte_j_75226 and y_75331) + ok_or_empty_75333 = (empty_slice_75222 or y_75332) + index_certs_75334 = True + assert ok_or_empty_75333, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:56-63\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + k2p2zq_75151, + "] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "].")) + index_ok_75335 = (ok_or_empty_75229 and ok_or_empty_75333) + index_certs_75336 = True + assert index_ok_75335, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:43-53\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + k2p2zq_75151, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "][", + k2p2zq_75151, + "].")) + replicate_arg_75337 = (np.int64(2) * k2p2zq_75151) + bounds_invalid_upwards_75338 = slt64(replicate_arg_75337, np.int64(0)) + valid_75339 = not(bounds_invalid_upwards_75338) + range_valid_c_75340 = True + assert valid_75339, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:23:25-63\n #5 /prelude/soacs.fut:67:19-23\n #6 /prelude/soacs.fut:67:3-37\n #7 recresid.fut:22:5-25:22\n #8 
recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:200:5-74\n #13 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_75337, + " is invalid.")) + min_res_75341 = smin64(k2p2zq_75151, k2p2zq_75151) + k_75342 = (np.int64(1) + k2p2zq_75151) + mem_120248 = opencl_alloc(self, bytes_120247, "mem_120248") + self.futhark_builtinzhiota_i64(mem_120248, k2p2zq_75151, np.int64(0), + np.int64(1)) + segmap_group_sizze_103444 = self.sizes["main.segmap_group_size_103422"] + segmap_usable_groups_103445 = sdiv_up64(binop_x_120251, + segmap_group_sizze_103444) + mem_120252 = opencl_alloc(self, bytes_120250, "mem_120252") + if ((1 * (np.int64(segmap_usable_groups_103445) * np.int64(segmap_group_sizze_103444))) != 0): + self.mainzisegmap_103419_var.set_args(self.global_failure, + np.int64(k2p2zq_75151), + mem_120252) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_103419_var, + ((np.int64(segmap_usable_groups_103445) * np.int64(segmap_group_sizze_103444)),), + (np.int64(segmap_group_sizze_103444),)) + if synchronous: + sync(self) + suff_outer_par_103548 = (self.sizes["main.suff_outer_par_2"] <= m_75136) + segmap_group_sizze_104005 = self.sizes["main.segmap_group_size_103552"] + max_num_groups_126554 = self.sizes["main.segmap_num_groups_103554"] + num_groups_104006 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_104005), + sext_i32_i64(max_num_groups_126554)))) + suff_outer_par_106073 = (self.sizes["main.suff_outer_par_8"] <= m_75136) + segmap_group_sizze_106077 = self.sizes["main.segmap_group_size_105816"] + max_num_groups_126555 = self.sizes["main.segmap_num_groups_105818"] + num_groups_106078 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_106077), + sext_i32_i64(max_num_groups_126555)))) + segred_group_sizze_106099 = self.sizes["main.segred_group_size_105893"] + 
max_num_groups_126556 = self.sizes["main.segred_num_groups_105895"] + num_groups_106100 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_106099), + sext_i32_i64(max_num_groups_126556)))) + segmap_group_sizze_106110 = self.sizes["main.segmap_group_size_105885"] + segmap_group_sizze_106117 = self.sizes["main.segmap_group_size_105877"] + segmap_group_sizze_106135 = self.sizes["main.segmap_group_size_105528"] + max_num_groups_126557 = self.sizes["main.segmap_num_groups_105530"] + num_groups_106136 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_106135), + sext_i32_i64(max_num_groups_126557)))) + segmap_group_sizze_106421 = self.sizes["main.segmap_group_size_105484"] + nest_sizze_106435 = (m_75136 * binop_x_120251) + segmap_group_sizze_106436 = self.sizes["main.segmap_group_size_105450"] + suff_outer_par_106460 = (self.sizes["main.suff_outer_par_5"] <= binop_x_120244) + suff_outer_par_106555 = (self.sizes["main.suff_outer_par_6"] <= m_75136) + segmap_group_sizze_106558 = self.sizes["main.segmap_group_size_105022"] + max_num_groups_126558 = self.sizes["main.segmap_num_groups_105024"] + num_groups_106559 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_106558), + sext_i32_i64(max_num_groups_126558)))) + suff_outer_par_106589 = (self.sizes["main.suff_outer_par_7"] <= binop_x_120244) + segred_group_sizze_106612 = self.sizes["main.segred_group_size_105136"] + segmap_group_sizze_106629 = self.sizes["main.segmap_group_size_105123"] + segmap_group_sizze_106640 = self.sizes["main.segmap_group_size_105112"] + segmap_group_sizze_106651 = self.sizes["main.segmap_group_size_104807"] + max_num_groups_126559 = self.sizes["main.segmap_num_groups_104809"] + num_groups_106652 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_106651), + sext_i32_i64(max_num_groups_126559)))) + suff_outer_par_106656 = (self.sizes["main.suff_outer_par_3"] 
<= binop_x_120244) + suff_outer_par_106678 = (self.sizes["main.suff_outer_par_4"] <= nest_sizze_106435) + nest_sizze_106693 = (k2p2zq_75151 * nest_sizze_106435) + segred_group_sizze_106694 = self.sizes["main.segred_group_size_104858"] + max_num_groups_126560 = self.sizes["main.segred_num_groups_104860"] + num_groups_106695 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_106693, + segred_group_sizze_106694), + sext_i32_i64(max_num_groups_126560)))) + segmap_group_sizze_106710 = self.sizes["main.segmap_group_size_104744"] + max_num_groups_126561 = self.sizes["main.segmap_num_groups_104746"] + num_groups_106711 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_106710), + sext_i32_i64(max_num_groups_126561)))) + segmap_group_sizze_106780 = self.sizes["main.segmap_group_size_104702"] + segmap_group_sizze_106851 = self.sizes["main.segmap_group_size_104545"] + segmap_group_sizze_106870 = self.sizes["main.segmap_group_size_104498"] + segmap_group_sizze_106879 = self.sizes["main.segmap_group_size_104475"] + segmap_usable_groups_106111 = sdiv_up_safe64(m_75136, + segmap_group_sizze_106110) + segmap_usable_groups_106118 = sdiv_up_safe64(m_75136, + segmap_group_sizze_106117) + segmap_usable_groups_106630 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_106629) + segmap_usable_groups_106641 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_106640) + tile_sizze_115656 = self.sizes["main.tile_size_115655"] + group_sizze_115657 = (tile_sizze_115656 * tile_sizze_115656) + tile_sizze_116018 = self.sizes["main.tile_size_116017"] + group_sizze_116019 = (tile_sizze_116018 * tile_sizze_116018) + Ty_116322 = self.sizes["main.Ty_116319"] + Ry_116323 = self.sizes["main.Ry_116321"] + Tx_116324 = self.sizes["main.Tx_116318"] + Rx_116325 = self.sizes["main.Rx_116320"] + Tk_116326 = self.sizes["main.Tk_116317"] + TxRx_116329 = (Tx_116324 * Rx_116325) + TyRy_116330 = (Ty_116322 * Ry_116323) + a_loc_szz_116332 = (Tk_116326 * 
TyRy_116330) + binop_x_116333 = (Tx_116324 * Tk_116326) + b_loc_szz_116334 = (Rx_116325 * binop_x_116333) + group_sizze_116339 = (Ty_116322 * Tx_116324) + num_groups_x_116020 = sdiv_up_safe64(m_75136, tile_sizze_116018) + num_groups_y_116021 = sdiv_up_safe64(k2p2zq_75151, tile_sizze_116018) + num_groups_top_116022 = (num_groups_x_116020 * num_groups_y_116021) + padded_sizze_115430 = (m_75136 + y_115429) + mem_120254 = opencl_alloc(self, bytes_120253, "mem_120254") + per_chunk_115432 = squot_safe64(padded_sizze_115430, num_threads_115425) + bytes_120258 = (np.int64(8) * nest_sizze_106435) + bytes_120269 = (np.int64(8) * replicate_arg_75337) + binop_x_120926 = (m_75136 * replicate_arg_75337) + bytes_120924 = (np.int64(8) * binop_x_120926) + bytes_120947 = (np.int64(8) * padded_sizze_115430) + binop_x_120950 = (num_threads_115425 * per_chunk_115432) + bytes_120949 = (np.int64(8) * binop_x_120950) + binop_x_121376 = (k2p2zq_75151 * group_sizze_115657) + bytes_121374 = (np.int64(8) * binop_x_121376) + ctx_val_121390 = (k2p2zq_75151 * tile_sizze_115656) + bytes_121393 = (np.int64(8) * group_sizze_115657) + binop_x_125185 = (np.int64(8) * tile_sizze_115656) + sizze_125186 = (tile_sizze_115656 * binop_x_125185) + bytes_121515 = (np.int64(8) * group_sizze_116019) + binop_x_125210 = (np.int64(8) * tile_sizze_116018) + sizze_125211 = (tile_sizze_116018 * binop_x_125210) + binop_x_121546 = (k2p2zq_75151 * group_sizze_116019) + bytes_121544 = (np.int64(8) * binop_x_121546) + binop_x_121648 = (Ry_116323 * group_sizze_116339) + binop_x_121649 = (Rx_116325 * binop_x_121648) + bytes_121646 = (np.int64(8) * binop_x_121649) + binop_x_121640 = (Ry_116323 * Rx_116325) + bytes_121639 = (np.int64(8) * binop_x_121640) + bytes_121651 = (np.int64(8) * a_loc_szz_116332) + bytes_121653 = (np.int64(8) * b_loc_szz_116334) + bytes_121722 = (np.int64(8) * binop_x_121648) + binop_x_121728 = (Rx_116325 * group_sizze_116339) + bytes_121726 = (np.int64(8) * binop_x_121728) + bytes_121714 = 
(np.int64(8) * Ry_116323) + bytes_121716 = (np.int64(8) * Rx_116325) + binop_x_125231 = (np.int64(8) * Ty_116322) + binop_x_125232 = (Tx_116324 * binop_x_125231) + binop_x_125233 = (Ry_116323 * binop_x_125232) + sizze_125234 = (Rx_116325 * binop_x_125233) + sizze_125149 = (np.int64(16) * k2p2zq_75151) + sizze_125420 = (k2p2zq_75151 * bytes_120247) + binop_x_125541 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125542 = (k2p2zq_75151 * binop_x_125541) + double_buffer_sizze_125543 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125544 = (np.int64(16) * k2p2zq_75151) + double_buffer_sizze_125550 = (np.int64(8) * k2p2zq_75151) + binop_x_125558 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125559 = (k2p2zq_75151 * binop_x_125558) + double_buffer_sizze_125560 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125561 = (np.int64(16) * k2p2zq_75151) + double_buffer_sizze_125567 = (np.int64(8) * k2p2zq_75151) + num_threads_126109 = (segmap_group_sizze_104005 * num_groups_104006) + total_sizze_126110 = (bytes_120247 * num_threads_126109) + total_sizze_126111 = (bytes_120269 * num_threads_126109) + total_sizze_126112 = (bytes_120247 * num_threads_126109) + total_sizze_126113 = (bytes_120250 * num_threads_126109) + total_sizze_126114 = (bytes_120250 * num_threads_126109) + total_sizze_126115 = (bytes_120247 * num_threads_126109) + total_sizze_126116 = (bytes_120250 * num_threads_126109) + total_sizze_126117 = (bytes_120247 * num_threads_126109) + total_sizze_126118 = (bytes_120250 * num_threads_126109) + total_sizze_126119 = (bytes_120247 * num_threads_126109) + total_sizze_126120 = (bytes_120250 * num_threads_126109) + total_sizze_126121 = (bytes_120247 * num_threads_126109) + total_sizze_126122 = (bytes_120250 * num_threads_126109) + total_sizze_126123 = (sizze_125149 * num_threads_126109) + total_sizze_126124 = (bytes_120247 * num_threads_126109) + total_sizze_126125 = (bytes_120247 * num_threads_126109) + total_sizze_126126 = (sizze_125420 * 
num_threads_126109) + total_sizze_126127 = (sizze_125149 * num_threads_126109) + total_sizze_126128 = (bytes_120247 * num_threads_126109) + total_sizze_126129 = (sizze_125420 * num_threads_126109) + total_sizze_126130 = (double_buffer_sizze_125542 * num_threads_126109) + total_sizze_126131 = (double_buffer_sizze_125543 * num_threads_126109) + total_sizze_126132 = (double_buffer_sizze_125544 * num_threads_126109) + total_sizze_126133 = (double_buffer_sizze_125550 * num_threads_126109) + num_threads_126139 = (segmap_group_sizze_106135 * num_groups_106136) + total_sizze_126140 = (bytes_120247 * num_threads_126139) + total_sizze_126141 = (sizze_125149 * num_threads_126139) + total_sizze_126142 = (bytes_120247 * num_threads_126139) + total_sizze_126143 = (sizze_125420 * num_threads_126139) + total_sizze_126144 = (sizze_125149 * num_threads_126139) + total_sizze_126145 = (bytes_120247 * num_threads_126139) + total_sizze_126146 = (sizze_125420 * num_threads_126139) + total_sizze_126147 = (double_buffer_sizze_125559 * num_threads_126139) + total_sizze_126148 = (double_buffer_sizze_125560 * num_threads_126139) + total_sizze_126149 = (double_buffer_sizze_125561 * num_threads_126139) + num_threads_126154 = (segmap_group_sizze_106558 * num_groups_106559) + total_sizze_126155 = (bytes_120250 * num_threads_126154) + total_sizze_126156 = (bytes_120247 * num_threads_126154) + num_threads_126157 = (group_sizze_116019 * num_groups_top_116022) + total_sizze_126158 = (bytes_120247 * num_threads_126157) + num_threads_126162 = (segmap_group_sizze_106651 * num_groups_106652) + total_sizze_126163 = (bytes_120247 * num_threads_126162) + num_threads_126166 = (segmap_group_sizze_106710 * num_groups_106711) + total_sizze_126167 = (bytes_120247 * num_threads_126166) + total_sizze_126168 = (double_buffer_sizze_125567 * num_threads_126166) + local_memory_capacity_127181 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127181)) and sle64(np.int64(0), + 
sext_i32_i64(local_memory_capacity_127181))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127181))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127181))) and suff_outer_par_103548): + mem_120257 = opencl_alloc(self, bytes_121990, "mem_120257") + group_sizze_126565 = self.sizes["main.group_size_126565"] + num_groups_126566 = sdiv_up64((m_75136 * k2p2zq_75151), + group_sizze_126565) + if ((1 * (np.int64(num_groups_126566) * np.int64(group_sizze_126565))) != 0): + self.mainzicopy_126562_var.set_args(np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + defunc_3_map_res_mem_120231, + mem_120257) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126562_var, + ((np.int64(num_groups_126566) * np.int64(group_sizze_126565)),), + (np.int64(group_sizze_126565),)) + if synchronous: + sync(self) + mem_120261 = opencl_alloc(self, bytes_120258, "mem_120261") + group_sizze_126570 = self.sizes["main.group_size_126570"] + num_groups_126571 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_126570) + if ((1 * (np.int64(num_groups_126571) * np.int64(group_sizze_126570))) != 0): + self.mainzicopy_126567_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + mem_120246, mem_120261) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126567_var, + ((np.int64(num_groups_126571) * np.int64(group_sizze_126570)),), + (np.int64(group_sizze_126570),)) + if synchronous: + sync(self) + mem_120265 = opencl_alloc(self, bytes_121997, "mem_120265") + group_sizze_126575 = self.sizes["main.group_size_126575"] + num_groups_126576 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_126575) + if ((1 * (np.int64(num_groups_126576) * np.int64(group_sizze_126575))) != 0): + self.mainzicopy_126572_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + mem_120246, mem_120265) + cl.enqueue_nd_range_kernel(self.queue, 
self.mainzicopy_126572_var, + ((np.int64(num_groups_126576) * np.int64(group_sizze_126575)),), + (np.int64(group_sizze_126575),)) + if synchronous: + sync(self) + mem_120878 = opencl_alloc(self, bytes_120258, "mem_120878") + mem_120881 = opencl_alloc(self, bytes_121990, "mem_120881") + mem_120883 = opencl_alloc(self, bytes_120173, "mem_120883") + mem_120268 = opencl_alloc(self, total_sizze_126110, "mem_120268") + mem_120271 = opencl_alloc(self, total_sizze_126111, "mem_120271") + mem_120273 = opencl_alloc(self, total_sizze_126112, "mem_120273") + mem_120608 = opencl_alloc(self, total_sizze_126113, "mem_120608") + mem_120649 = opencl_alloc(self, total_sizze_126114, "mem_120649") + mem_120661 = opencl_alloc(self, total_sizze_126115, "mem_120661") + mem_120690 = opencl_alloc(self, total_sizze_126116, "mem_120690") + mem_120763 = opencl_alloc(self, total_sizze_126117, "mem_120763") + mem_120778 = opencl_alloc(self, total_sizze_126118, "mem_120778") + mem_120790 = opencl_alloc(self, total_sizze_126119, "mem_120790") + mem_120801 = opencl_alloc(self, total_sizze_126120, "mem_120801") + mem_120821 = opencl_alloc(self, total_sizze_126121, "mem_120821") + mem_120824 = opencl_alloc(self, total_sizze_126122, "mem_120824") + mem_125150 = opencl_alloc(self, total_sizze_126123, "mem_125150") + mem_125152 = opencl_alloc(self, total_sizze_126124, "mem_125152") + mem_125160 = opencl_alloc(self, total_sizze_126125, "mem_125160") + mem_125421 = opencl_alloc(self, total_sizze_126126, "mem_125421") + mem_125429 = opencl_alloc(self, total_sizze_126127, "mem_125429") + mem_125431 = opencl_alloc(self, total_sizze_126128, "mem_125431") + mem_125491 = opencl_alloc(self, total_sizze_126129, "mem_125491") + double_buffer_mem_125535 = opencl_alloc(self, total_sizze_126130, + "double_buffer_mem_125535") + double_buffer_mem_125536 = opencl_alloc(self, total_sizze_126131, + "double_buffer_mem_125536") + double_buffer_mem_125537 = opencl_alloc(self, total_sizze_126132, + 
"double_buffer_mem_125537") + double_buffer_mem_125548 = opencl_alloc(self, total_sizze_126133, + "double_buffer_mem_125548") + if ((1 * (np.int64(num_groups_104006) * np.int64(segmap_group_sizze_104005))) != 0): + self.mainzisegmap_103550_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_75223), + np.byte(y_75227), + np.byte(ok_or_empty_75229), + np.int64(min_res_75341), + np.int64(k_75342), + np.int64(num_groups_104006), + np.int64(binop_x_120251), + np.int64(num_threads_126109), + mem_120252, mem_120257, + mem_120261, mem_120265, + mem_120268, mem_120271, + mem_120273, mem_120608, + mem_120649, mem_120661, + mem_120690, mem_120763, + mem_120778, mem_120790, + mem_120801, mem_120821, + mem_120824, mem_120878, + mem_120881, mem_120883, + mem_125150, mem_125152, + mem_125160, mem_125421, + mem_125429, mem_125431, + mem_125491, + double_buffer_mem_125535, + double_buffer_mem_125536, + double_buffer_mem_125537, + double_buffer_mem_125548) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_103550_var, + ((np.int64(num_groups_104006) * np.int64(segmap_group_sizze_104005)),), + (np.int64(segmap_group_sizze_104005),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_120257 = None + mem_120261 = None + mem_120265 = None + mem_120268 = None + mem_120271 = None + mem_120273 = None + mem_120608 = None + mem_120649 = None + mem_120661 = None + mem_120690 = None + mem_120763 = None + mem_120778 = None + mem_120790 = None + mem_120801 = None + mem_120821 = None + mem_120824 = None + mem_125150 = None + mem_125152 = None + mem_125160 = None + mem_125421 = None + mem_125429 = None + mem_125431 = None + mem_125491 = None + double_buffer_mem_125535 = None + double_buffer_mem_125536 = None + double_buffer_mem_125537 = None + double_buffer_mem_125548 = None + mem_121923 = opencl_alloc(self, bytes_121997, "mem_121923") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_121923, np.int64(0), + mem_120878, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_120878 = None + mem_121927 = opencl_alloc(self, bytes_121990, "mem_121927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121927, np.int64(0), + mem_120881, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_120881 = None + defunc_5_map_res_mem_121929 = mem_121923 + defunc_5_map_res_mem_121930 = mem_121927 + defunc_5_map_res_mem_121931 = mem_120883 + else: + mem_120886 = opencl_alloc(self, bytes_121990, "mem_120886") + self.futhark_builtinzhreplicate_f64(mem_120886, + (m_75136 * k2p2zq_75151), + np.float64(0.0)) + mem_120890 = opencl_alloc(self, bytes_121993, "mem_120890") + self.futhark_builtinzhreplicate_f64(mem_120890, + ((m_75136 * np.int64(2)) * k2p2zq_75151), + np.float64(0.0)) + mem_120894 = opencl_alloc(self, bytes_121997, "mem_120894") + group_sizze_126685 = self.sizes["main.group_size_126685"] + num_groups_126686 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_126685) + if ((1 * (np.int64(num_groups_126686) * np.int64(group_sizze_126685))) != 0): + self.mainzicopy_126682_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + mem_120246, mem_120894) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126682_var, + ((np.int64(num_groups_126686) * np.int64(group_sizze_126685)),), + (np.int64(group_sizze_126685),)) + if synchronous: + sync(self) + mem_param_120902 = mem_120886 + mem_param_120913 = mem_120890 + j_106067 = np.int64(0) + one_129843 = np.int64(1) + for counter_129842 in range(k2p2zq_75151): + index_certs_106070 = True + assert ok_or_empty_75229, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 
recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n" % ("Index [", + j_106067, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_75151, + "][", + k2p2zq_75151, + "].")) + local_memory_capacity_126785 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126785)) and suff_outer_par_106073): + mem_120923 = opencl_alloc(self, bytes_121990, "mem_120923") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120923, np.int64(0), + mem_param_120902, + np.int64(0), + np.int64(1), + k2p2zq_75151, m_75136) + mem_120927 = opencl_alloc(self, bytes_120924, "mem_120927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120927, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + mem_120931 = opencl_alloc(self, bytes_121990, "mem_120931") + mem_120935 = opencl_alloc(self, bytes_120924, "mem_120935") + if ((1 * (np.int64(num_groups_106078) * np.int64(segmap_group_sizze_106077))) != 0): + self.mainzisegmap_105814_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(j_106067), + np.int64(num_groups_106078), + mem_120894, mem_120923, + mem_120927, mem_120931, + mem_120935) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105814_var, + ((np.int64(num_groups_106078) * np.int64(segmap_group_sizze_106077)),), + (np.int64(segmap_group_sizze_106077),)) + if synchronous: + sync(self) + mem_120923 = None + mem_120927 = None + mem_120959 = opencl_alloc(self, bytes_121990, "mem_120959") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120959, np.int64(0), + mem_120931, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_120931 = None + dqrdc2_res_mem_120965 = mem_120959 + dqrdc2_res_mem_120966 = mem_120935 + else: + mem_120938 = opencl_alloc(self, bytes_120173, "mem_120938") + if slt64((k2p2zq_75151 * np.int64(2)), 
segred_group_sizze_106099): + segment_sizze_nonzzero_126703 = smax64(np.int64(1), k2p2zq_75151) + num_threads_126704 = (num_groups_106100 * segred_group_sizze_106099) + if ((1 * (np.int64(num_groups_106100) * np.int64(segred_group_sizze_106099))) != 0): + self.mainzisegred_small_105899_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106099))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(j_106067), + np.int64(num_groups_106100), + np.int64(segment_sizze_nonzzero_126703), + mem_120246, + mem_120938) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_105899_var, + ((np.int64(num_groups_106100) * np.int64(segred_group_sizze_106099)),), + (np.int64(segred_group_sizze_106099),)) + if synchronous: + sync(self) + else: + groups_per_segment_126724 = sdiv_up64(num_groups_106100, + smax64(np.int64(1), + m_75136)) + elements_per_thread_126725 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_106099 * groups_per_segment_126724)) + virt_num_groups_126726 = (groups_per_segment_126724 * m_75136) + num_threads_126727 = (num_groups_106100 * segred_group_sizze_106099) + threads_per_segment_126728 = (groups_per_segment_126724 * segred_group_sizze_106099) + group_res_arr_mem_126729 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_106099 * virt_num_groups_126726)), + "group_res_arr_mem_126729") + mainzicounter_mem_126731 = self.mainzicounter_mem_126731 + if ((1 * (np.int64(num_groups_106100) * np.int64(segred_group_sizze_106099))) != 0): + self.mainzisegred_large_105899_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106099))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(j_106067), + np.int64(num_groups_106100), + np.int64(groups_per_segment_126724), + np.int64(elements_per_thread_126725), + np.int64(virt_num_groups_126726), + 
np.int64(threads_per_segment_126728), + mem_120246, + mem_120938, + group_res_arr_mem_126729, + mainzicounter_mem_126731) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_105899_var, + ((np.int64(num_groups_106100) * np.int64(segred_group_sizze_106099)),), + (np.int64(segred_group_sizze_106099),)) + if synchronous: + sync(self) + mem_120941 = opencl_alloc(self, bytes_120173, "mem_120941") + if ((1 * (np.int64(segmap_usable_groups_106111) * np.int64(segmap_group_sizze_106110))) != 0): + self.mainzisegmap_105883_var.set_args(self.global_failure, + np.int64(m_75136), + mem_120938, mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105883_var, + ((np.int64(segmap_usable_groups_106111) * np.int64(segmap_group_sizze_106110)),), + (np.int64(segmap_group_sizze_106110),)) + if synchronous: + sync(self) + mem_120938 = None + if ((1 * (np.int64(segmap_usable_groups_106118) * np.int64(segmap_group_sizze_106117))) != 0): + self.mainzisegmap_105874_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(j_106067), + mem_param_120902, + mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105874_var, + ((np.int64(segmap_usable_groups_106118) * np.int64(segmap_group_sizze_106117)),), + (np.int64(segmap_group_sizze_106117),)) + if synchronous: + sync(self) + mem_120946 = opencl_alloc(self, bytes_120924, "mem_120946") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120946, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + mem_120948 = opencl_alloc(self, bytes_120947, "mem_120948") + tmp_offs_126773 = np.int64(0) + if ((m_75136 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120941, + dest_offset=np.int64((tmp_offs_126773 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126773 = (tmp_offs_126773 + m_75136) + if 
((y_115429 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120254, + dest_offset=np.int64((tmp_offs_126773 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115429 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126773 = (tmp_offs_126773 + y_115429) + mem_120951 = opencl_alloc(self, bytes_120949, "mem_120951") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120951, np.int64(0), + mem_120948, np.int64(0), + np.int64(1), + per_chunk_115432, + num_threads_115425) + mem_120948 = None + mem_120956 = opencl_alloc(self, bytes_120924, "mem_120956") + if ((1 * (np.int64(num_groups_106123) * np.int64(segmap_group_sizze_106122))) != 0): + self.mainzisegmap_105859_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(j_106067), + np.int64(num_groups_106123), + np.int64(num_threads_115425), + np.int64(per_chunk_115432), + mem_120941, mem_120946, + mem_120951, mem_120956) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105859_var, + ((np.int64(num_groups_106123) * np.int64(segmap_group_sizze_106122)),), + (np.int64(segmap_group_sizze_106122),)) + if synchronous: + sync(self) + mem_120941 = None + mem_120946 = None + mem_120951 = None + mem_120963 = opencl_alloc(self, bytes_121990, "mem_120963") + if (((m_75136 * k2p2zq_75151) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120963, mem_param_120902, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_75136 * k2p2zq_75151) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_120965 = mem_120963 + dqrdc2_res_mem_120966 = mem_120956 + mem_120972 = opencl_alloc(self, bytes_121993, "mem_120972") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120972, np.int64(0), + dqrdc2_res_mem_120966, + np.int64(0), np.int64(1), + m_75136, + (np.int64(2) * k2p2zq_75151)) + dqrdc2_res_mem_120966 = None + mem_param_tmp_126687 = dqrdc2_res_mem_120965 + mem_param_tmp_126688 
= mem_120972 + mem_param_120902 = mem_param_tmp_126687 + mem_param_120913 = mem_param_tmp_126688 + j_106067 += one_129843 + dqrdc2_res_r_mem_120986 = mem_param_120902 + dqrdc2_res_r_mem_120997 = mem_param_120913 + mem_120886 = None + mem_120890 = None + mem_120894 = None + mem_121001 = opencl_alloc(self, bytes_120258, "mem_121001") + group_sizze_126789 = self.sizes["main.group_size_126789"] + num_groups_126790 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_126789) + if ((1 * (np.int64(num_groups_126790) * np.int64(group_sizze_126789))) != 0): + self.mainzicopy_126786_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + mem_120246, mem_121001) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126786_var, + ((np.int64(num_groups_126790) * np.int64(group_sizze_126789)),), + (np.int64(group_sizze_126789),)) + if synchronous: + sync(self) + mem_121004 = opencl_alloc(self, bytes_121990, "mem_121004") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121004, np.int64(0), + dqrdc2_res_r_mem_120986, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + dqrdc2_res_r_mem_120986 = None + mem_121008 = opencl_alloc(self, bytes_120924, "mem_121008") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121008, np.int64(0), + dqrdc2_res_r_mem_120997, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + dqrdc2_res_r_mem_120997 = None + mem_121335 = opencl_alloc(self, bytes_120258, "mem_121335") + mem_121338 = opencl_alloc(self, bytes_121990, "mem_121338") + mem_121341 = opencl_alloc(self, bytes_121990, "mem_121341") + mem_121343 = opencl_alloc(self, bytes_120173, "mem_121343") + mem_121011 = opencl_alloc(self, total_sizze_126140, "mem_121011") + mem_125167 = opencl_alloc(self, total_sizze_126141, "mem_125167") + mem_125169 = opencl_alloc(self, total_sizze_126142, "mem_125169") + mem_125438 = opencl_alloc(self, total_sizze_126143, "mem_125438") + mem_125446 = opencl_alloc(self, 
total_sizze_126144, "mem_125446") + mem_125448 = opencl_alloc(self, total_sizze_126145, "mem_125448") + mem_125498 = opencl_alloc(self, total_sizze_126146, "mem_125498") + double_buffer_mem_125552 = opencl_alloc(self, total_sizze_126147, + "double_buffer_mem_125552") + double_buffer_mem_125553 = opencl_alloc(self, total_sizze_126148, + "double_buffer_mem_125553") + double_buffer_mem_125554 = opencl_alloc(self, total_sizze_126149, + "double_buffer_mem_125554") + if ((1 * (np.int64(num_groups_106136) * np.int64(segmap_group_sizze_106135))) != 0): + self.mainzisegmap_105526_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_75223), + np.byte(y_75227), + np.int64(min_res_75341), + np.int64(k_75342), + np.int64(num_groups_106136), + np.int64(num_threads_126139), + mem_120248, mem_121001, + mem_121004, mem_121008, + mem_121011, mem_121335, + mem_121338, mem_121341, + mem_121343, mem_125167, + mem_125169, mem_125438, + mem_125446, mem_125448, + mem_125498, + double_buffer_mem_125552, + double_buffer_mem_125553, + double_buffer_mem_125554) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_105526_var, + ((np.int64(num_groups_106136) * np.int64(segmap_group_sizze_106135)),), + (np.int64(segmap_group_sizze_106135),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121001 = None + mem_121004 = None + mem_121008 = None + mem_121011 = None + mem_125167 = None + mem_125169 = None + mem_125438 = None + mem_125446 = None + mem_125448 = None + mem_125498 = None + double_buffer_mem_125552 = None + double_buffer_mem_125553 = None + double_buffer_mem_125554 = None + segmap_usable_groups_106422 = sdiv_up64(binop_x_120244, + segmap_group_sizze_106421) + mem_121346 = opencl_alloc(self, binop_x_120244, "mem_121346") + if ((1 * (np.int64(segmap_usable_groups_106422) * np.int64(segmap_group_sizze_106421))) != 0): + 
self.mainzisegmap_105481_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121343, mem_121346) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_105481_var, + ((np.int64(segmap_usable_groups_106422) * np.int64(segmap_group_sizze_106421)),), + (np.int64(segmap_group_sizze_106421),)) + if synchronous: + sync(self) + segmap_usable_groups_106437 = sdiv_up64(nest_sizze_106435, + segmap_group_sizze_106436) + mem_121351 = opencl_alloc(self, bytes_121997, "mem_121351") + if ((1 * (np.int64(segmap_usable_groups_106437) * np.int64(segmap_group_sizze_106436))) != 0): + self.mainzisegmap_105446_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121335, mem_121343, + mem_121346, mem_121351) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_105446_var, + ((np.int64(segmap_usable_groups_106437) * np.int64(segmap_group_sizze_106436)),), + (np.int64(segmap_group_sizze_106436),)) + if synchronous: + sync(self) + mem_121346 = None + local_memory_capacity_127000 = self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127000)) and sle64((((((bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127000))) and suff_outer_par_106460): + mem_121355 = opencl_alloc(self, bytes_121997, "mem_121355") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121355, np.int64(0), + mem_121351, np.int64(0), + m_75136, k2p2zq_75151, + k2p2zq_75151) + mem_121359 = opencl_alloc(self, bytes_121997, "mem_121359") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_121359, np.int64(0), + mem_121355, np.int64(0), + np.int64(1), k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_121355 = None + mem_121363 = opencl_alloc(self, bytes_121997, "mem_121363") + group_sizze_126873 = self.sizes["main.group_size_126873"] + num_groups_126874 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_126873) + if ((1 * (np.int64(num_groups_126874) * np.int64(group_sizze_126873))) != 0): + self.mainzicopy_126870_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121351, mem_121363) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_126870_var, + ((np.int64(num_groups_126874) * np.int64(group_sizze_126873)),), + (np.int64(group_sizze_126873),)) + if synchronous: + sync(self) + mem_121366 = opencl_alloc(self, bytes_120250, "mem_121366") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121366, np.int64(0), + mem_120252, np.int64(0), + np.int64(1), k2p2zq_75151, + k2p2zq_75151) + num_groups_x_115658 = sdiv_up64(m_75136, tile_sizze_115656) + num_groups_y_115659 = sdiv_up64(k2p2zq_75151, tile_sizze_115656) + num_groups_top_115660 = (num_groups_x_115658 * num_groups_y_115659) + mem_121368 = opencl_alloc(self, bytes_120247, "mem_121368") + self.futhark_builtinzhreplicate_f64(mem_121368, k2p2zq_75151, + np.float64(0.0)) + mem_121446 = opencl_alloc(self, bytes_121997, "mem_121446") + num_threads_126152 = (group_sizze_115657 * num_groups_top_115660) + total_sizze_126153 = (bytes_120247 * num_threads_126152) + mem_125177 = opencl_alloc(self, total_sizze_126153, "mem_125177") + if ((1 * (np.int64(num_groups_top_115660) * np.int64(group_sizze_115657))) != 0): + self.mainzisegmap_intragroup_115661_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64(bytes_121374)), + cl.LocalMemory(np.int64(bytes_121393)), + cl.LocalMemory(np.int64(bytes_121393)), + 
cl.LocalMemory(np.int64(bytes_121374)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_y_115659), + np.int64(ctx_val_121390), + np.int64(num_threads_126152), + mem_121359, + mem_121363, + mem_121366, + mem_121368, + mem_121446, + mem_125177) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_115661_var, + ((np.int64(num_groups_top_115660) * np.int64(group_sizze_115657)),), + (np.int64(group_sizze_115657),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121359 = None + mem_121363 = None + mem_121366 = None + mem_121368 = None + mem_125177 = None + defunc_3_map_res_r_mem_121609 = mem_121446 + else: + mem_121450 = opencl_alloc(self, bytes_121997, "mem_121450") + self.futhark_builtinzhreplicate_f64(mem_121450, + ((m_75136 * k2p2zq_75151) * k2p2zq_75151), + np.float64(0.0)) + mem_121454 = opencl_alloc(self, bytes_121997, "mem_121454") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121454, np.int64(0), + mem_121351, np.int64(0), + m_75136, k2p2zq_75151, + k2p2zq_75151) + mem_121458 = opencl_alloc(self, bytes_121997, "mem_121458") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121458, np.int64(0), + mem_121454, np.int64(0), + np.int64(1), k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_121454 = None + mem_param_121469 = mem_121450 + i_106524 = np.int64(0) + one_129846 = np.int64(1) + for counter_129845 in range(k2p2zq_75151): + x_106526 = (k2p2zq_75151 - i_106524) + i_106527 = (x_106526 - np.int64(1)) + x_106528 = sle64(np.int64(0), i_106527) + y_106529 = slt64(i_106527, k2p2zq_75151) + bounds_check_106530 = (x_106528 and y_106529) + j_m_i_106531 = (k2p2zq_75151 - x_106526) + empty_slice_106532 = (j_m_i_106531 == np.int64(0)) + m_106533 = (j_m_i_106531 - np.int64(1)) + i_p_m_t_s_106534 = (x_106526 + m_106533) + zzero_leq_i_p_m_t_s_106535 = sle64(np.int64(0), i_p_m_t_s_106534) + i_p_m_t_s_leq_w_106536 = slt64(i_p_m_t_s_106534, k2p2zq_75151) + zzero_lte_i_106537 = sle64(np.int64(0), x_106526) 
+ i_lte_j_106538 = sle64(x_106526, k2p2zq_75151) + y_106539 = (i_p_m_t_s_leq_w_106536 and zzero_lte_i_106537) + y_106540 = (zzero_leq_i_p_m_t_s_106535 and y_106539) + y_106541 = (i_lte_j_106538 and y_106540) + forwards_ok_106542 = (zzero_lte_i_106537 and y_106541) + ok_or_empty_106543 = (empty_slice_106532 or forwards_ok_106542) + index_ok_106544 = (bounds_check_106530 and ok_or_empty_106543) + index_certs_106545 = True + assert index_ok_106544, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_106527, + ", ", + x_106526, + ":", + k2p2zq_75151, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + k2p2zq_75151, + "].")) + index_certs_106546 = True + assert ok_or_empty_106543, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + x_106526, + ":", + k2p2zq_75151, + "] out of bounds for array of shape [", + k2p2zq_75151, + "].")) + index_ok_106547 = (bounds_check_106530 and bounds_check_106530) + index_certs_106548 = True + assert index_ok_106547, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 
lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_106527, + ", ", + i_106527, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + k2p2zq_75151, + "].")) + index_certs_106549 = True + assert bounds_check_106530, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_106527, + "] out of bounds for array of shape [", + k2p2zq_75151, + "].")) + nest_sizze_106611 = (j_m_i_106531 * binop_x_120244) + max_num_groups_126897 = self.sizes["main.segred_num_groups_105138"] + num_groups_106613 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_106611, + segred_group_sizze_106612), + sext_i32_i64(max_num_groups_126897)))) + local_memory_capacity_126999 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126999)) and suff_outer_par_106555): + mem_121476 = opencl_alloc(self, bytes_120258, "mem_121476") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121476, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_121504 = opencl_alloc(self, bytes_120258, "mem_121504") + mem_121480 = opencl_alloc(self, total_sizze_126155, "mem_121480") + mem_121492 = opencl_alloc(self, total_sizze_126156, "mem_121492") + if ((1 * (np.int64(num_groups_106559) * 
np.int64(segmap_group_sizze_106558))) != 0): + self.mainzisegmap_105020_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_106526), + np.int64(i_106527), + np.int64(j_m_i_106531), + np.int64(num_groups_106559), + np.int64(num_threads_126154), + mem_120252, mem_121351, + mem_121458, mem_121476, + mem_121480, mem_121492, + mem_121504) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105020_var, + ((np.int64(num_groups_106559) * np.int64(segmap_group_sizze_106558)),), + (np.int64(segmap_group_sizze_106558),)) + if synchronous: + sync(self) + mem_121476 = None + mem_121480 = None + mem_121492 = None + mem_121576 = opencl_alloc(self, bytes_121997, "mem_121576") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121576, + np.int64(0), + mem_121504, + np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_121504 = None + defunc_3_map_res_mem_121583 = mem_121576 + else: + local_memory_capacity_126998 = self.max_local_memory + if (sle64((((bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8))) + (bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8)))) + (bytes_121544 + srem64((np.int64(8) - srem64(bytes_121544, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126998)) and suff_outer_par_106589): + mem_121508 = opencl_alloc(self, bytes_121997, "mem_121508") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121508, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_121512 = opencl_alloc(self, bytes_121997, "mem_121512") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121512, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + num_whole_tiles_116039 = squot64(j_m_i_106531, + tile_sizze_116018) + residual_input_116172 = srem64(j_m_i_106531, tile_sizze_116018) + cond_116173 = 
(residual_input_116172 == np.int64(0)) + mem_121551 = opencl_alloc(self, bytes_121997, "mem_121551") + mem_125219 = opencl_alloc(self, total_sizze_126158, + "mem_125219") + if ((1 * (np.int64(num_groups_top_116022) * np.int64(group_sizze_116019))) != 0): + self.mainzisegmap_intragroup_116023_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121544)), + cl.LocalMemory(np.int64(bytes_121515)), + cl.LocalMemory(np.int64(bytes_121515)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_106526), + np.int64(i_106527), + np.int64(j_m_i_106531), + np.int64(num_groups_y_116021), + np.int64(num_whole_tiles_116039), + np.int64(residual_input_116172), + np.byte(cond_116173), + np.int64(num_threads_126157), + mem_120252, + mem_121351, + mem_121458, + mem_121508, + mem_121512, + mem_121551, + mem_125219) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_116023_var, + ((np.int64(num_groups_top_116022) * np.int64(group_sizze_116019)),), + (np.int64(group_sizze_116019),)) + if synchronous: + sync(self) + mem_121508 = None + mem_121512 = None + mem_125219 = None + defunc_3_map_res_mem_121572 = mem_121551 + else: + mem_121555 = opencl_alloc(self, bytes_121990, "mem_121555") + if slt64((j_m_i_106531 * np.int64(2)), + segred_group_sizze_106612): + segment_sizze_nonzzero_126928 = smax64(np.int64(1), + j_m_i_106531) + num_threads_126929 = (num_groups_106613 * segred_group_sizze_106612) + if ((1 * (np.int64(num_groups_106613) * np.int64(segred_group_sizze_106612))) != 0): + self.mainzisegred_small_105142_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106612))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_106526), + np.int64(i_106527), + np.int64(j_m_i_106531), + np.int64(num_groups_106613), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_126928), + mem_121351, + mem_param_121469, + mem_121555) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainzisegred_small_105142_var, + ((np.int64(num_groups_106613) * np.int64(segred_group_sizze_106612)),), + (np.int64(segred_group_sizze_106612),)) + if synchronous: + sync(self) + else: + groups_per_segment_126949 = sdiv_up64(num_groups_106613, + smax64(np.int64(1), + (m_75136 * k2p2zq_75151))) + elements_per_thread_126950 = sdiv_up64(j_m_i_106531, + (segred_group_sizze_106612 * groups_per_segment_126949)) + virt_num_groups_126951 = (groups_per_segment_126949 * (m_75136 * k2p2zq_75151)) + num_threads_126952 = (num_groups_106613 * segred_group_sizze_106612) + threads_per_segment_126953 = (groups_per_segment_126949 * segred_group_sizze_106612) + group_res_arr_mem_126954 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_106612 * virt_num_groups_126951)), + "group_res_arr_mem_126954") + mainzicounter_mem_126956 = self.mainzicounter_mem_126956 + if ((1 * (np.int64(num_groups_106613) * np.int64(segred_group_sizze_106612))) != 0): + self.mainzisegred_large_105142_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106612))), + np.int64(k2p2zq_75151), + np.int64(x_106526), + np.int64(i_106527), + np.int64(j_m_i_106531), + np.int64(num_groups_106613), + np.int64(binop_x_120251), + np.int64(groups_per_segment_126949), + np.int64(elements_per_thread_126950), + np.int64(virt_num_groups_126951), + np.int64(threads_per_segment_126953), + mem_121351, + mem_param_121469, + mem_121555, + group_res_arr_mem_126954, + mainzicounter_mem_126956) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_105142_var, + ((np.int64(num_groups_106613) * np.int64(segred_group_sizze_106612)),), + (np.int64(segred_group_sizze_106612),)) + if synchronous: + sync(self) + mem_121559 = opencl_alloc(self, bytes_121990, "mem_121559") + if ((1 * (np.int64(segmap_usable_groups_106630) * np.int64(segmap_group_sizze_106629))) != 0): + self.mainzisegmap_105120_var.set_args(self.global_failure, + 
np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(i_106527), + mem_120252, mem_121351, + mem_121555, mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105120_var, + ((np.int64(segmap_usable_groups_106630) * np.int64(segmap_group_sizze_106629)),), + (np.int64(segmap_group_sizze_106629),)) + if synchronous: + sync(self) + mem_121555 = None + if ((1 * (np.int64(segmap_usable_groups_106641) * np.int64(segmap_group_sizze_106640))) != 0): + self.mainzisegmap_105108_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(i_106527), + np.int64(binop_x_120251), + mem_param_121469, + mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_105108_var, + ((np.int64(segmap_usable_groups_106641) * np.int64(segmap_group_sizze_106640)),), + (np.int64(segmap_group_sizze_106640),)) + if synchronous: + sync(self) + mem_121559 = None + defunc_3_map_res_mem_121572 = mem_param_121469 + mem_121581 = opencl_alloc(self, bytes_121997, "mem_121581") + if ((((m_75136 * k2p2zq_75151) * k2p2zq_75151) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121581, + defunc_3_map_res_mem_121572, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_75136 * k2p2zq_75151) * k2p2zq_75151) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_121572 = None + defunc_3_map_res_mem_121583 = mem_121581 + mem_param_tmp_126895 = defunc_3_map_res_mem_121583 + mem_param_121469 = mem_param_tmp_126895 + i_106524 += one_129846 + defunc_3_map_res_r_mem_121597 = mem_param_121469 + mem_121450 = None + mem_121458 = None + defunc_3_map_res_r_mem_121609 = defunc_3_map_res_r_mem_121597 + mem_121351 = None + local_memory_capacity_127133 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127133)) and suff_outer_par_106656): + mem_121613 = opencl_alloc(self, bytes_121997, "mem_121613") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121613, 
np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + mem_121632 = opencl_alloc(self, bytes_121997, "mem_121632") + mem_121616 = opencl_alloc(self, total_sizze_126163, "mem_121616") + if ((1 * (np.int64(num_groups_106652) * np.int64(segmap_group_sizze_106651))) != 0): + self.mainzisegmap_104804_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_106652), + np.int64(binop_x_120251), + np.int64(num_threads_126162), + defunc_3_map_res_r_mem_121609, + mem_121613, mem_121616, + mem_121632) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104804_var, + ((np.int64(num_groups_106652) * np.int64(segmap_group_sizze_106651)),), + (np.int64(segmap_group_sizze_106651),)) + if synchronous: + sync(self) + mem_121613 = None + mem_121616 = None + mem_121845 = opencl_alloc(self, bytes_121997, "mem_121845") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121845, np.int64(0), + mem_121632, np.int64(0), + np.int64(1), + (m_75136 * k2p2zq_75151), + k2p2zq_75151) + mem_121632 = None + defunc_3_map_res_r_mem_121847 = mem_121845 + else: + local_memory_capacity_127132 = self.max_local_memory + if (sle64(((bytes_121651 + srem64((np.int64(8) - srem64(bytes_121651, + np.int64(8))), + np.int64(8))) + (bytes_121653 + srem64((np.int64(8) - srem64(bytes_121653, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127132)) and suff_outer_par_106678): + mem_121636 = opencl_alloc(self, bytes_121997, "mem_121636") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121636, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + tk_div_tx_116327 = sdiv_up64(Tk_116326, Tx_116324) + tk_div_ty_116328 = sdiv_up64(Tk_116326, Ty_116322) + gridDim_x_116335 = sdiv_up64(k2p2zq_75151, TxRx_116329) + gridDim_y_116336 = sdiv_up64(k2p2zq_75151, TyRy_116330) + binop_y_116337 = (gridDim_x_116335 * gridDim_y_116336) + grid_sizze_116338 = 
(m_75136 * binop_y_116337) + full_tiles_116367 = squot64(k2p2zq_75151, Tk_116326) + kk_116570 = (Tk_116326 * full_tiles_116367) + mem_121827 = opencl_alloc(self, bytes_121997, "mem_121827") + if ((1 * (np.int64(grid_sizze_116338) * np.int64(group_sizze_116339))) != 0): + self.mainzisegmap_intragroup_116342_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121653)), + cl.LocalMemory(np.int64(bytes_121651)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(gridDim_x_116335), + np.int64(gridDim_y_116336), + np.int64(full_tiles_116367), + np.int64(kk_116570), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121609, + mem_121636, + mem_121827) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_116342_var, + ((np.int64(grid_sizze_116338) * np.int64(group_sizze_116339)),), + (np.int64(group_sizze_116339),)) + if synchronous: + sync(self) + mem_121636 = None + defunc_3_map_res_r_mem_121841 = mem_121827 + else: + mem_121831 = opencl_alloc(self, bytes_121997, "mem_121831") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121831, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_121835 = opencl_alloc(self, bytes_121997, "mem_121835") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121835, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + mem_121840 = opencl_alloc(self, bytes_121997, "mem_121840") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_106694): + segment_sizze_nonzzero_127072 = smax64(np.int64(1), k2p2zq_75151) + num_threads_127073 = (num_groups_106695 * segred_group_sizze_106694) + if ((1 * (np.int64(num_groups_106695) * np.int64(segred_group_sizze_106694))) != 0): + self.mainzisegred_small_104864_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106694))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + 
np.int64(num_groups_106695), + np.int64(segment_sizze_nonzzero_127072), + mem_121831, + mem_121835, + mem_121840) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_104864_var, + ((np.int64(num_groups_106695) * np.int64(segred_group_sizze_106694)),), + (np.int64(segred_group_sizze_106694),)) + if synchronous: + sync(self) + else: + groups_per_segment_127093 = sdiv_up64(num_groups_106695, + smax64(np.int64(1), + ((m_75136 * k2p2zq_75151) * k2p2zq_75151))) + elements_per_thread_127094 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_106694 * groups_per_segment_127093)) + virt_num_groups_127095 = (groups_per_segment_127093 * ((m_75136 * k2p2zq_75151) * k2p2zq_75151)) + num_threads_127096 = (num_groups_106695 * segred_group_sizze_106694) + threads_per_segment_127097 = (groups_per_segment_127093 * segred_group_sizze_106694) + group_res_arr_mem_127098 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_106694 * virt_num_groups_127095)), + "group_res_arr_mem_127098") + mainzicounter_mem_127100 = self.mainzicounter_mem_127100 + if ((1 * (np.int64(num_groups_106695) * np.int64(segred_group_sizze_106694))) != 0): + self.mainzisegred_large_104864_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_106694))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_106695), + np.int64(groups_per_segment_127093), + np.int64(elements_per_thread_127094), + np.int64(virt_num_groups_127095), + np.int64(threads_per_segment_127097), + mem_121831, + mem_121835, + mem_121840, + group_res_arr_mem_127098, + mainzicounter_mem_127100) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_104864_var, + ((np.int64(num_groups_106695) * np.int64(segred_group_sizze_106694)),), + (np.int64(segred_group_sizze_106694),)) + if synchronous: + sync(self) + mem_121831 = None + mem_121835 = None + defunc_3_map_res_r_mem_121841 = mem_121840 + defunc_3_map_res_r_mem_121847 = 
defunc_3_map_res_r_mem_121841 + mem_121850 = opencl_alloc(self, bytes_121990, "mem_121850") + group_sizze_127137 = self.sizes["main.group_size_127137"] + num_groups_127138 = sdiv_up64((m_75136 * k2p2zq_75151), + group_sizze_127137) + if ((1 * (np.int64(num_groups_127138) * np.int64(group_sizze_127137))) != 0): + self.mainzicopy_127134_var.set_args(np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + defunc_3_map_res_mem_120231, + mem_121850) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127134_var, + ((np.int64(num_groups_127138) * np.int64(group_sizze_127137)),), + (np.int64(group_sizze_127137),)) + if synchronous: + sync(self) + mem_121854 = opencl_alloc(self, bytes_121997, "mem_121854") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121854, np.int64(0), + mem_121335, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_121335 = None + mem_121858 = opencl_alloc(self, bytes_120258, "mem_121858") + group_sizze_127142 = self.sizes["main.group_size_127142"] + num_groups_127143 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_127142) + if ((1 * (np.int64(num_groups_127143) * np.int64(group_sizze_127142))) != 0): + self.mainzicopy_127139_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121854, mem_121858) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127139_var, + ((np.int64(num_groups_127143) * np.int64(group_sizze_127142)),), + (np.int64(group_sizze_127142),)) + if synchronous: + sync(self) + mem_121854 = None + mem_121895 = opencl_alloc(self, bytes_121990, "mem_121895") + mem_125243 = opencl_alloc(self, total_sizze_126167, "mem_125243") + double_buffer_mem_125565 = opencl_alloc(self, total_sizze_126168, + "double_buffer_mem_125565") + if ((1 * (np.int64(num_groups_106711) * np.int64(segmap_group_sizze_106710))) != 0): + self.mainzisegmap_104742_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + 
np.int64(k2p2zq_75151), + np.int64(m_75223), + np.int64(num_groups_106711), + np.int64(num_threads_126166), + mem_121338, mem_121343, + mem_121850, mem_121858, + mem_121895, mem_125243, + double_buffer_mem_125565) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104742_var, + ((np.int64(num_groups_106711) * np.int64(segmap_group_sizze_106710)),), + (np.int64(segmap_group_sizze_106710),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121338 = None + mem_121850 = None + mem_121858 = None + mem_125243 = None + double_buffer_mem_125565 = None + mem_121898 = opencl_alloc(self, bytes_121990, "mem_121898") + self.futhark_builtinzhreplicate_f64(mem_121898, + (m_75136 * k2p2zq_75151), + np.float64(0.0)) + segmap_usable_groups_106781 = sdiv_up64(binop_x_120244, + segmap_group_sizze_106780) + mem_121901 = opencl_alloc(self, bytes_121990, "mem_121901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121901, np.int64(0), + mem_121895, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_121895 = None + if ((1 * (np.int64(segmap_usable_groups_106781) * np.int64(segmap_group_sizze_106780))) != 0): + self.mainzisegmap_104699_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(binop_x_120251), + mem_121341, + defunc_3_map_res_r_mem_121609, + mem_121898, mem_121901) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104699_var, + ((np.int64(segmap_usable_groups_106781) * np.int64(segmap_group_sizze_106780)),), + (np.int64(segmap_group_sizze_106780),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_121609 = None + mem_121901 = None + mem_121906 = opencl_alloc(self, bytes_121997, "mem_121906") + self.futhark_builtinzhreplicate_f64(mem_121906, + ((m_75136 * k2p2zq_75151) * k2p2zq_75151), + np.float64(0.0)) + segmap_usable_groups_106852 = sdiv_up64(nest_sizze_106435, + segmap_group_sizze_106851) + mem_121909 = opencl_alloc(self, bytes_121990, "mem_121909") + 
self.futhark_builtinzhgpu_map_transpose_i64(mem_121909, np.int64(0), + mem_121341, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_121341 = None + if ((1 * (np.int64(segmap_usable_groups_106852) * np.int64(segmap_group_sizze_106851))) != 0): + self.mainzisegmap_104542_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121847, + mem_121906, mem_121909) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104542_var, + ((np.int64(segmap_usable_groups_106852) * np.int64(segmap_group_sizze_106851)),), + (np.int64(segmap_group_sizze_106851),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_121847 = None + mem_121909 = None + segmap_usable_groups_106871 = sdiv_up64(nest_sizze_106435, + segmap_group_sizze_106870) + mem_121915 = opencl_alloc(self, bytes_121997, "mem_121915") + if ((1 * (np.int64(segmap_usable_groups_106871) * np.int64(segmap_group_sizze_106870))) != 0): + self.mainzisegmap_104494_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121906, mem_121915) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104494_var, + ((np.int64(segmap_usable_groups_106871) * np.int64(segmap_group_sizze_106870)),), + (np.int64(segmap_group_sizze_106870),)) + if synchronous: + sync(self) + mem_121906 = None + segmap_usable_groups_106880 = sdiv_up64(binop_x_120244, + segmap_group_sizze_106879) + mem_121919 = opencl_alloc(self, bytes_121990, "mem_121919") + if ((1 * (np.int64(segmap_usable_groups_106880) * np.int64(segmap_group_sizze_106879))) != 0): + self.mainzisegmap_104472_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_121898, mem_121919) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_104472_var, + ((np.int64(segmap_usable_groups_106880) * np.int64(segmap_group_sizze_106879)),), + (np.int64(segmap_group_sizze_106879),)) + if synchronous: + sync(self) + mem_121898 = None + 
defunc_5_map_res_mem_121929 = mem_121915 + defunc_5_map_res_mem_121930 = mem_121919 + defunc_5_map_res_mem_121931 = mem_121343 + mem_120252 = None + mem_120254 = None + num_recresids_padded_75809 = (defunc_2_reduce_res_75260 - k2p2zq_75151) + replicate_arg_75810 = (m_75136 * num_recresids_padded_75809) + bounds_invalid_upwards_75811 = slt64(replicate_arg_75810, np.int64(0)) + valid_75812 = not(bounds_invalid_upwards_75811) + range_valid_c_75813 = True + assert valid_75812, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 recresid.fut:28:14-49\n #3 recresid.fut:100:7-30\n #4 mroc.fut:27:25-38\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:200:5-74\n #8 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_75810, + " is invalid.")) + bytes_121932 = (np.int64(8) * replicate_arg_75810) + mem_121934 = opencl_alloc(self, bytes_121932, "mem_121934") + self.futhark_builtinzhreplicate_f64(mem_121934, + (num_recresids_padded_75809 * m_75136), + np.float64(0.0)) + loop_cond_t_res_75815 = slt64(k2p2zq_75151, m_75290) + loop_not_taken_75816 = not(loop_cond_t_res_75815) + protect_assert_disj_75817 = (valid_75339 or loop_not_taken_75816) + range_valid_c_75818 = True + assert protect_assert_disj_75817, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:62:33-75\n #5 /prelude/soacs.fut:91:28-38\n #6 /prelude/soacs.fut:91:3-61\n #7 recresid.fut:51:11-73:44\n #8 recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:200:5-74\n #13 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_75337, + " is invalid.")) + segmap_group_sizze_106925 = 
self.sizes["main.segmap_group_size_106903"] + segmap_usable_groups_106926 = sdiv_up_safe64(binop_x_120251, + segmap_group_sizze_106925) + mem_121938 = opencl_alloc(self, bytes_120250, "mem_121938") + if ((1 * (np.int64(segmap_usable_groups_106926) * np.int64(segmap_group_sizze_106925))) != 0): + self.mainzisegmap_106900_var.set_args(self.global_failure, + np.int64(k2p2zq_75151), + mem_121938) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_106900_var, + ((np.int64(segmap_usable_groups_106926) * np.int64(segmap_group_sizze_106925)),), + (np.int64(segmap_group_sizze_106925),)) + if synchronous: + sync(self) + suff_outer_par_107037 = (self.sizes["main.suff_outer_par_9"] <= m_75136) + segmap_group_sizze_107574 = self.sizes["main.segmap_group_size_107041"] + max_num_groups_127187 = self.sizes["main.segmap_num_groups_107043"] + num_groups_107575 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_107574), + sext_i32_i64(max_num_groups_127187)))) + segred_group_sizze_109865 = self.sizes["main.segred_group_size_109841"] + max_num_groups_127188 = self.sizes["main.segred_num_groups_109843"] + num_groups_109866 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_109865), + sext_i32_i64(max_num_groups_127188)))) + segmap_group_sizze_109886 = self.sizes["main.segmap_group_size_109831"] + segred_group_sizze_109896 = self.sizes["main.segred_group_size_109812"] + max_num_groups_127189 = self.sizes["main.segred_num_groups_109814"] + num_groups_109897 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_109896), + sext_i32_i64(max_num_groups_127189)))) + segmap_group_sizze_109909 = self.sizes["main.segmap_group_size_109800"] + suff_outer_par_109963 = (self.sizes["main.suff_outer_par_15"] <= m_75136) + segmap_group_sizze_109967 = self.sizes["main.segmap_group_size_109582"] + max_num_groups_127190 = self.sizes["main.segmap_num_groups_109584"] + num_groups_109968 = 
sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_109967), + sext_i32_i64(max_num_groups_127190)))) + segred_group_sizze_109989 = self.sizes["main.segred_group_size_109659"] + segmap_group_sizze_110000 = self.sizes["main.segmap_group_size_109651"] + segmap_group_sizze_110007 = self.sizes["main.segmap_group_size_109643"] + segmap_group_sizze_110025 = self.sizes["main.segmap_group_size_109292"] + max_num_groups_127191 = self.sizes["main.segmap_num_groups_109294"] + num_groups_110026 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_110025), + sext_i32_i64(max_num_groups_127191)))) + segmap_group_sizze_110321 = self.sizes["main.segmap_group_size_109235"] + segmap_group_sizze_110336 = self.sizes["main.segmap_group_size_109201"] + suff_outer_par_110360 = (self.sizes["main.suff_outer_par_12"] <= binop_x_120244) + suff_outer_par_110455 = (self.sizes["main.suff_outer_par_13"] <= m_75136) + segmap_group_sizze_110458 = self.sizes["main.segmap_group_size_108772"] + max_num_groups_127192 = self.sizes["main.segmap_num_groups_108774"] + num_groups_110459 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_110458), + sext_i32_i64(max_num_groups_127192)))) + suff_outer_par_110489 = (self.sizes["main.suff_outer_par_14"] <= binop_x_120244) + segred_group_sizze_110512 = self.sizes["main.segred_group_size_108886"] + segmap_group_sizze_110529 = self.sizes["main.segmap_group_size_108873"] + segmap_group_sizze_110540 = self.sizes["main.segmap_group_size_108862"] + segmap_group_sizze_110551 = self.sizes["main.segmap_group_size_108557"] + max_num_groups_127193 = self.sizes["main.segmap_num_groups_108559"] + num_groups_110552 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_110551), + sext_i32_i64(max_num_groups_127193)))) + suff_outer_par_110556 = (self.sizes["main.suff_outer_par_10"] <= binop_x_120244) + suff_outer_par_110578 = 
(self.sizes["main.suff_outer_par_11"] <= nest_sizze_106435) + segred_group_sizze_110594 = self.sizes["main.segred_group_size_108608"] + max_num_groups_127194 = self.sizes["main.segred_num_groups_108610"] + num_groups_110595 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_106693, + segred_group_sizze_110594), + sext_i32_i64(max_num_groups_127194)))) + segmap_group_sizze_110610 = self.sizes["main.segmap_group_size_108492"] + max_num_groups_127195 = self.sizes["main.segmap_num_groups_108494"] + num_groups_110611 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_110610), + sext_i32_i64(max_num_groups_127195)))) + segmap_group_sizze_110689 = self.sizes["main.segmap_group_size_108438"] + segmap_group_sizze_110760 = self.sizes["main.segmap_group_size_108281"] + segmap_group_sizze_110779 = self.sizes["main.segmap_group_size_108234"] + segmap_group_sizze_110788 = self.sizes["main.segmap_group_size_108211"] + segmap_group_sizze_110797 = self.sizes["main.segmap_group_size_108138"] + segred_group_sizze_110860 = self.sizes["main.segred_group_size_110859"] + max_num_groups_127196 = self.sizes["main.segred_num_groups_110861"] + num_groups_110862 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segred_group_sizze_110860), + sext_i32_i64(max_num_groups_127196)))) + segmap_usable_groups_109887 = sdiv_up_safe64(m_75136, + segmap_group_sizze_109886) + segmap_usable_groups_109910 = sdiv_up_safe64(m_75136, + segmap_group_sizze_109909) + segmap_usable_groups_110001 = sdiv_up_safe64(m_75136, + segmap_group_sizze_110000) + segmap_usable_groups_110008 = sdiv_up_safe64(m_75136, + segmap_group_sizze_110007) + segmap_usable_groups_110322 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_110321) + segmap_usable_groups_110337 = sdiv_up_safe64(nest_sizze_106435, + segmap_group_sizze_110336) + segmap_usable_groups_110530 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_110529) + segmap_usable_groups_110541 = 
sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_110540) + segmap_usable_groups_110690 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_110689) + segmap_usable_groups_110761 = sdiv_up_safe64(nest_sizze_106435, + segmap_group_sizze_110760) + segmap_usable_groups_110780 = sdiv_up_safe64(nest_sizze_106435, + segmap_group_sizze_110779) + segmap_usable_groups_110789 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_110788) + segmap_usable_groups_110798 = sdiv_up_safe64(m_75136, + segmap_group_sizze_110797) + mem_121941 = opencl_alloc(self, bytes_120175, "mem_121941") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121941, np.int64(0), + defunc_3_map_res_mem_120231, + np.int64(0), np.int64(1), + n_75139, m_75136) + mem_121944 = opencl_alloc(self, bytes_120250, "mem_121944") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121944, np.int64(0), + mem_121938, np.int64(0), + np.int64(1), k2p2zq_75151, + k2p2zq_75151) + tile_sizze_116779 = self.sizes["main.tile_size_116778"] + group_sizze_116780 = (tile_sizze_116779 * tile_sizze_116779) + mem_121946 = opencl_alloc(self, bytes_120247, "mem_121946") + self.futhark_builtinzhreplicate_f64(mem_121946, k2p2zq_75151, + np.float64(0.0)) + tile_sizze_117141 = self.sizes["main.tile_size_117140"] + group_sizze_117142 = (tile_sizze_117141 * tile_sizze_117141) + Ty_117445 = self.sizes["main.Ty_117442"] + Ry_117446 = self.sizes["main.Ry_117444"] + Tx_117447 = self.sizes["main.Tx_117441"] + Rx_117448 = self.sizes["main.Rx_117443"] + Tk_117449 = self.sizes["main.Tk_117440"] + TxRx_117452 = (Tx_117447 * Rx_117448) + TyRy_117453 = (Ty_117445 * Ry_117446) + a_loc_szz_117455 = (Tk_117449 * TyRy_117453) + binop_x_117456 = (Tx_117447 * Tk_117449) + b_loc_szz_117457 = (Rx_117448 * binop_x_117456) + group_sizze_117462 = (Ty_117445 * Tx_117447) + num_groups_x_116781 = sdiv_up_safe64(m_75136, tile_sizze_116779) + num_groups_y_116782 = sdiv_up_safe64(k2p2zq_75151, tile_sizze_116779) + num_groups_top_116783 = (num_groups_x_116781 * 
num_groups_y_116782) + num_groups_x_117143 = sdiv_up_safe64(m_75136, tile_sizze_117141) + num_groups_y_117144 = sdiv_up_safe64(k2p2zq_75151, tile_sizze_117141) + num_groups_top_117145 = (num_groups_x_117143 * num_groups_y_117144) + tk_div_tx_117450 = sdiv_up_safe64(Tk_117449, Tx_117447) + tk_div_ty_117451 = sdiv_up_safe64(Tk_117449, Ty_117445) + gridDim_x_117458 = sdiv_up_safe64(k2p2zq_75151, TxRx_117452) + gridDim_y_117459 = sdiv_up_safe64(k2p2zq_75151, TyRy_117453) + binop_y_117460 = (gridDim_x_117458 * gridDim_y_117459) + grid_sizze_117461 = (m_75136 * binop_y_117460) + full_tiles_117490 = squot_safe64(k2p2zq_75151, Tk_117449) + kk_117693 = (Tk_117449 * full_tiles_117490) + padded_sizze_115508 = (m_75136 + y_115507) + mem_121948 = opencl_alloc(self, bytes_121947, "mem_121948") + per_chunk_115510 = squot_safe64(padded_sizze_115508, num_threads_115503) + mem_121992 = opencl_alloc(self, bytes_121990, "mem_121992") + mem_121996 = opencl_alloc(self, bytes_121993, "mem_121996") + mem_122000 = opencl_alloc(self, bytes_121997, "mem_122000") + mem_122003 = opencl_alloc(self, bytes_121990, "mem_122003") + mem_122007 = opencl_alloc(self, bytes_121997, "mem_122007") + bytes_122739 = (np.int64(8) * padded_sizze_115508) + binop_x_122742 = (num_threads_115503 * per_chunk_115510) + bytes_122741 = (np.int64(8) * binop_x_122742) + binop_x_123163 = (k2p2zq_75151 * group_sizze_116780) + bytes_123161 = (np.int64(8) * binop_x_123163) + ctx_val_123177 = (k2p2zq_75151 * tile_sizze_116779) + bytes_123180 = (np.int64(8) * group_sizze_116780) + binop_x_125283 = (np.int64(8) * tile_sizze_116779) + sizze_125284 = (tile_sizze_116779 * binop_x_125283) + bytes_123298 = (np.int64(8) * group_sizze_117142) + binop_x_125308 = (np.int64(8) * tile_sizze_117141) + sizze_125309 = (tile_sizze_117141 * binop_x_125308) + binop_x_123329 = (k2p2zq_75151 * group_sizze_117142) + bytes_123327 = (np.int64(8) * binop_x_123329) + binop_x_123431 = (Ry_117446 * group_sizze_117462) + binop_x_123432 = (Rx_117448 * 
binop_x_123431) + bytes_123429 = (np.int64(8) * binop_x_123432) + binop_x_123423 = (Ry_117446 * Rx_117448) + bytes_123422 = (np.int64(8) * binop_x_123423) + bytes_123434 = (np.int64(8) * a_loc_szz_117455) + bytes_123436 = (np.int64(8) * b_loc_szz_117457) + bytes_123505 = (np.int64(8) * binop_x_123431) + binop_x_123511 = (Rx_117448 * group_sizze_117462) + bytes_123509 = (np.int64(8) * binop_x_123511) + bytes_123497 = (np.int64(8) * Ry_117446) + bytes_123499 = (np.int64(8) * Rx_117448) + binop_x_125329 = (np.int64(8) * Ty_117445) + binop_x_125330 = (Tx_117447 * binop_x_125329) + binop_x_125331 = (Ry_117446 * binop_x_125330) + sizze_125332 = (Rx_117448 * binop_x_125331) + mem_123728 = opencl_alloc(self, np.int64(1), "mem_123728") + binop_x_125575 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125577 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125578 = (np.int64(16) * k2p2zq_75151) + binop_x_125592 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125594 = (np.int64(8) * k2p2zq_75151) + double_buffer_sizze_125595 = (np.int64(16) * k2p2zq_75151) + num_threads_126174 = (segmap_group_sizze_107574 * num_groups_107575) + total_sizze_126175 = (bytes_120247 * num_threads_126174) + total_sizze_126176 = (bytes_120247 * num_threads_126174) + total_sizze_126177 = (bytes_120269 * num_threads_126174) + total_sizze_126178 = (bytes_120247 * num_threads_126174) + total_sizze_126179 = (bytes_120250 * num_threads_126174) + total_sizze_126180 = (bytes_120250 * num_threads_126174) + total_sizze_126181 = (bytes_120247 * num_threads_126174) + total_sizze_126182 = (bytes_120250 * num_threads_126174) + total_sizze_126183 = (bytes_120247 * num_threads_126174) + total_sizze_126184 = (bytes_120250 * num_threads_126174) + total_sizze_126185 = (bytes_120247 * num_threads_126174) + total_sizze_126186 = (bytes_120250 * num_threads_126174) + total_sizze_126187 = (bytes_120247 * num_threads_126174) + total_sizze_126188 = (bytes_120250 * num_threads_126174) + total_sizze_126189 = 
(sizze_125149 * num_threads_126174) + total_sizze_126190 = (bytes_120247 * num_threads_126174) + total_sizze_126193 = (sizze_125149 * num_threads_126174) + total_sizze_126194 = (bytes_120247 * num_threads_126174) + total_sizze_126197 = (double_buffer_sizze_125577 * num_threads_126174) + total_sizze_126198 = (double_buffer_sizze_125578 * num_threads_126174) + num_threads_126209 = (segmap_group_sizze_110025 * num_groups_110026) + total_sizze_126210 = (bytes_120247 * num_threads_126209) + total_sizze_126211 = (sizze_125149 * num_threads_126209) + total_sizze_126212 = (bytes_120247 * num_threads_126209) + total_sizze_126214 = (sizze_125149 * num_threads_126209) + total_sizze_126215 = (bytes_120247 * num_threads_126209) + total_sizze_126218 = (double_buffer_sizze_125594 * num_threads_126209) + total_sizze_126219 = (double_buffer_sizze_125595 * num_threads_126209) + num_threads_126222 = (group_sizze_116780 * num_groups_top_116783) + total_sizze_126223 = (bytes_120247 * num_threads_126222) + num_threads_126224 = (segmap_group_sizze_110458 * num_groups_110459) + total_sizze_126225 = (bytes_120250 * num_threads_126224) + total_sizze_126226 = (bytes_120247 * num_threads_126224) + num_threads_126227 = (group_sizze_117142 * num_groups_top_117145) + total_sizze_126228 = (bytes_120247 * num_threads_126227) + num_threads_126232 = (segmap_group_sizze_110551 * num_groups_110552) + total_sizze_126233 = (bytes_120247 * num_threads_126232) + num_threads_126236 = (segmap_group_sizze_110610 * num_groups_110611) + mem_param_121959 = defunc_5_map_res_mem_121929 + mem_param_121967 = defunc_5_map_res_mem_121930 + mem_param_121972 = defunc_5_map_res_mem_121931 + loop_while_75825 = loop_cond_t_res_75815 + r_75826 = k2p2zq_75151 + while loop_while_75825: + x_75831 = sle64(np.int64(0), r_75826) + y_75832 = slt64(r_75826, defunc_2_reduce_res_75260) + bounds_check_75833 = (x_75831 and y_75832) + index_ok_75834 = (ok_or_empty_75229 and bounds_check_75833) + index_certs_75835 = True + assert 
index_ok_75834, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:200:5-74\n #10 bfastfinal.fut:195:1-201:36\n" % ("Index [", + r_75826, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "][", + k2p2zq_75151, + "].")) + index_certs_75836 = True + assert bounds_check_75833, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:200:5-74\n #10 bfastfinal.fut:195:1-201:36\n" % ("Index [", + r_75826, + "] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "].")) + rp1_75837 = (np.int64(1) + r_75826) + empty_slice_75838 = (rp1_75837 == np.int64(0)) + i_lte_j_75839 = sle64(np.int64(0), rp1_75837) + y_75840 = (bounds_check_75833 and i_lte_j_75839) + ok_or_empty_75841 = (empty_slice_75838 or y_75840) + index_certs_75842 = True + assert ok_or_empty_75841, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:66-75\n #1 /prelude/soacs.fut:91:28-38\n #2 /prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + rp1_75837, + "] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "].")) + index_ok_75843 = (ok_or_empty_75229 and ok_or_empty_75841) + index_certs_75844 = True + assert index_ok_75843, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:51-63\n #1 /prelude/soacs.fut:91:28-38\n #2 
/prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + rp1_75837, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "][", + k2p2zq_75151, + "].")) + i_p_m_t_s_leq_w_75845 = slt64(r_75826, rp1_75837) + y_75846 = (x_75831 and i_p_m_t_s_leq_w_75845) + y_75847 = (i_lte_j_75839 and y_75846) + ok_or_empty_75848 = (empty_slice_75838 or y_75847) + min_res_75849 = smin64(k2p2zq_75151, rp1_75837) + i_p_m_t_s_leq_w_75850 = slt64(m_75223, rp1_75837) + y_75851 = (zzero_leq_i_p_m_t_s_75224 and i_p_m_t_s_leq_w_75850) + y_75852 = (i_lte_j_75226 and y_75851) + ok_or_empty_75853 = (empty_slice_75222 or y_75852) + index_ok_75854 = (ok_or_empty_75229 and ok_or_empty_75853) + index_certs_75855 = True + assert index_ok_75854, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:80:50-58\n #1 recresid.fut:62:33-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:200:5-74\n #10 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + k2p2zq_75151, + ", :", + k2p2zq_75151, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + rp1_75837, + "].")) + index_certs_75862 = True + assert ok_or_empty_75853, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:92:15-21\n #1 recresid.fut:62:33-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:200:5-74\n #10 bfastfinal.fut:195:1-201:36\n" % ("Index [:", + k2p2zq_75151, + "] out of bounds for array of shape [", + rp1_75837, + "].")) + 
nest_sizze_109988 = (m_75136 * rp1_75837) + max_num_groups_127206 = self.sizes["main.segred_num_groups_109661"] + num_groups_109990 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_109988, + segred_group_sizze_109989), + sext_i32_i64(max_num_groups_127206)))) + self.futhark_builtinzhreplicate_f64(mem_121992, + (m_75136 * k2p2zq_75151), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_121996, + ((m_75136 * np.int64(2)) * k2p2zq_75151), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122000, + ((m_75136 * k2p2zq_75151) * k2p2zq_75151), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122003, + (m_75136 * k2p2zq_75151), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122007, + ((m_75136 * k2p2zq_75151) * k2p2zq_75151), + np.float64(0.0)) + bytes_122015 = (np.int64(8) * nest_sizze_109988) + binop_x_122019 = (k2p2zq_75151 * rp1_75837) + binop_x_122020 = (m_75136 * binop_x_122019) + bytes_122018 = (np.int64(8) * binop_x_122020) + binop_x_122024 = (k2p2zq_75151 * nest_sizze_109988) + bytes_122022 = (np.int64(8) * binop_x_122024) + bytes_122511 = (np.int64(8) * rp1_75837) + binop_x_123636 = (rp1_75837 * binop_x_120244) + bytes_123634 = (np.int64(8) * binop_x_123636) + sizze_125454 = (rp1_75837 * bytes_120247) + double_buffer_sizze_125576 = (rp1_75837 * binop_x_125575) + double_buffer_sizze_125584 = (np.int64(8) * rp1_75837) + double_buffer_sizze_125593 = (rp1_75837 * binop_x_125592) + double_buffer_sizze_125601 = (np.int64(8) * rp1_75837) + total_sizze_126191 = (bytes_122511 * num_threads_126174) + total_sizze_126192 = (sizze_125454 * num_threads_126174) + total_sizze_126195 = (sizze_125454 * num_threads_126174) + total_sizze_126196 = (double_buffer_sizze_125576 * num_threads_126174) + total_sizze_126199 = (double_buffer_sizze_125584 * num_threads_126174) + total_sizze_126213 = (sizze_125454 * num_threads_126209) + total_sizze_126216 = (sizze_125454 * num_threads_126209) + total_sizze_126217 = 
(double_buffer_sizze_125593 * num_threads_126209) + total_sizze_126237 = (bytes_122511 * num_threads_126236) + total_sizze_126238 = (double_buffer_sizze_125601 * num_threads_126236) + local_memory_capacity_127973 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127973)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127973))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127973))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127973))) and suff_outer_par_107037): + mem_122011 = opencl_alloc(self, bytes_120258, "mem_122011") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122011, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_122014 = opencl_alloc(self, bytes_121990, "mem_122014") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122014, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + mem_122017 = opencl_alloc(self, bytes_122015, "mem_122017") + group_sizze_127210 = self.sizes["main.group_size_127210"] + num_groups_127211 = sdiv_up64((m_75136 * rp1_75837), + group_sizze_127210) + if ((1 * (np.int64(num_groups_127211) * np.int64(group_sizze_127210))) != 0): + self.mainzicopy_127207_var.set_args(np.int64(m_75136), + np.int64(n_75139), + np.int64(rp1_75837), + defunc_3_map_res_mem_120231, + mem_122017) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127207_var, + ((np.int64(num_groups_127211) * np.int64(group_sizze_127210)),), + (np.int64(group_sizze_127210),)) + if synchronous: + sync(self) + mem_122021 = opencl_alloc(self, bytes_122018, "mem_122021") + group_sizze_127215 = self.sizes["main.group_size_127215"] + num_groups_127216 = sdiv_up64(((m_75136 * k2p2zq_75151) * rp1_75837), + group_sizze_127215) + if ((1 * (np.int64(num_groups_127216) * np.int64(group_sizze_127215))) != 0): + self.mainzicopy_127212_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + 
np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), mem_120246, + mem_122021) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127212_var, + ((np.int64(num_groups_127216) * np.int64(group_sizze_127215)),), + (np.int64(group_sizze_127215),)) + if synchronous: + sync(self) + mem_122025 = opencl_alloc(self, bytes_122022, "mem_122025") + group_sizze_127220 = self.sizes["main.group_size_127220"] + num_groups_127221 = sdiv_up64(((m_75136 * k2p2zq_75151) * rp1_75837), + group_sizze_127220) + if ((1 * (np.int64(num_groups_127221) * np.int64(group_sizze_127220))) != 0): + self.mainzicopy_127217_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), mem_120246, + mem_122025) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127217_var, + ((np.int64(num_groups_127221) * np.int64(group_sizze_127220)),), + (np.int64(group_sizze_127220),)) + if synchronous: + sync(self) + mem_122650 = opencl_alloc(self, m_75136, "mem_122650") + mem_122654 = opencl_alloc(self, bytes_120258, "mem_122654") + mem_122657 = opencl_alloc(self, bytes_121990, "mem_122657") + mem_122659 = opencl_alloc(self, bytes_120173, "mem_122659") + mem_122661 = opencl_alloc(self, bytes_120173, "mem_122661") + mem_122028 = opencl_alloc(self, total_sizze_126175, "mem_122028") + mem_122042 = opencl_alloc(self, total_sizze_126176, "mem_122042") + mem_122045 = opencl_alloc(self, total_sizze_126177, "mem_122045") + mem_122047 = opencl_alloc(self, total_sizze_126178, "mem_122047") + mem_122382 = opencl_alloc(self, total_sizze_126179, "mem_122382") + mem_122423 = opencl_alloc(self, total_sizze_126180, "mem_122423") + mem_122435 = opencl_alloc(self, total_sizze_126181, "mem_122435") + mem_122464 = opencl_alloc(self, total_sizze_126182, "mem_122464") + mem_122537 = opencl_alloc(self, total_sizze_126183, "mem_122537") + mem_122552 = opencl_alloc(self, total_sizze_126184, "mem_122552") + mem_122564 = opencl_alloc(self, total_sizze_126185, 
"mem_122564") + mem_122575 = opencl_alloc(self, total_sizze_126186, "mem_122575") + mem_122595 = opencl_alloc(self, total_sizze_126187, "mem_122595") + mem_122598 = opencl_alloc(self, total_sizze_126188, "mem_122598") + mem_125248 = opencl_alloc(self, total_sizze_126189, "mem_125248") + mem_125250 = opencl_alloc(self, total_sizze_126190, "mem_125250") + mem_125258 = opencl_alloc(self, total_sizze_126191, "mem_125258") + mem_125455 = opencl_alloc(self, total_sizze_126192, "mem_125455") + mem_125463 = opencl_alloc(self, total_sizze_126193, "mem_125463") + mem_125465 = opencl_alloc(self, total_sizze_126194, "mem_125465") + mem_125505 = opencl_alloc(self, total_sizze_126195, "mem_125505") + double_buffer_mem_125569 = opencl_alloc(self, total_sizze_126196, + "double_buffer_mem_125569") + double_buffer_mem_125570 = opencl_alloc(self, total_sizze_126197, + "double_buffer_mem_125570") + double_buffer_mem_125571 = opencl_alloc(self, total_sizze_126198, + "double_buffer_mem_125571") + double_buffer_mem_125582 = opencl_alloc(self, total_sizze_126199, + "double_buffer_mem_125582") + if ((1 * (np.int64(num_groups_107575) * np.int64(segmap_group_sizze_107574))) != 0): + self.mainzisegmap_107039_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(m_75223), + np.byte(y_75227), + np.int64(defunc_2_reduce_res_75260), + np.float64(tol_75329), + np.int64(k_75342), + np.int64(r_75826), + np.int64(rp1_75837), + np.byte(ok_or_empty_75848), + np.int64(min_res_75849), + np.int64(num_groups_107575), + np.int64(binop_x_120251), + np.int64(num_threads_126174), + defunc_3_map_res_mem_120231, + mem_120246, mem_121938, + mem_121941, mem_param_121972, + mem_122011, mem_122014, + mem_122017, mem_122021, + mem_122025, mem_122028, + mem_122042, mem_122045, + mem_122047, mem_122382, + mem_122423, mem_122435, + mem_122464, mem_122537, + mem_122552, mem_122564, + mem_122575, mem_122595, 
+ mem_122598, mem_122650, + mem_122654, mem_122657, + mem_122659, mem_122661, + mem_125248, mem_125250, + mem_125258, mem_125455, + mem_125463, mem_125465, + mem_125505, + double_buffer_mem_125569, + double_buffer_mem_125570, + double_buffer_mem_125571, + double_buffer_mem_125582) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_107039_var, + ((np.int64(num_groups_107575) * np.int64(segmap_group_sizze_107574)),), + (np.int64(segmap_group_sizze_107574),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_122011 = None + mem_122014 = None + mem_122017 = None + mem_122021 = None + mem_122025 = None + mem_122028 = None + mem_122042 = None + mem_122045 = None + mem_122047 = None + mem_122382 = None + mem_122423 = None + mem_122435 = None + mem_122464 = None + mem_122537 = None + mem_122552 = None + mem_122564 = None + mem_122575 = None + mem_122595 = None + mem_122598 = None + mem_125248 = None + mem_125250 = None + mem_125258 = None + mem_125455 = None + mem_125463 = None + mem_125465 = None + mem_125505 = None + double_buffer_mem_125569 = None + double_buffer_mem_125570 = None + double_buffer_mem_125571 = None + double_buffer_mem_125582 = None + mem_123715 = opencl_alloc(self, bytes_121997, "mem_123715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123715, np.int64(0), + mem_122654, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_122654 = None + mem_123719 = opencl_alloc(self, bytes_121990, "mem_123719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123719, np.int64(0), + mem_122657, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_122657 = None + defunc_7_map_res_mem_123721 = mem_122650 + defunc_7_map_res_mem_123722 = mem_123715 + defunc_7_map_res_mem_123723 = mem_123719 + defunc_7_map_res_mem_123724 = mem_122659 + defunc_7_map_res_mem_123725 = mem_122661 + else: + mem_122665 = opencl_alloc(self, bytes_121997, "mem_122665") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122665, 
np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_122668 = opencl_alloc(self, bytes_120173, "mem_122668") + mem_122671 = opencl_alloc(self, bytes_121990, "mem_122671") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_109865): + segment_sizze_nonzzero_127334 = smax64(np.int64(1), k2p2zq_75151) + num_threads_127335 = (num_groups_109866 * segred_group_sizze_109865) + if ((1 * (np.int64(num_groups_109866) * np.int64(segred_group_sizze_109865))) != 0): + self.mainzisegred_small_109847_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_109865))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(r_75826), + np.int64(num_groups_109866), + np.int64(segment_sizze_nonzzero_127334), + mem_120246, + mem_122665, + mem_122668, + mem_122671) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_109847_var, + ((np.int64(num_groups_109866) * np.int64(segred_group_sizze_109865)),), + (np.int64(segred_group_sizze_109865),)) + if synchronous: + sync(self) + else: + groups_per_segment_127356 = sdiv_up64(num_groups_109866, + smax64(np.int64(1), m_75136)) + elements_per_thread_127357 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_109865 * groups_per_segment_127356)) + virt_num_groups_127358 = (groups_per_segment_127356 * m_75136) + num_threads_127359 = (num_groups_109866 * segred_group_sizze_109865) + threads_per_segment_127360 = (groups_per_segment_127356 * segred_group_sizze_109865) + group_res_arr_mem_127361 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_109865 * virt_num_groups_127358)), + "group_res_arr_mem_127361") + mainzicounter_mem_127363 = self.mainzicounter_mem_127363 + if ((1 * (np.int64(num_groups_109866) * np.int64(segred_group_sizze_109865))) != 0): + self.mainzisegred_large_109847_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + 
cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_109865))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(r_75826), + np.int64(num_groups_109866), + np.int64(groups_per_segment_127356), + np.int64(elements_per_thread_127357), + np.int64(virt_num_groups_127358), + np.int64(threads_per_segment_127360), + mem_120246, + mem_122665, + mem_122668, + mem_122671, + group_res_arr_mem_127361, + mainzicounter_mem_127363) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_109847_var, + ((np.int64(num_groups_109866) * np.int64(segred_group_sizze_109865)),), + (np.int64(segred_group_sizze_109865),)) + if synchronous: + sync(self) + mem_122665 = None + mem_122674 = opencl_alloc(self, bytes_120173, "mem_122674") + if ((1 * (np.int64(segmap_usable_groups_109887) * np.int64(segmap_group_sizze_109886))) != 0): + self.mainzisegmap_109829_var.set_args(self.global_failure, + np.int64(m_75136), mem_122668, + mem_122674) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_109829_var, + ((np.int64(segmap_usable_groups_109887) * np.int64(segmap_group_sizze_109886)),), + (np.int64(segmap_group_sizze_109886),)) + if synchronous: + sync(self) + mem_122668 = None + mem_122677 = opencl_alloc(self, bytes_120173, "mem_122677") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_109896): + segment_sizze_nonzzero_127401 = smax64(np.int64(1), k2p2zq_75151) + num_threads_127402 = (num_groups_109897 * segred_group_sizze_109896) + if ((1 * (np.int64(num_groups_109897) * np.int64(segred_group_sizze_109896))) != 0): + self.mainzisegred_small_109818_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_109896))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(r_75826), + np.int64(num_groups_109897), + np.int64(segment_sizze_nonzzero_127401), + mem_120246, + mem_param_121967, + mem_122677) + cl.enqueue_nd_range_kernel(self.queue, 
+ self.mainzisegred_small_109818_var, + ((np.int64(num_groups_109897) * np.int64(segred_group_sizze_109896)),), + (np.int64(segred_group_sizze_109896),)) + if synchronous: + sync(self) + else: + groups_per_segment_127422 = sdiv_up64(num_groups_109897, + smax64(np.int64(1), m_75136)) + elements_per_thread_127423 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_109896 * groups_per_segment_127422)) + virt_num_groups_127424 = (groups_per_segment_127422 * m_75136) + num_threads_127425 = (num_groups_109897 * segred_group_sizze_109896) + threads_per_segment_127426 = (groups_per_segment_127422 * segred_group_sizze_109896) + group_res_arr_mem_127427 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_109896 * virt_num_groups_127424)), + "group_res_arr_mem_127427") + mainzicounter_mem_127429 = self.mainzicounter_mem_127429 + if ((1 * (np.int64(num_groups_109897) * np.int64(segred_group_sizze_109896))) != 0): + self.mainzisegred_large_109818_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_109896))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(r_75826), + np.int64(num_groups_109897), + np.int64(groups_per_segment_127422), + np.int64(elements_per_thread_127423), + np.int64(virt_num_groups_127424), + np.int64(threads_per_segment_127426), + mem_120246, + mem_param_121967, + mem_122677, + group_res_arr_mem_127427, + mainzicounter_mem_127429) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_109818_var, + ((np.int64(num_groups_109897) * np.int64(segred_group_sizze_109896)),), + (np.int64(segred_group_sizze_109896),)) + if synchronous: + sync(self) + mem_122680 = opencl_alloc(self, bytes_120173, "mem_122680") + mem_122682 = opencl_alloc(self, bytes_120173, "mem_122682") + if ((1 * (np.int64(segmap_usable_groups_109910) * np.int64(segmap_group_sizze_109909))) != 0): + 
self.mainzisegmap_109798_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(n_75139), + np.int64(r_75826), + defunc_3_map_res_mem_120231, + mem_122674, mem_122677, + mem_122680, mem_122682) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_109798_var, + ((np.int64(segmap_usable_groups_109910) * np.int64(segmap_group_sizze_109909)),), + (np.int64(segmap_group_sizze_109909),)) + if synchronous: + sync(self) + mem_122677 = None + mem_122686 = opencl_alloc(self, bytes_122022, "mem_122686") + group_sizze_127469 = self.sizes["main.group_size_127469"] + num_groups_127470 = sdiv_up64(((m_75136 * k2p2zq_75151) * rp1_75837), + group_sizze_127469) + if ((1 * (np.int64(num_groups_127470) * np.int64(group_sizze_127469))) != 0): + self.mainzicopy_127466_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), mem_120246, + mem_122686) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127466_var, + ((np.int64(num_groups_127470) * np.int64(group_sizze_127469)),), + (np.int64(group_sizze_127469),)) + if synchronous: + sync(self) + mem_param_122694 = mem_121992 + mem_param_122705 = mem_121996 + j_109957 = np.int64(0) + one_129852 = np.int64(1) + for counter_129851 in range(k2p2zq_75151): + index_certs_109960 = True + assert ok_or_empty_75848, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:200:5-74\n #11 bfastfinal.fut:195:1-201:36\n" % ("Index [", + j_109957, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_75151, + "][", + rp1_75837, + "].")) + local_memory_capacity_127569 = self.max_local_memory + if (sle64(np.int64(0), + 
sext_i32_i64(local_memory_capacity_127569)) and suff_outer_par_109963): + mem_122715 = opencl_alloc(self, bytes_121990, "mem_122715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122715, + np.int64(0), + mem_param_122694, + np.int64(0), + np.int64(1), + k2p2zq_75151, m_75136) + mem_122719 = opencl_alloc(self, bytes_120924, "mem_122719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122719, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + mem_122723 = opencl_alloc(self, bytes_121990, "mem_122723") + mem_122727 = opencl_alloc(self, bytes_120924, "mem_122727") + if ((1 * (np.int64(num_groups_109968) * np.int64(segmap_group_sizze_109967))) != 0): + self.mainzisegmap_109580_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(rp1_75837), + np.int64(j_109957), + np.int64(num_groups_109968), + mem_122686, mem_122715, + mem_122719, mem_122723, + mem_122727) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_109580_var, + ((np.int64(num_groups_109968) * np.int64(segmap_group_sizze_109967)),), + (np.int64(segmap_group_sizze_109967),)) + if synchronous: + sync(self) + mem_122715 = None + mem_122719 = None + mem_122751 = opencl_alloc(self, bytes_121990, "mem_122751") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122751, + np.int64(0), + mem_122723, + np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_122723 = None + dqrdc2_res_mem_122757 = mem_122751 + dqrdc2_res_mem_122758 = mem_122727 + else: + mem_122730 = opencl_alloc(self, bytes_120173, "mem_122730") + if slt64((rp1_75837 * np.int64(2)), segred_group_sizze_109989): + segment_sizze_nonzzero_127487 = smax64(np.int64(1), rp1_75837) + num_threads_127488 = (num_groups_109990 * segred_group_sizze_109989) + if ((1 * (np.int64(num_groups_109990) * np.int64(segred_group_sizze_109989))) != 0): + self.mainzisegred_small_109665_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * 
segred_group_sizze_109989))), + np.int64(m_75136), + np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), + np.int64(j_109957), + np.int64(num_groups_109990), + np.int64(segment_sizze_nonzzero_127487), + mem_120246, + mem_122730) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_109665_var, + ((np.int64(num_groups_109990) * np.int64(segred_group_sizze_109989)),), + (np.int64(segred_group_sizze_109989),)) + if synchronous: + sync(self) + else: + groups_per_segment_127508 = sdiv_up64(num_groups_109990, + smax64(np.int64(1), + m_75136)) + elements_per_thread_127509 = sdiv_up64(rp1_75837, + (segred_group_sizze_109989 * groups_per_segment_127508)) + virt_num_groups_127510 = (groups_per_segment_127508 * m_75136) + num_threads_127511 = (num_groups_109990 * segred_group_sizze_109989) + threads_per_segment_127512 = (groups_per_segment_127508 * segred_group_sizze_109989) + group_res_arr_mem_127513 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_109989 * virt_num_groups_127510)), + "group_res_arr_mem_127513") + mainzicounter_mem_127515 = self.mainzicounter_mem_127515 + if ((1 * (np.int64(num_groups_109990) * np.int64(segred_group_sizze_109989))) != 0): + self.mainzisegred_large_109665_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_109989))), + np.int64(m_75136), + np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), + np.int64(j_109957), + np.int64(num_groups_109990), + np.int64(groups_per_segment_127508), + np.int64(elements_per_thread_127509), + np.int64(virt_num_groups_127510), + np.int64(threads_per_segment_127512), + mem_120246, + mem_122730, + group_res_arr_mem_127513, + mainzicounter_mem_127515) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_109665_var, + ((np.int64(num_groups_109990) * np.int64(segred_group_sizze_109989)),), + (np.int64(segred_group_sizze_109989),)) + if synchronous: + sync(self) + mem_122733 = 
opencl_alloc(self, bytes_120173, "mem_122733") + if ((1 * (np.int64(segmap_usable_groups_110001) * np.int64(segmap_group_sizze_110000))) != 0): + self.mainzisegmap_109649_var.set_args(self.global_failure, + np.int64(m_75136), + mem_122730, mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_109649_var, + ((np.int64(segmap_usable_groups_110001) * np.int64(segmap_group_sizze_110000)),), + (np.int64(segmap_group_sizze_110000),)) + if synchronous: + sync(self) + mem_122730 = None + if ((1 * (np.int64(segmap_usable_groups_110008) * np.int64(segmap_group_sizze_110007))) != 0): + self.mainzisegmap_109640_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(j_109957), + mem_param_122694, + mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_109640_var, + ((np.int64(segmap_usable_groups_110008) * np.int64(segmap_group_sizze_110007)),), + (np.int64(segmap_group_sizze_110007),)) + if synchronous: + sync(self) + mem_122738 = opencl_alloc(self, bytes_120924, "mem_122738") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122738, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + mem_122740 = opencl_alloc(self, bytes_122739, "mem_122740") + tmp_offs_127557 = np.int64(0) + if ((m_75136 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_122733, + dest_offset=np.int64((tmp_offs_127557 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127557 = (tmp_offs_127557 + m_75136) + if ((y_115507 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_121948, + dest_offset=np.int64((tmp_offs_127557 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115507 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127557 = (tmp_offs_127557 + y_115507) + mem_122743 = opencl_alloc(self, bytes_122741, 
"mem_122743") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122743, + np.int64(0), + mem_122740, + np.int64(0), + np.int64(1), + per_chunk_115510, + num_threads_115503) + mem_122740 = None + mem_122748 = opencl_alloc(self, bytes_120924, "mem_122748") + if ((1 * (np.int64(num_groups_110013) * np.int64(segmap_group_sizze_110012))) != 0): + self.mainzisegmap_109625_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(j_109957), + np.int64(num_groups_110013), + np.int64(num_threads_115503), + np.int64(per_chunk_115510), + mem_122733, mem_122738, + mem_122743, mem_122748) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_109625_var, + ((np.int64(num_groups_110013) * np.int64(segmap_group_sizze_110012)),), + (np.int64(segmap_group_sizze_110012),)) + if synchronous: + sync(self) + mem_122733 = None + mem_122738 = None + mem_122743 = None + mem_122755 = opencl_alloc(self, bytes_121990, "mem_122755") + if (((m_75136 * k2p2zq_75151) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122755, mem_param_122694, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_75136 * k2p2zq_75151) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_122757 = mem_122755 + dqrdc2_res_mem_122758 = mem_122748 + mem_122764 = opencl_alloc(self, bytes_121993, "mem_122764") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122764, np.int64(0), + dqrdc2_res_mem_122758, + np.int64(0), + np.int64(1), m_75136, + (np.int64(2) * k2p2zq_75151)) + dqrdc2_res_mem_122758 = None + mem_param_tmp_127471 = dqrdc2_res_mem_122757 + mem_param_tmp_127472 = mem_122764 + mem_param_122694 = mem_param_tmp_127471 + mem_param_122705 = mem_param_tmp_127472 + j_109957 += one_129852 + dqrdc2_res_r_mem_122778 = mem_param_122694 + dqrdc2_res_r_mem_122789 = mem_param_122705 + mem_122686 = None + mem_122793 = opencl_alloc(self, bytes_122018, "mem_122793") + group_sizze_127573 = self.sizes["main.group_size_127573"] 
+ num_groups_127574 = sdiv_up64(((m_75136 * k2p2zq_75151) * rp1_75837), + group_sizze_127573) + if ((1 * (np.int64(num_groups_127574) * np.int64(group_sizze_127573))) != 0): + self.mainzicopy_127570_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(rp1_75837), mem_120246, + mem_122793) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127570_var, + ((np.int64(num_groups_127574) * np.int64(group_sizze_127573)),), + (np.int64(group_sizze_127573),)) + if synchronous: + sync(self) + mem_122796 = opencl_alloc(self, bytes_121990, "mem_122796") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122796, np.int64(0), + dqrdc2_res_r_mem_122778, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + dqrdc2_res_r_mem_122778 = None + mem_122800 = opencl_alloc(self, bytes_120924, "mem_122800") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122800, np.int64(0), + dqrdc2_res_r_mem_122789, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_75151), + m_75136) + dqrdc2_res_r_mem_122789 = None + mem_123127 = opencl_alloc(self, bytes_122018, "mem_123127") + mem_123130 = opencl_alloc(self, bytes_121990, "mem_123130") + mem_123133 = opencl_alloc(self, bytes_121990, "mem_123133") + mem_123135 = opencl_alloc(self, bytes_120173, "mem_123135") + mem_122803 = opencl_alloc(self, total_sizze_126210, "mem_122803") + mem_125265 = opencl_alloc(self, total_sizze_126211, "mem_125265") + mem_125267 = opencl_alloc(self, total_sizze_126212, "mem_125267") + mem_125472 = opencl_alloc(self, total_sizze_126213, "mem_125472") + mem_125480 = opencl_alloc(self, total_sizze_126214, "mem_125480") + mem_125482 = opencl_alloc(self, total_sizze_126215, "mem_125482") + mem_125512 = opencl_alloc(self, total_sizze_126216, "mem_125512") + double_buffer_mem_125586 = opencl_alloc(self, total_sizze_126217, + "double_buffer_mem_125586") + double_buffer_mem_125587 = opencl_alloc(self, total_sizze_126218, + "double_buffer_mem_125587") + 
double_buffer_mem_125588 = opencl_alloc(self, total_sizze_126219, + "double_buffer_mem_125588") + if ((1 * (np.int64(num_groups_110026) * np.int64(segmap_group_sizze_110025))) != 0): + self.mainzisegmap_109290_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_75223), + np.byte(y_75227), + np.int64(k_75342), + np.int64(rp1_75837), + np.int64(min_res_75849), + np.int64(num_groups_110026), + np.int64(num_threads_126209), + mem_120248, mem_122793, + mem_122796, mem_122800, + mem_122803, mem_123127, + mem_123130, mem_123133, + mem_123135, mem_125265, + mem_125267, mem_125472, + mem_125480, mem_125482, + mem_125512, + double_buffer_mem_125586, + double_buffer_mem_125587, + double_buffer_mem_125588) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_109290_var, + ((np.int64(num_groups_110026) * np.int64(segmap_group_sizze_110025)),), + (np.int64(segmap_group_sizze_110025),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_122793 = None + mem_122796 = None + mem_122800 = None + mem_122803 = None + mem_125265 = None + mem_125267 = None + mem_125472 = None + mem_125480 = None + mem_125482 = None + mem_125512 = None + double_buffer_mem_125586 = None + double_buffer_mem_125587 = None + double_buffer_mem_125588 = None + mem_123138 = opencl_alloc(self, binop_x_120244, "mem_123138") + if ((1 * (np.int64(segmap_usable_groups_110322) * np.int64(segmap_group_sizze_110321))) != 0): + self.mainzisegmap_109232_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_123135, mem_123138) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_109232_var, + ((np.int64(segmap_usable_groups_110322) * np.int64(segmap_group_sizze_110321)),), + (np.int64(segmap_group_sizze_110321),)) + if synchronous: + sync(self) + mem_123143 = opencl_alloc(self, bytes_121997, "mem_123143") + if ((1 * (np.int64(segmap_usable_groups_110337) 
* np.int64(segmap_group_sizze_110336))) != 0): + self.mainzisegmap_109197_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(rp1_75837), + mem_123127, mem_123135, + mem_123138, mem_123143) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_109197_var, + ((np.int64(segmap_usable_groups_110337) * np.int64(segmap_group_sizze_110336)),), + (np.int64(segmap_group_sizze_110336),)) + if synchronous: + sync(self) + mem_123138 = None + local_memory_capacity_127784 = self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127784)) and sle64((((((bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127784))) and suff_outer_par_110360): + mem_123147 = opencl_alloc(self, bytes_121997, "mem_123147") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123147, np.int64(0), + mem_123143, np.int64(0), + m_75136, k2p2zq_75151, + k2p2zq_75151) + mem_123151 = opencl_alloc(self, bytes_121997, "mem_123151") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123151, np.int64(0), + mem_123147, np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_123147 = None + mem_123155 = opencl_alloc(self, bytes_121997, "mem_123155") + group_sizze_127657 = self.sizes["main.group_size_127657"] + num_groups_127658 = sdiv_up64(((m_75136 * k2p2zq_75151) * k2p2zq_75151), + group_sizze_127657) + if ((1 * (np.int64(num_groups_127658) * np.int64(group_sizze_127657))) != 0): + self.mainzicopy_127654_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + 
mem_123143, mem_123155) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127654_var, + ((np.int64(num_groups_127658) * np.int64(group_sizze_127657)),), + (np.int64(group_sizze_127657),)) + if synchronous: + sync(self) + mem_123233 = opencl_alloc(self, bytes_121997, "mem_123233") + mem_125275 = opencl_alloc(self, total_sizze_126223, "mem_125275") + if ((1 * (np.int64(num_groups_top_116783) * np.int64(group_sizze_116780))) != 0): + self.mainzisegmap_intragroup_116784_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64(bytes_123161)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123161)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_y_116782), + np.int64(ctx_val_123177), + np.int64(num_threads_126222), + mem_121944, + mem_121946, + mem_123151, + mem_123155, + mem_123233, + mem_125275) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_116784_var, + ((np.int64(num_groups_top_116783) * np.int64(group_sizze_116780)),), + (np.int64(group_sizze_116780),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_123151 = None + mem_123155 = None + mem_125275 = None + defunc_3_map_res_r_mem_123392 = mem_123233 + else: + mem_123237 = opencl_alloc(self, bytes_121997, "mem_123237") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123237, np.int64(0), + mem_123143, np.int64(0), + m_75136, k2p2zq_75151, + k2p2zq_75151) + mem_123241 = opencl_alloc(self, bytes_121997, "mem_123241") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123241, np.int64(0), + mem_123237, np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_123237 = None + mem_param_123252 = mem_122000 + i_110424 = np.int64(0) + one_129855 = np.int64(1) + for counter_129854 in range(k2p2zq_75151): + x_110426 = (k2p2zq_75151 - i_110424) + i_110427 = 
(x_110426 - np.int64(1)) + x_110428 = sle64(np.int64(0), i_110427) + y_110429 = slt64(i_110427, k2p2zq_75151) + bounds_check_110430 = (x_110428 and y_110429) + j_m_i_110431 = (k2p2zq_75151 - x_110426) + empty_slice_110432 = (j_m_i_110431 == np.int64(0)) + m_110433 = (j_m_i_110431 - np.int64(1)) + i_p_m_t_s_110434 = (x_110426 + m_110433) + zzero_leq_i_p_m_t_s_110435 = sle64(np.int64(0), i_p_m_t_s_110434) + i_p_m_t_s_leq_w_110436 = slt64(i_p_m_t_s_110434, k2p2zq_75151) + zzero_lte_i_110437 = sle64(np.int64(0), x_110426) + i_lte_j_110438 = sle64(x_110426, k2p2zq_75151) + y_110439 = (i_p_m_t_s_leq_w_110436 and zzero_lte_i_110437) + y_110440 = (zzero_leq_i_p_m_t_s_110435 and y_110439) + y_110441 = (i_lte_j_110438 and y_110440) + forwards_ok_110442 = (zzero_lte_i_110437 and y_110441) + ok_or_empty_110443 = (empty_slice_110432 or forwards_ok_110442) + index_ok_110444 = (bounds_check_110430 and ok_or_empty_110443) + index_certs_110445 = True + assert index_ok_110444, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_110427, + ", ", + x_110426, + ":", + k2p2zq_75151, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + k2p2zq_75151, + "].")) + index_certs_110446 = True + assert ok_or_empty_110443, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 
mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + x_110426, + ":", + k2p2zq_75151, + "] out of bounds for array of shape [", + k2p2zq_75151, + "].")) + index_ok_110447 = (bounds_check_110430 and bounds_check_110430) + index_certs_110448 = True + assert index_ok_110447, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_110427, + ", ", + i_110427, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + k2p2zq_75151, + "].")) + index_certs_110449 = True + assert bounds_check_110430, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:200:5-74\n #12 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_110427, + "] out of bounds for array of shape [", + k2p2zq_75151, + "].")) + nest_sizze_110511 = (j_m_i_110431 * binop_x_120244) + max_num_groups_127681 = self.sizes["main.segred_num_groups_108888"] + num_groups_110513 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_110511, + segred_group_sizze_110512), + sext_i32_i64(max_num_groups_127681)))) + local_memory_capacity_127783 = self.max_local_memory + if (sle64(np.int64(0), + 
sext_i32_i64(local_memory_capacity_127783)) and suff_outer_par_110455): + mem_123259 = opencl_alloc(self, bytes_120258, "mem_123259") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123259, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_123287 = opencl_alloc(self, bytes_120258, "mem_123287") + mem_123263 = opencl_alloc(self, total_sizze_126225, + "mem_123263") + mem_123275 = opencl_alloc(self, total_sizze_126226, + "mem_123275") + if ((1 * (np.int64(num_groups_110459) * np.int64(segmap_group_sizze_110458))) != 0): + self.mainzisegmap_108770_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_110426), + np.int64(i_110427), + np.int64(j_m_i_110431), + np.int64(num_groups_110459), + np.int64(num_threads_126224), + mem_121938, mem_123143, + mem_123241, mem_123259, + mem_123263, mem_123275, + mem_123287) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_108770_var, + ((np.int64(num_groups_110459) * np.int64(segmap_group_sizze_110458)),), + (np.int64(segmap_group_sizze_110458),)) + if synchronous: + sync(self) + mem_123259 = None + mem_123263 = None + mem_123275 = None + mem_123359 = opencl_alloc(self, bytes_121997, "mem_123359") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123359, + np.int64(0), + mem_123287, + np.int64(0), + np.int64(1), + m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_123287 = None + defunc_3_map_res_mem_123366 = mem_123359 + else: + local_memory_capacity_127782 = self.max_local_memory + if (sle64((((bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8))) + (bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8)))) + (bytes_123327 + srem64((np.int64(8) - srem64(bytes_123327, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127782)) and suff_outer_par_110489): + mem_123291 = opencl_alloc(self, bytes_121997, "mem_123291") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_123291, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_123295 = opencl_alloc(self, bytes_121997, "mem_123295") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123295, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + num_whole_tiles_117162 = squot64(j_m_i_110431, + tile_sizze_117141) + residual_input_117295 = srem64(j_m_i_110431, + tile_sizze_117141) + cond_117296 = (residual_input_117295 == np.int64(0)) + mem_123334 = opencl_alloc(self, bytes_121997, "mem_123334") + mem_125317 = opencl_alloc(self, total_sizze_126228, + "mem_125317") + if ((1 * (np.int64(num_groups_top_117145) * np.int64(group_sizze_117142))) != 0): + self.mainzisegmap_intragroup_117146_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123327)), + cl.LocalMemory(np.int64(bytes_123298)), + cl.LocalMemory(np.int64(bytes_123298)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_110426), + np.int64(i_110427), + np.int64(j_m_i_110431), + np.int64(num_groups_y_117144), + np.int64(num_whole_tiles_117162), + np.int64(residual_input_117295), + np.byte(cond_117296), + np.int64(num_threads_126227), + mem_121938, + mem_123143, + mem_123241, + mem_123291, + mem_123295, + mem_123334, + mem_125317) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_117146_var, + ((np.int64(num_groups_top_117145) * np.int64(group_sizze_117142)),), + (np.int64(group_sizze_117142),)) + if synchronous: + sync(self) + mem_123291 = None + mem_123295 = None + mem_125317 = None + defunc_3_map_res_mem_123355 = mem_123334 + else: + mem_123338 = opencl_alloc(self, bytes_121990, "mem_123338") + if slt64((j_m_i_110431 * np.int64(2)), + segred_group_sizze_110512): + segment_sizze_nonzzero_127712 = smax64(np.int64(1), + j_m_i_110431) + num_threads_127713 = (num_groups_110513 * segred_group_sizze_110512) + if ((1 * 
(np.int64(num_groups_110513) * np.int64(segred_group_sizze_110512))) != 0): + self.mainzisegred_small_108892_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_110512))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(x_110426), + np.int64(i_110427), + np.int64(j_m_i_110431), + np.int64(num_groups_110513), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_127712), + mem_123143, + mem_param_123252, + mem_123338) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_108892_var, + ((np.int64(num_groups_110513) * np.int64(segred_group_sizze_110512)),), + (np.int64(segred_group_sizze_110512),)) + if synchronous: + sync(self) + else: + groups_per_segment_127733 = sdiv_up64(num_groups_110513, + smax64(np.int64(1), + (m_75136 * k2p2zq_75151))) + elements_per_thread_127734 = sdiv_up64(j_m_i_110431, + (segred_group_sizze_110512 * groups_per_segment_127733)) + virt_num_groups_127735 = (groups_per_segment_127733 * (m_75136 * k2p2zq_75151)) + num_threads_127736 = (num_groups_110513 * segred_group_sizze_110512) + threads_per_segment_127737 = (groups_per_segment_127733 * segred_group_sizze_110512) + group_res_arr_mem_127738 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_110512 * virt_num_groups_127735)), + "group_res_arr_mem_127738") + mainzicounter_mem_127740 = self.mainzicounter_mem_127740 + if ((1 * (np.int64(num_groups_110513) * np.int64(segred_group_sizze_110512))) != 0): + self.mainzisegred_large_108892_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_110512))), + np.int64(k2p2zq_75151), + np.int64(x_110426), + np.int64(i_110427), + np.int64(j_m_i_110431), + np.int64(num_groups_110513), + np.int64(binop_x_120251), + np.int64(groups_per_segment_127733), + np.int64(elements_per_thread_127734), + np.int64(virt_num_groups_127735), + np.int64(threads_per_segment_127737), + mem_123143, + mem_param_123252, + 
mem_123338, + group_res_arr_mem_127738, + mainzicounter_mem_127740) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_108892_var, + ((np.int64(num_groups_110513) * np.int64(segred_group_sizze_110512)),), + (np.int64(segred_group_sizze_110512),)) + if synchronous: + sync(self) + mem_123342 = opencl_alloc(self, bytes_121990, "mem_123342") + if ((1 * (np.int64(segmap_usable_groups_110530) * np.int64(segmap_group_sizze_110529))) != 0): + self.mainzisegmap_108870_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(i_110427), + mem_121938, + mem_123143, + mem_123338, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_108870_var, + ((np.int64(segmap_usable_groups_110530) * np.int64(segmap_group_sizze_110529)),), + (np.int64(segmap_group_sizze_110529),)) + if synchronous: + sync(self) + mem_123338 = None + if ((1 * (np.int64(segmap_usable_groups_110541) * np.int64(segmap_group_sizze_110540))) != 0): + self.mainzisegmap_108858_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(i_110427), + np.int64(binop_x_120251), + mem_param_123252, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_108858_var, + ((np.int64(segmap_usable_groups_110541) * np.int64(segmap_group_sizze_110540)),), + (np.int64(segmap_group_sizze_110540),)) + if synchronous: + sync(self) + mem_123342 = None + defunc_3_map_res_mem_123355 = mem_param_123252 + mem_123364 = opencl_alloc(self, bytes_121997, "mem_123364") + if ((((m_75136 * k2p2zq_75151) * k2p2zq_75151) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_123364, + defunc_3_map_res_mem_123355, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_75136 * k2p2zq_75151) * k2p2zq_75151) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_123355 = None + defunc_3_map_res_mem_123366 = mem_123364 + mem_param_tmp_127679 = 
defunc_3_map_res_mem_123366 + mem_param_123252 = mem_param_tmp_127679 + i_110424 += one_129855 + defunc_3_map_res_r_mem_123380 = mem_param_123252 + mem_123241 = None + defunc_3_map_res_r_mem_123392 = defunc_3_map_res_r_mem_123380 + mem_123143 = None + local_memory_capacity_127917 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127917)) and suff_outer_par_110556): + mem_123396 = opencl_alloc(self, bytes_121997, "mem_123396") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123396, np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + mem_123415 = opencl_alloc(self, bytes_121997, "mem_123415") + mem_123399 = opencl_alloc(self, total_sizze_126233, "mem_123399") + if ((1 * (np.int64(num_groups_110552) * np.int64(segmap_group_sizze_110551))) != 0): + self.mainzisegmap_108554_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_110552), + np.int64(binop_x_120251), + np.int64(num_threads_126232), + defunc_3_map_res_r_mem_123392, + mem_123396, mem_123399, + mem_123415) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_108554_var, + ((np.int64(num_groups_110552) * np.int64(segmap_group_sizze_110551)),), + (np.int64(segmap_group_sizze_110551),)) + if synchronous: + sync(self) + mem_123396 = None + mem_123399 = None + mem_123628 = opencl_alloc(self, bytes_121997, "mem_123628") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123628, np.int64(0), + mem_123415, np.int64(0), + np.int64(1), + (m_75136 * k2p2zq_75151), + k2p2zq_75151) + mem_123415 = None + defunc_3_map_res_r_mem_123630 = mem_123628 + else: + local_memory_capacity_127916 = self.max_local_memory + if (sle64(((bytes_123434 + srem64((np.int64(8) - srem64(bytes_123434, + np.int64(8))), + np.int64(8))) + (bytes_123436 + srem64((np.int64(8) - srem64(bytes_123436, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127916)) and suff_outer_par_110578): + 
mem_123419 = opencl_alloc(self, bytes_121997, "mem_123419") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123419, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + mem_123610 = opencl_alloc(self, bytes_121997, "mem_123610") + if ((1 * (np.int64(grid_sizze_117461) * np.int64(group_sizze_117462))) != 0): + self.mainzisegmap_intragroup_117465_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123436)), + cl.LocalMemory(np.int64(bytes_123434)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(gridDim_x_117458), + np.int64(gridDim_y_117459), + np.int64(full_tiles_117490), + np.int64(kk_117693), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_123392, + mem_123419, + mem_123610) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_117465_var, + ((np.int64(grid_sizze_117461) * np.int64(group_sizze_117462)),), + (np.int64(group_sizze_117462),)) + if synchronous: + sync(self) + mem_123419 = None + defunc_3_map_res_r_mem_123624 = mem_123610 + else: + mem_123614 = opencl_alloc(self, bytes_121997, "mem_123614") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123614, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_123618 = opencl_alloc(self, bytes_121997, "mem_123618") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123618, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_75136, + k2p2zq_75151, + k2p2zq_75151) + mem_123623 = opencl_alloc(self, bytes_121997, "mem_123623") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_110594): + segment_sizze_nonzzero_127856 = smax64(np.int64(1), + k2p2zq_75151) + num_threads_127857 = (num_groups_110595 * segred_group_sizze_110594) + if ((1 * (np.int64(num_groups_110595) * np.int64(segred_group_sizze_110594))) != 0): + self.mainzisegred_small_108614_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * 
segred_group_sizze_110594))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_110595), + np.int64(segment_sizze_nonzzero_127856), + mem_123614, + mem_123618, + mem_123623) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_108614_var, + ((np.int64(num_groups_110595) * np.int64(segred_group_sizze_110594)),), + (np.int64(segred_group_sizze_110594),)) + if synchronous: + sync(self) + else: + groups_per_segment_127877 = sdiv_up64(num_groups_110595, + smax64(np.int64(1), + ((m_75136 * k2p2zq_75151) * k2p2zq_75151))) + elements_per_thread_127878 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_110594 * groups_per_segment_127877)) + virt_num_groups_127879 = (groups_per_segment_127877 * ((m_75136 * k2p2zq_75151) * k2p2zq_75151)) + num_threads_127880 = (num_groups_110595 * segred_group_sizze_110594) + threads_per_segment_127881 = (groups_per_segment_127877 * segred_group_sizze_110594) + group_res_arr_mem_127882 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_110594 * virt_num_groups_127879)), + "group_res_arr_mem_127882") + mainzicounter_mem_127884 = self.mainzicounter_mem_127884 + if ((1 * (np.int64(num_groups_110595) * np.int64(segred_group_sizze_110594))) != 0): + self.mainzisegred_large_108614_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_110594))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_110595), + np.int64(groups_per_segment_127877), + np.int64(elements_per_thread_127878), + np.int64(virt_num_groups_127879), + np.int64(threads_per_segment_127881), + mem_123614, + mem_123618, + mem_123623, + group_res_arr_mem_127882, + mainzicounter_mem_127884) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_108614_var, + ((np.int64(num_groups_110595) * np.int64(segred_group_sizze_110594)),), + (np.int64(segred_group_sizze_110594),)) + if synchronous: + sync(self) + mem_123614 = None + mem_123618 = None + 
defunc_3_map_res_r_mem_123624 = mem_123623 + defunc_3_map_res_r_mem_123630 = defunc_3_map_res_r_mem_123624 + mem_123633 = opencl_alloc(self, bytes_122015, "mem_123633") + group_sizze_127921 = self.sizes["main.group_size_127921"] + num_groups_127922 = sdiv_up64((m_75136 * rp1_75837), + group_sizze_127921) + if ((1 * (np.int64(num_groups_127922) * np.int64(group_sizze_127921))) != 0): + self.mainzicopy_127918_var.set_args(np.int64(m_75136), + np.int64(n_75139), + np.int64(rp1_75837), + defunc_3_map_res_mem_120231, + mem_123633) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127918_var, + ((np.int64(num_groups_127922) * np.int64(group_sizze_127921)),), + (np.int64(group_sizze_127921),)) + if synchronous: + sync(self) + mem_123637 = opencl_alloc(self, bytes_123634, "mem_123637") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123637, np.int64(0), + mem_123127, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * rp1_75837)) + mem_123127 = None + mem_123641 = opencl_alloc(self, bytes_122018, "mem_123641") + group_sizze_127926 = self.sizes["main.group_size_127926"] + num_groups_127927 = sdiv_up64(((m_75136 * k2p2zq_75151) * rp1_75837), + group_sizze_127926) + if ((1 * (np.int64(num_groups_127927) * np.int64(group_sizze_127926))) != 0): + self.mainzicopy_127923_var.set_args(np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(rp1_75837), mem_123637, + mem_123641) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_127923_var, + ((np.int64(num_groups_127927) * np.int64(group_sizze_127926)),), + (np.int64(group_sizze_127926),)) + if synchronous: + sync(self) + mem_123637 = None + mem_123678 = opencl_alloc(self, bytes_122015, "mem_123678") + mem_125341 = opencl_alloc(self, total_sizze_126237, "mem_125341") + double_buffer_mem_125599 = opencl_alloc(self, total_sizze_126238, + "double_buffer_mem_125599") + if ((1 * (np.int64(num_groups_110611) * np.int64(segmap_group_sizze_110610))) != 0): + self.mainzisegmap_108490_var.set_args(self.global_failure, + 
self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(r_75826), + np.int64(rp1_75837), + np.int64(num_groups_110611), + np.int64(num_threads_126236), + mem_123130, mem_123135, + mem_123633, mem_123641, + mem_123678, mem_125341, + double_buffer_mem_125599) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108490_var, + ((np.int64(num_groups_110611) * np.int64(segmap_group_sizze_110610)),), + (np.int64(segmap_group_sizze_110610),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_123130 = None + mem_123633 = None + mem_123641 = None + mem_125341 = None + double_buffer_mem_125599 = None + mem_123681 = opencl_alloc(self, bytes_122015, "mem_123681") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123681, np.int64(0), + mem_123678, np.int64(0), + np.int64(1), m_75136, + rp1_75837) + mem_123678 = None + if ((1 * (np.int64(segmap_usable_groups_110690) * np.int64(segmap_group_sizze_110689))) != 0): + self.mainzisegmap_108435_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(rp1_75837), + np.int64(binop_x_120251), + mem_122003, mem_123133, + defunc_3_map_res_r_mem_123392, + mem_123681) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108435_var, + ((np.int64(segmap_usable_groups_110690) * np.int64(segmap_group_sizze_110689)),), + (np.int64(segmap_group_sizze_110689),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_123392 = None + mem_123681 = None + mem_123685 = opencl_alloc(self, bytes_121990, "mem_123685") + self.futhark_builtinzhgpu_map_transpose_i64(mem_123685, np.int64(0), + mem_123133, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_123133 = None + if ((1 * (np.int64(segmap_usable_groups_110761) * np.int64(segmap_group_sizze_110760))) != 0): + self.mainzisegmap_108278_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(binop_x_120251), + mem_122007, + 
defunc_3_map_res_r_mem_123630, + mem_123685) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108278_var, + ((np.int64(segmap_usable_groups_110761) * np.int64(segmap_group_sizze_110760)),), + (np.int64(segmap_group_sizze_110760),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_123630 = None + mem_123685 = None + mem_123691 = opencl_alloc(self, bytes_121997, "mem_123691") + if ((1 * (np.int64(segmap_usable_groups_110780) * np.int64(segmap_group_sizze_110779))) != 0): + self.mainzisegmap_108230_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_122007, mem_123691) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108230_var, + ((np.int64(segmap_usable_groups_110780) * np.int64(segmap_group_sizze_110779)),), + (np.int64(segmap_group_sizze_110779),)) + if synchronous: + sync(self) + mem_123695 = opencl_alloc(self, bytes_121990, "mem_123695") + if ((1 * (np.int64(segmap_usable_groups_110789) * np.int64(segmap_group_sizze_110788))) != 0): + self.mainzisegmap_108208_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_122003, mem_123695) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108208_var, + ((np.int64(segmap_usable_groups_110789) * np.int64(segmap_group_sizze_110788)),), + (np.int64(segmap_group_sizze_110788),)) + if synchronous: + sync(self) + mem_123699 = opencl_alloc(self, bytes_120258, "mem_123699") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123699, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_123702 = opencl_alloc(self, bytes_121990, "mem_123702") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123702, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + mem_123705 = opencl_alloc(self, bytes_121990, "mem_123705") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123705, np.int64(0), + mem_122671, np.int64(0), + np.int64(1), k2p2zq_75151, + 
m_75136) + mem_122671 = None + mem_123708 = opencl_alloc(self, bytes_121990, "mem_123708") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123708, np.int64(0), + mem_123695, np.int64(0), + np.int64(1), k2p2zq_75151, + m_75136) + mem_123711 = opencl_alloc(self, m_75136, "mem_123711") + if ((1 * (np.int64(segmap_usable_groups_110798) * np.int64(segmap_group_sizze_110797))) != 0): + self.mainzisegmap_108136_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.float64(tol_75329), + np.int64(r_75826), mem_120246, + mem_121941, mem_param_121972, + mem_122674, mem_122680, + mem_122682, mem_123135, + mem_123699, mem_123702, + mem_123705, mem_123708, + mem_123711) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_108136_var, + ((np.int64(segmap_usable_groups_110798) * np.int64(segmap_group_sizze_110797)),), + (np.int64(segmap_group_sizze_110797),)) + if synchronous: + sync(self) + mem_122674 = None + mem_122680 = None + mem_123699 = None + mem_123702 = None + mem_123705 = None + mem_123708 = None + defunc_7_map_res_mem_123721 = mem_123711 + defunc_7_map_res_mem_123722 = mem_123691 + defunc_7_map_res_mem_123723 = mem_123695 + defunc_7_map_res_mem_123724 = mem_123135 + defunc_7_map_res_mem_123725 = mem_122682 + i_76412 = (r_75826 - k2p2zq_75151) + x_76413 = sle64(np.int64(0), i_76412) + y_76414 = slt64(i_76412, num_recresids_padded_75809) + bounds_check_76415 = (x_76413 and y_76414) + index_ok_76416 = (ok_or_empty_75250 and bounds_check_76415) + index_certs_76417 = True + assert index_ok_76416, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:74:9-75:74\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_76412, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_75809, + "][", + m_75136, + "].")) + if ((m_75136 * 
np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123725, + dest_offset=np.int64(((i_76412 * m_75136) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + defunc_7_map_res_mem_123725 = None + mainzicounter_mem_127974 = self.mainzicounter_mem_127974 + group_res_arr_mem_127976 = opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_110860 * num_groups_110862)), + "group_res_arr_mem_127976") + num_threads_127978 = (num_groups_110862 * segred_group_sizze_110860) + if ((1 * (np.int64(num_groups_110862) * np.int64(segred_group_sizze_110860))) != 0): + self.mainzisegred_nonseg_110867_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_110860))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_75136), + np.int64(num_groups_110862), + np.int64(num_threads_127978), + defunc_7_map_res_mem_123721, + mem_123728, + mainzicounter_mem_127974, + group_res_arr_mem_127976) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_nonseg_110867_var, + ((np.int64(num_groups_110862) * np.int64(segred_group_sizze_110860)),), + (np.int64(segred_group_sizze_110860),)) + if synchronous: + sync(self) + defunc_7_map_res_mem_123721 = None + read_res_129858 = np.empty(1, dtype=ct.c_bool) + cl.enqueue_copy(self.queue, read_res_129858, mem_123728, + device_offset=(np.int64(np.int64(0)) * 1), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_comm_res_76419 = read_res_129858[0] + loop_cond_t_res_76424 = slt64(rp1_75837, m_75290) + x_76425 = (defunc_2_reduce_comm_res_76419 and loop_cond_t_res_76424) + mem_param_tmp_127197 = defunc_7_map_res_mem_123722 + mem_param_tmp_127198 = defunc_7_map_res_mem_123723 + mem_param_tmp_127199 = defunc_7_map_res_mem_123724 + loop_while_tmp_127200 = x_76425 + r_tmp_127201 = rp1_75837 + mem_param_121959 = mem_param_tmp_127197 + mem_param_121967 = mem_param_tmp_127198 + mem_param_121972 = 
mem_param_tmp_127199 + loop_while_75825 = loop_while_tmp_127200 + r_75826 = r_tmp_127201 + mrecresid_nn_res_mem_123746 = mem_param_121959 + mrecresid_nn_res_mem_123754 = mem_param_121967 + mrecresid_nn_res_mem_123759 = mem_param_121972 + mrecresid_nn_res_75819 = loop_while_75825 + mrecresid_nn_res_75820 = r_75826 + mem_120248 = None + defunc_5_map_res_mem_121929 = None + defunc_5_map_res_mem_121930 = None + defunc_5_map_res_mem_121931 = None + mem_121938 = None + mem_121941 = None + mem_121944 = None + mem_121946 = None + mem_121948 = None + mem_121992 = None + mem_121996 = None + mem_122000 = None + mem_122003 = None + mem_122007 = None + mem_123728 = None + bounds_invalid_upwards_76426 = slt64(defunc_2_reduce_res_75260, + mrecresid_nn_res_75820) + distance_76427 = (defunc_2_reduce_res_75260 - mrecresid_nn_res_75820) + valid_76428 = not(bounds_invalid_upwards_76426) + range_valid_c_76429 = True + assert valid_76428, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:78:63-68\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n" % ("Range ", + mrecresid_nn_res_75820, + "..<", + defunc_2_reduce_res_75260, + " is invalid.")) + suff_outer_par_110905 = (self.sizes["main.suff_outer_par_16"] <= m_75136) + intra_avail_par_110899 = smin64(k2p2zq_75151, binop_x_120251) + computed_group_sizze_110870 = smax64(k2p2zq_75151, binop_x_120251) + fits_111022 = sle64(computed_group_sizze_110870, max_group_sizze_103162) + suff_intra_par_111020 = (self.sizes["main.suff_intra_par_17"] <= intra_avail_par_110899) + intra_suff_and_fits_111023 = (suff_intra_par_111020 and fits_111022) + segmap_group_sizze_110964 = self.sizes["main.segmap_group_size_110909"] + max_num_groups_128004 = self.sizes["main.segmap_num_groups_110911"] + num_groups_110965 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_110964), + 
sext_i32_i64(max_num_groups_128004)))) + segred_group_sizze_111264 = self.sizes["main.segred_group_size_111240"] + max_num_groups_128005 = self.sizes["main.segred_num_groups_111242"] + num_groups_111265 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_111264), + sext_i32_i64(max_num_groups_128005)))) + segmap_group_sizze_111285 = self.sizes["main.segmap_group_size_111232"] + segred_group_sizze_111292 = self.sizes["main.segred_group_size_111213"] + max_num_groups_128006 = self.sizes["main.segred_num_groups_111215"] + num_groups_111293 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_111292), + sext_i32_i64(max_num_groups_128006)))) + segmap_group_sizze_111305 = self.sizes["main.segmap_group_size_111201"] + segmap_group_sizze_111317 = self.sizes["main.segmap_group_size_111087"] + max_num_groups_128007 = self.sizes["main.segmap_num_groups_111089"] + num_groups_111318 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_111317), + sext_i32_i64(max_num_groups_128007)))) + suff_outer_par_111322 = (self.sizes["main.suff_outer_par_18"] <= binop_x_120244) + segred_group_sizze_111351 = self.sizes["main.segred_group_size_111138"] + max_num_groups_128008 = self.sizes["main.segred_num_groups_111140"] + num_groups_111352 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_111351), + sext_i32_i64(max_num_groups_128008)))) + segmap_group_sizze_111374 = self.sizes["main.segmap_group_size_111125"] + segmap_usable_groups_111286 = sdiv_up_safe64(m_75136, + segmap_group_sizze_111285) + segmap_usable_groups_111306 = sdiv_up_safe64(m_75136, + segmap_group_sizze_111305) + segmap_usable_groups_111375 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_111374) + num_threads_126245 = (segmap_group_sizze_110964 * num_groups_110965) + total_sizze_126246 = (bytes_120247 * num_threads_126245) + total_sizze_126247 = (bytes_120247 * 
num_threads_126245) + total_sizze_126248 = (bytes_120250 * num_threads_126245) + total_sizze_126249 = (bytes_120247 * num_threads_126245) + num_threads_126255 = (segmap_group_sizze_111317 * num_groups_111318) + total_sizze_126256 = (bytes_120247 * num_threads_126255) + mem_param_123778 = mrecresid_nn_res_mem_123746 + mem_param_123786 = mrecresid_nn_res_mem_123754 + i_76433 = np.int64(0) + one_129863 = np.int64(1) + for counter_129862 in range(distance_76427): + index_primexp_76437 = (mrecresid_nn_res_75820 + i_76433) + x_76438 = sle64(np.int64(0), index_primexp_76437) + y_76439 = slt64(index_primexp_76437, defunc_2_reduce_res_75260) + bounds_check_76440 = (x_76438 and y_76439) + index_ok_76441 = (ok_or_empty_75229 and bounds_check_76440) + index_certs_76442 = True + assert index_ok_76441, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [", + index_primexp_76437, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "][", + k2p2zq_75151, + "].")) + index_certs_76443 = True + assert bounds_check_76440, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:200:5-74\n #9 bfastfinal.fut:195:1-201:36\n" % ("Index [", + index_primexp_76437, + "] out of bounds for array of shape [", + defunc_2_reduce_res_75260, + "].")) + local_memory_capacity_128277 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128277)) and suff_outer_par_110905): + mem_123798 = opencl_alloc(self, bytes_120258, 
"mem_123798") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123798, np.int64(0), + mem_param_123778, + np.int64(0), np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_123801 = opencl_alloc(self, bytes_121990, "mem_123801") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123801, np.int64(0), + mem_param_123786, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + mem_123869 = opencl_alloc(self, bytes_120258, "mem_123869") + mem_123872 = opencl_alloc(self, bytes_121990, "mem_123872") + mem_123874 = opencl_alloc(self, bytes_120173, "mem_123874") + mem_123804 = opencl_alloc(self, total_sizze_126246, "mem_123804") + mem_123818 = opencl_alloc(self, total_sizze_126247, "mem_123818") + mem_123821 = opencl_alloc(self, total_sizze_126248, "mem_123821") + mem_123840 = opencl_alloc(self, total_sizze_126249, "mem_123840") + if ((1 * (np.int64(num_groups_110965) * np.int64(segmap_group_sizze_110964))) != 0): + self.mainzisegmap_110907_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_110965), + np.int64(num_threads_126245), + defunc_3_map_res_mem_120231, + mem_120246, mem_123798, + mem_123801, mem_123804, + mem_123818, mem_123821, + mem_123840, mem_123869, + mem_123872, mem_123874) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_110907_var, + ((np.int64(num_groups_110965) * np.int64(segmap_group_sizze_110964)),), + (np.int64(segmap_group_sizze_110964),)) + if synchronous: + sync(self) + mem_123798 = None + mem_123801 = None + mem_123804 = None + mem_123818 = None + mem_123821 = None + mem_123840 = None + mem_123966 = opencl_alloc(self, bytes_121997, "mem_123966") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123966, np.int64(0), + mem_123869, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_123869 = None + mem_123970 = opencl_alloc(self, bytes_121990, "mem_123970") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_123970, np.int64(0), + mem_123872, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_123872 = None + defunc_7_map_res_mem_123972 = mem_123966 + defunc_7_map_res_mem_123973 = mem_123970 + defunc_7_map_res_mem_123974 = mem_123874 + else: + local_memory_capacity_128276 = self.max_local_memory + if (sle64((((((((bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8))) + ((np.int32(8) * k2p2zq_75151) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_75151), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * k2p2zq_75151) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_75151), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))) + (bytes_120250 + srem64((np.int64(8) - srem64(bytes_120250, + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * (k2p2zq_75151 * k2p2zq_75151)) + srem64((np.int64(8) - srem64((np.int32(8) * (k2p2zq_75151 * k2p2zq_75151)), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128276)) and intra_suff_and_fits_111023): + mem_123892 = opencl_alloc(self, bytes_121997, "mem_123892") + mem_123895 = opencl_alloc(self, bytes_121990, "mem_123895") + mem_123897 = opencl_alloc(self, bytes_120173, "mem_123897") + if ((1 * (np.int64(m_75136) * np.int64(computed_group_sizze_110870))) != 0): + self.mainzisegmap_intragroup_110903_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * (k2p2zq_75151 * k2p2zq_75151)))), + cl.LocalMemory(np.int64(bytes_120250)), + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_75151))), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_75151))), + cl.LocalMemory(np.int64(bytes_120247)), + np.int64(m_75136), + np.int64(n_75139), + 
np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(computed_group_sizze_110870), + np.int64(binop_x_120251), + defunc_3_map_res_mem_120231, + mem_120246, + mem_param_123778, + mem_param_123786, + mem_123892, + mem_123895, + mem_123897) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_110903_var, + ((np.int64(m_75136) * np.int64(computed_group_sizze_110870)),), + (np.int64(computed_group_sizze_110870),)) + if synchronous: + sync(self) + defunc_7_map_res_mem_123960 = mem_123892 + defunc_7_map_res_mem_123961 = mem_123895 + defunc_7_map_res_mem_123962 = mem_123897 + else: + mem_123901 = opencl_alloc(self, bytes_121997, "mem_123901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123901, np.int64(0), + mem_param_123778, + np.int64(0), + np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + mem_123904 = opencl_alloc(self, bytes_120173, "mem_123904") + mem_123907 = opencl_alloc(self, bytes_121990, "mem_123907") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_111264): + segment_sizze_nonzzero_128067 = smax64(np.int64(1), k2p2zq_75151) + num_threads_128068 = (num_groups_111265 * segred_group_sizze_111264) + if ((1 * (np.int64(num_groups_111265) * np.int64(segred_group_sizze_111264))) != 0): + self.mainzisegred_small_111246_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111264))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111265), + np.int64(segment_sizze_nonzzero_128067), + mem_120246, + mem_123901, + mem_123904, + mem_123907) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_111246_var, + ((np.int64(num_groups_111265) * np.int64(segred_group_sizze_111264)),), + (np.int64(segred_group_sizze_111264),)) + if synchronous: + sync(self) + else: + groups_per_segment_128089 = sdiv_up64(num_groups_111265, + 
smax64(np.int64(1), + m_75136)) + elements_per_thread_128090 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_111264 * groups_per_segment_128089)) + virt_num_groups_128091 = (groups_per_segment_128089 * m_75136) + num_threads_128092 = (num_groups_111265 * segred_group_sizze_111264) + threads_per_segment_128093 = (groups_per_segment_128089 * segred_group_sizze_111264) + group_res_arr_mem_128094 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_111264 * virt_num_groups_128091)), + "group_res_arr_mem_128094") + mainzicounter_mem_128096 = self.mainzicounter_mem_128096 + if ((1 * (np.int64(num_groups_111265) * np.int64(segred_group_sizze_111264))) != 0): + self.mainzisegred_large_111246_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111264))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111265), + np.int64(groups_per_segment_128089), + np.int64(elements_per_thread_128090), + np.int64(virt_num_groups_128091), + np.int64(threads_per_segment_128093), + mem_120246, + mem_123901, + mem_123904, + mem_123907, + group_res_arr_mem_128094, + mainzicounter_mem_128096) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_111246_var, + ((np.int64(num_groups_111265) * np.int64(segred_group_sizze_111264)),), + (np.int64(segred_group_sizze_111264),)) + if synchronous: + sync(self) + mem_123910 = opencl_alloc(self, bytes_120173, "mem_123910") + if ((1 * (np.int64(segmap_usable_groups_111286) * np.int64(segmap_group_sizze_111285))) != 0): + self.mainzisegmap_111230_var.set_args(self.global_failure, + np.int64(m_75136), + mem_123904, mem_123910) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_111230_var, + ((np.int64(segmap_usable_groups_111286) * np.int64(segmap_group_sizze_111285)),), + (np.int64(segmap_group_sizze_111285),)) + if synchronous: + sync(self) + mem_123904 = 
None + mem_123913 = opencl_alloc(self, bytes_120173, "mem_123913") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_111292): + segment_sizze_nonzzero_128134 = smax64(np.int64(1), k2p2zq_75151) + num_threads_128135 = (num_groups_111293 * segred_group_sizze_111292) + if ((1 * (np.int64(num_groups_111293) * np.int64(segred_group_sizze_111292))) != 0): + self.mainzisegred_small_111219_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111292))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111293), + np.int64(segment_sizze_nonzzero_128134), + mem_120246, + mem_param_123786, + mem_123913) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_111219_var, + ((np.int64(num_groups_111293) * np.int64(segred_group_sizze_111292)),), + (np.int64(segred_group_sizze_111292),)) + if synchronous: + sync(self) + else: + groups_per_segment_128155 = sdiv_up64(num_groups_111293, + smax64(np.int64(1), + m_75136)) + elements_per_thread_128156 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_111292 * groups_per_segment_128155)) + virt_num_groups_128157 = (groups_per_segment_128155 * m_75136) + num_threads_128158 = (num_groups_111293 * segred_group_sizze_111292) + threads_per_segment_128159 = (groups_per_segment_128155 * segred_group_sizze_111292) + group_res_arr_mem_128160 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_111292 * virt_num_groups_128157)), + "group_res_arr_mem_128160") + mainzicounter_mem_128162 = self.mainzicounter_mem_128162 + if ((1 * (np.int64(num_groups_111293) * np.int64(segred_group_sizze_111292))) != 0): + self.mainzisegred_large_111219_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111292))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), 
+ np.int64(num_groups_111293), + np.int64(groups_per_segment_128155), + np.int64(elements_per_thread_128156), + np.int64(virt_num_groups_128157), + np.int64(threads_per_segment_128159), + mem_120246, + mem_param_123786, + mem_123913, + group_res_arr_mem_128160, + mainzicounter_mem_128162) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_111219_var, + ((np.int64(num_groups_111293) * np.int64(segred_group_sizze_111292)),), + (np.int64(segred_group_sizze_111292),)) + if synchronous: + sync(self) + mem_123916 = opencl_alloc(self, bytes_120173, "mem_123916") + mem_123918 = opencl_alloc(self, bytes_120173, "mem_123918") + if ((1 * (np.int64(segmap_usable_groups_111306) * np.int64(segmap_group_sizze_111305))) != 0): + self.mainzisegmap_111199_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(n_75139), + np.int64(index_primexp_76437), + defunc_3_map_res_mem_120231, + mem_123910, mem_123913, + mem_123916, mem_123918) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_111199_var, + ((np.int64(segmap_usable_groups_111306) * np.int64(segmap_group_sizze_111305)),), + (np.int64(segmap_group_sizze_111305),)) + if synchronous: + sync(self) + mem_123913 = None + local_memory_capacity_128275 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128275)) and suff_outer_par_111322): + mem_123937 = opencl_alloc(self, bytes_121997, "mem_123937") + mem_123940 = opencl_alloc(self, bytes_121990, "mem_123940") + mem_123921 = opencl_alloc(self, total_sizze_126256, "mem_123921") + if ((1 * (np.int64(num_groups_111318) * np.int64(segmap_group_sizze_111317))) != 0): + self.mainzisegmap_111084_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111318), + np.int64(num_threads_126255), + mem_120246, + mem_param_123786, + mem_123901, mem_123907, + mem_123910, mem_123916, + mem_123921, mem_123937, + 
mem_123940) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_111084_var, + ((np.int64(num_groups_111318) * np.int64(segmap_group_sizze_111317)),), + (np.int64(segmap_group_sizze_111317),)) + if synchronous: + sync(self) + mem_123921 = None + mem_123956 = opencl_alloc(self, bytes_121997, "mem_123956") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123956, + np.int64(0), + mem_123937, + np.int64(0), + np.int64(1), + (m_75136 * k2p2zq_75151), + k2p2zq_75151) + mem_123937 = None + defunc_7_map_res_mem_123958 = mem_123956 + defunc_7_map_res_mem_123959 = mem_123940 + else: + mem_123944 = opencl_alloc(self, bytes_121990, "mem_123944") + mem_123948 = opencl_alloc(self, bytes_121997, "mem_123948") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_111351): + segment_sizze_nonzzero_128210 = smax64(np.int64(1), + k2p2zq_75151) + num_threads_128211 = (num_groups_111352 * segred_group_sizze_111351) + if ((1 * (np.int64(num_groups_111352) * np.int64(segred_group_sizze_111351))) != 0): + self.mainzisegred_small_111144_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111351))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111352), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_128210), + mem_120246, + mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_111144_var, + ((np.int64(num_groups_111352) * np.int64(segred_group_sizze_111351)),), + (np.int64(segred_group_sizze_111351),)) + if synchronous: + sync(self) + else: + groups_per_segment_128231 = sdiv_up64(num_groups_111352, + smax64(np.int64(1), + (m_75136 * k2p2zq_75151))) + elements_per_thread_128232 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_111351 * groups_per_segment_128231)) + virt_num_groups_128233 = (groups_per_segment_128231 * (m_75136 * k2p2zq_75151)) 
+ num_threads_128234 = (num_groups_111352 * segred_group_sizze_111351) + threads_per_segment_128235 = (groups_per_segment_128231 * segred_group_sizze_111351) + group_res_arr_mem_128236 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_111351 * virt_num_groups_128233)), + "group_res_arr_mem_128236") + mainzicounter_mem_128238 = self.mainzicounter_mem_128238 + if ((1 * (np.int64(num_groups_111352) * np.int64(segred_group_sizze_111351))) != 0): + self.mainzisegred_large_111144_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111351))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(defunc_2_reduce_res_75260), + np.int64(index_primexp_76437), + np.int64(num_groups_111352), + np.int64(binop_x_120251), + np.int64(groups_per_segment_128231), + np.int64(elements_per_thread_128232), + np.int64(virt_num_groups_128233), + np.int64(threads_per_segment_128235), + mem_120246, + mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948, + group_res_arr_mem_128236, + mainzicounter_mem_128238) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_111144_var, + ((np.int64(num_groups_111352) * np.int64(segred_group_sizze_111351)),), + (np.int64(segred_group_sizze_111351),)) + if synchronous: + sync(self) + mem_123952 = opencl_alloc(self, bytes_121990, "mem_123952") + if ((1 * (np.int64(segmap_usable_groups_111375) * np.int64(segmap_group_sizze_111374))) != 0): + self.mainzisegmap_111122_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + mem_param_123786, + mem_123916, mem_123944, + mem_123952) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_111122_var, + ((np.int64(segmap_usable_groups_111375) * np.int64(segmap_group_sizze_111374)),), + (np.int64(segmap_group_sizze_111374),)) + if synchronous: + sync(self) + mem_123944 = None + defunc_7_map_res_mem_123958 = mem_123948 + defunc_7_map_res_mem_123959 = 
mem_123952 + mem_123901 = None + mem_123907 = None + mem_123910 = None + mem_123916 = None + defunc_7_map_res_mem_123960 = defunc_7_map_res_mem_123958 + defunc_7_map_res_mem_123961 = defunc_7_map_res_mem_123959 + defunc_7_map_res_mem_123962 = mem_123918 + defunc_7_map_res_mem_123972 = defunc_7_map_res_mem_123960 + defunc_7_map_res_mem_123973 = defunc_7_map_res_mem_123961 + defunc_7_map_res_mem_123974 = defunc_7_map_res_mem_123962 + i_76528 = (index_primexp_76437 - k2p2zq_75151) + x_76529 = sle64(np.int64(0), i_76528) + y_76530 = slt64(i_76528, num_recresids_padded_75809) + bounds_check_76531 = (x_76529 and y_76530) + index_ok_76532 = (ok_or_empty_75250 and bounds_check_76531) + index_certs_76533 = True + assert index_ok_76532, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:81:7-82:29\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_76528, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_75809, + "][", + m_75136, + "].")) + if ((m_75136 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123974, + dest_offset=np.int64(((i_76528 * m_75136) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_75136 * np.int32(8)))) + if synchronous: + sync(self) + defunc_7_map_res_mem_123974 = None + mem_param_tmp_128009 = defunc_7_map_res_mem_123972 + mem_param_tmp_128010 = defunc_7_map_res_mem_123973 + mem_param_123778 = mem_param_tmp_128009 + mem_param_123786 = mem_param_tmp_128010 + i_76433 += one_129863 + retsT_mem_123990 = mem_param_123778 + retsT_mem_123998 = mem_param_123786 + defunc_3_map_res_mem_120231 = None + mem_120246 = None + mrecresid_nn_res_mem_123746 = None + mrecresid_nn_res_mem_123754 = None + Nmk_76536 = (np.int64(1) + num_recresids_padded_75809) + bounds_invalid_upwards_76537 = slt64(Nmk_76536, np.int64(0)) + 
valid_76538 = not(bounds_invalid_upwards_76537) + range_valid_c_76539 = True + assert valid_76538, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:36:70-77\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:200:5-74\n #8 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_76536, + " is invalid.")) + suff_outer_par_111403 = (self.sizes["main.suff_outer_par_19"] <= m_75136) + intra_avail_par_111399 = smin64(num_recresids_padded_75809, Nmk_76536) + computed_group_sizze_111392 = smax64(num_recresids_padded_75809, + Nmk_76536) + fits_111505 = sle64(computed_group_sizze_111392, max_group_sizze_103162) + suff_intra_par_111503 = (self.sizes["main.suff_intra_par_20"] <= intra_avail_par_111399) + intra_suff_and_fits_111506 = (suff_intra_par_111503 and fits_111505) + segmap_group_sizze_111455 = self.sizes["main.segmap_group_size_111407"] + max_num_groups_128278 = self.sizes["main.segmap_num_groups_111409"] + num_groups_111456 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_111455), + sext_i32_i64(max_num_groups_128278)))) + segmap_group_sizze_111653 = self.sizes["main.segmap_group_size_111646"] + segred_group_sizze_111662 = self.sizes["main.segred_group_size_111627"] + max_num_groups_128279 = self.sizes["main.segred_num_groups_111629"] + num_groups_111663 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_75810, + segred_group_sizze_111662), + sext_i32_i64(max_num_groups_128279)))) + segmap_group_sizze_111676 = self.sizes["main.segmap_group_size_111618"] + segred_group_sizze_111684 = self.sizes["main.segred_group_size_111597"] + max_num_groups_128280 = self.sizes["main.segred_num_groups_111599"] + num_groups_111685 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_75810, + segred_group_sizze_111684), + 
sext_i32_i64(max_num_groups_128280)))) + segmap_group_sizze_111699 = self.sizes["main.segmap_group_size_111583"] + nest_sizze_111710 = (m_75136 * Nmk_76536) + segscan_group_sizze_111711 = self.sizes["main.segscan_group_size_111556"] + max_num_groups_128281 = self.sizes["main.segscan_num_groups_111558"] + num_groups_111712 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_111710, + segscan_group_sizze_111711), + sext_i32_i64(max_num_groups_128281)))) + bytes_124022 = (np.int64(8) * nest_sizze_111710) + bytes_124008 = (np.int64(8) * Nmk_76536) + num_threads_126259 = (segmap_group_sizze_111455 * num_groups_111456) + total_sizze_126260 = (bytes_124008 * num_threads_126259) + local_memory_capacity_128524 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128524)) and suff_outer_par_111403): + mem_124024 = opencl_alloc(self, bytes_124022, "mem_124024") + mem_124026 = opencl_alloc(self, bytes_120173, "mem_124026") + mem_124009 = opencl_alloc(self, total_sizze_126260, "mem_124009") + if ((1 * (np.int64(num_groups_111456) * np.int64(segmap_group_sizze_111455))) != 0): + self.mainzisegmap_111405_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(num_groups_111456), + np.int64(num_threads_126259), + defunc_3_map_res_mem_120230, + mem_121934, mem_124009, + mem_124024, mem_124026) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111405_var, + ((np.int64(num_groups_111456) * np.int64(segmap_group_sizze_111455)),), + (np.int64(segmap_group_sizze_111455),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124009 = None + mem_124066 = opencl_alloc(self, bytes_124022, "mem_124066") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124066, np.int64(0), + mem_124024, np.int64(0), + np.int64(1), m_75136, + Nmk_76536) + mem_124024 = None + 
defunc_3_map_res_mem_124068 = mem_124066 + defunc_3_map_res_mem_124069 = mem_124026 + else: + local_memory_capacity_128523 = self.max_local_memory + if (sle64(((((np.int32(8) * num_recresids_padded_75809) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_75809), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_75809) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_75809), + np.int64(8))), + np.int64(8)))) + (bytes_124008 + srem64((np.int64(8) - srem64(bytes_124008, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128523)) and intra_suff_and_fits_111506): + mem_124035 = opencl_alloc(self, bytes_124022, "mem_124035") + mem_124037 = opencl_alloc(self, bytes_120173, "mem_124037") + if ((1 * (np.int64(m_75136) * np.int64(computed_group_sizze_111392))) != 0): + self.mainzisegmap_intragroup_111401_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124008)), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_75809))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_75809))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(computed_group_sizze_111392), + defunc_3_map_res_mem_120230, + mem_121934, + mem_124035, + mem_124037) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_111401_var, + ((np.int64(m_75136) * np.int64(computed_group_sizze_111392)),), + (np.int64(computed_group_sizze_111392),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_124062 = mem_124035 + defunc_3_map_res_mem_124063 = mem_124037 else: - raise self.Failure('Unknown entry point: %s' % entry) - - def _check_var(self, vname): - if not vname in self._vars: - raise self.Failure('Unknown variable: %s' % vname) - - def _get_var(self, vname): - self._check_var(vname) - return self._vars[vname] 
- - def _cmd_inputs(self, args): - entry = self._get_arg(args, 0) - for t in self._get_entry_point(entry)[0]: - print(t) - - def _cmd_outputs(self, args): - entry = self._get_arg(args, 0) - for t in self._get_entry_point(entry)[1]: - print(t) - - def _cmd_dummy(self, args): - pass - - def _cmd_free(self, args): - for vname in args: - self._check_var(vname) - del self._vars[vname] - - def _cmd_call(self, args): - entry = self._get_entry_point(self._get_arg(args, 0)) - num_ins = len(entry[0]) - num_outs = len(entry[1]) - - if len(args) != 1 + num_outs + num_ins: - raise self.Failure('Invalid argument count, expected %d') - - out_vnames = args[1:num_outs+1] - - for out_vname in out_vnames: - if out_vname in self._vars: - raise self.Failure('Variable already exists: %s' % out_vname) - - in_vnames = args[1+num_outs:] - ins = [ self._get_var(in_vname) for in_vname in in_vnames ] - - try: - (runtime, vals) = getattr(self._ctx, args[0])(*ins) - except Exception as e: - raise self.Failure(str(e)) - - print('runtime: %d' % runtime) - - if num_outs == 1: - self._vars[out_vnames[0]] = vals + segmap_usable_groups_111654 = sdiv_up64(m_75136, + segmap_group_sizze_111653) + mem_124040 = opencl_alloc(self, bytes_120173, "mem_124040") + mem_124042 = opencl_alloc(self, bytes_120173, "mem_124042") + if ((1 * (np.int64(segmap_usable_groups_111654) * np.int64(segmap_group_sizze_111653))) != 0): + self.mainzisegmap_111644_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + defunc_3_map_res_mem_120230, + mem_124040, mem_124042) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111644_var, + ((np.int64(segmap_usable_groups_111654) * np.int64(segmap_group_sizze_111653)),), + (np.int64(segmap_group_sizze_111653),)) + if synchronous: + sync(self) + mem_124045 = opencl_alloc(self, bytes_121932, "mem_124045") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124045, np.int64(0), + mem_121934, np.int64(0), + np.int64(1), m_75136, + 
num_recresids_padded_75809) + mem_124048 = opencl_alloc(self, bytes_120173, "mem_124048") + if slt64((num_recresids_padded_75809 * np.int64(2)), + segred_group_sizze_111662): + segment_sizze_nonzzero_128327 = smax64(np.int64(1), + num_recresids_padded_75809) + num_threads_128328 = (num_groups_111663 * segred_group_sizze_111662) + if ((1 * (np.int64(num_groups_111663) * np.int64(segred_group_sizze_111662))) != 0): + self.mainzisegred_small_111633_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111662))), + np.int64(m_75136), + np.int64(num_recresids_padded_75809), + np.int64(num_groups_111663), + np.int64(segment_sizze_nonzzero_128327), + mem_124045, + mem_124048) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_111633_var, + ((np.int64(num_groups_111663) * np.int64(segred_group_sizze_111662)),), + (np.int64(segred_group_sizze_111662),)) + if synchronous: + sync(self) + else: + groups_per_segment_128352 = sdiv_up64(num_groups_111663, + smax64(np.int64(1), m_75136)) + elements_per_thread_128353 = sdiv_up64(num_recresids_padded_75809, + (segred_group_sizze_111662 * groups_per_segment_128352)) + virt_num_groups_128354 = (groups_per_segment_128352 * m_75136) + num_threads_128355 = (num_groups_111663 * segred_group_sizze_111662) + threads_per_segment_128356 = (groups_per_segment_128352 * segred_group_sizze_111662) + group_res_arr_mem_128357 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_111662 * virt_num_groups_128354)), + "group_res_arr_mem_128357") + mainzicounter_mem_128359 = self.mainzicounter_mem_128359 + if ((1 * (np.int64(num_groups_111663) * np.int64(segred_group_sizze_111662))) != 0): + self.mainzisegred_large_111633_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111662))), + np.int64(num_recresids_padded_75809), + np.int64(num_groups_111663), + np.int64(groups_per_segment_128352), + 
np.int64(elements_per_thread_128353), + np.int64(virt_num_groups_128354), + mem_124045, + mem_124048, + group_res_arr_mem_128357, + mainzicounter_mem_128359) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_111633_var, + ((np.int64(num_groups_111663) * np.int64(segred_group_sizze_111662)),), + (np.int64(segred_group_sizze_111662),)) + if synchronous: + sync(self) + segmap_usable_groups_111677 = sdiv_up64(m_75136, + segmap_group_sizze_111676) + mem_124051 = opencl_alloc(self, bytes_120173, "mem_124051") + if ((1 * (np.int64(segmap_usable_groups_111677) * np.int64(segmap_group_sizze_111676))) != 0): + self.mainzisegmap_111616_var.set_args(self.global_failure, + np.int64(m_75136), mem_124040, + mem_124048, mem_124051) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111616_var, + ((np.int64(segmap_usable_groups_111677) * np.int64(segmap_group_sizze_111676)),), + (np.int64(segmap_group_sizze_111676),)) + if synchronous: + sync(self) + mem_124048 = None + mem_124054 = opencl_alloc(self, bytes_120173, "mem_124054") + if slt64((num_recresids_padded_75809 * np.int64(2)), + segred_group_sizze_111684): + segment_sizze_nonzzero_128402 = smax64(np.int64(1), + num_recresids_padded_75809) + num_threads_128403 = (num_groups_111685 * segred_group_sizze_111684) + if ((1 * (np.int64(num_groups_111685) * np.int64(segred_group_sizze_111684))) != 0): + self.mainzisegred_small_111603_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111684))), + np.int64(m_75136), + np.int64(num_recresids_padded_75809), + np.int64(num_groups_111685), + np.int64(segment_sizze_nonzzero_128402), + mem_124045, + mem_124051, + mem_124054) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_111603_var, + ((np.int64(num_groups_111685) * np.int64(segred_group_sizze_111684)),), + (np.int64(segred_group_sizze_111684),)) + if synchronous: + sync(self) + else: + groups_per_segment_128423 = sdiv_up64(num_groups_111685, + 
smax64(np.int64(1), m_75136)) + elements_per_thread_128424 = sdiv_up64(num_recresids_padded_75809, + (segred_group_sizze_111684 * groups_per_segment_128423)) + virt_num_groups_128425 = (groups_per_segment_128423 * m_75136) + num_threads_128426 = (num_groups_111685 * segred_group_sizze_111684) + threads_per_segment_128427 = (groups_per_segment_128423 * segred_group_sizze_111684) + group_res_arr_mem_128428 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_111684 * virt_num_groups_128425)), + "group_res_arr_mem_128428") + mainzicounter_mem_128430 = self.mainzicounter_mem_128430 + if ((1 * (np.int64(num_groups_111685) * np.int64(segred_group_sizze_111684))) != 0): + self.mainzisegred_large_111603_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_111684))), + np.int64(num_recresids_padded_75809), + np.int64(num_groups_111685), + np.int64(groups_per_segment_128423), + np.int64(elements_per_thread_128424), + np.int64(virt_num_groups_128425), + np.int64(threads_per_segment_128427), + mem_124045, + mem_124051, + mem_124054, + group_res_arr_mem_128428, + mainzicounter_mem_128430) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_111603_var, + ((np.int64(num_groups_111685) * np.int64(segred_group_sizze_111684)),), + (np.int64(segred_group_sizze_111684),)) + if synchronous: + sync(self) + mem_124051 = None + segmap_usable_groups_111700 = sdiv_up64(m_75136, + segmap_group_sizze_111699) + mem_124057 = opencl_alloc(self, bytes_120173, "mem_124057") + if ((1 * (np.int64(segmap_usable_groups_111700) * np.int64(segmap_group_sizze_111699))) != 0): + self.mainzisegmap_111581_var.set_args(self.global_failure, + np.int64(m_75136), mem_124040, + mem_124054, mem_124057) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111581_var, + ((np.int64(segmap_usable_groups_111700) * np.int64(segmap_group_sizze_111699)),), + (np.int64(segmap_group_sizze_111699),)) + if synchronous: 
+ sync(self) + mem_124040 = None + mem_124054 = None + mem_124061 = opencl_alloc(self, bytes_124022, "mem_124061") + if slt64(np.int64(0), (m_75136 * Nmk_76536)): + stage1_max_num_groups_128467 = self.max_group_size + stage1_num_groups_128468 = smin64(stage1_max_num_groups_128467, + num_groups_111712) + num_threads_128469 = sext_i64_i32((stage1_num_groups_128468 * segscan_group_sizze_111711)) + if ((1 * (np.int64(stage1_num_groups_128468) * np.int64(segscan_group_sizze_111711))) != 0): + self.mainziscan_stage1_111562_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_111711)))), + np.int64(m_75136), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int32(num_threads_128469), + mem_124045, mem_124057, + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage1_111562_var, + ((np.int64(stage1_num_groups_128468) * np.int64(segscan_group_sizze_111711)),), + (np.int64(segscan_group_sizze_111711),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_128468))) != 0): + self.mainziscan_stage2_111562_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_128468)))), + np.int64(m_75136), + np.int64(Nmk_76536), + np.int64(stage1_num_groups_128468), + np.int32(num_threads_128469), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage2_111562_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_128468)),), + (np.int64(stage1_num_groups_128468),)) + if synchronous: + sync(self) + required_groups_128511 = sext_i64_i32(sdiv_up64((m_75136 * Nmk_76536), + segscan_group_sizze_111711)) + if ((1 * (np.int64(num_groups_111712) * np.int64(segscan_group_sizze_111711))) != 0): + self.mainziscan_stage3_111562_var.set_args(self.global_failure, + 
np.int64(m_75136), + np.int64(Nmk_76536), + np.int64(num_groups_111712), + np.int32(num_threads_128469), + np.int32(required_groups_128511), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage3_111562_var, + ((np.int64(num_groups_111712) * np.int64(segscan_group_sizze_111711)),), + (np.int64(segscan_group_sizze_111711),)) + if synchronous: + sync(self) + mem_124045 = None + mem_124057 = None + defunc_3_map_res_mem_124062 = mem_124061 + defunc_3_map_res_mem_124063 = mem_124042 + defunc_3_map_res_mem_124068 = defunc_3_map_res_mem_124062 + defunc_3_map_res_mem_124069 = defunc_3_map_res_mem_124063 + defunc_3_map_res_mem_120230 = None + mem_121934 = None + empty_slice_76585 = (num_recresids_padded_75809 == np.int64(0)) + zzero_leq_i_p_m_t_s_76586 = sle64(np.int64(0), num_recresids_padded_75809) + i_p_m_t_s_leq_w_76587 = slt64(num_recresids_padded_75809, Nmk_76536) + i_lte_j_76588 = sle64(np.int64(1), Nmk_76536) + y_76589 = (zzero_leq_i_p_m_t_s_76586 and i_p_m_t_s_leq_w_76587) + y_76590 = (i_lte_j_76588 and y_76589) + ok_or_empty_76591 = (empty_slice_76585 or y_76590) + index_certs_76592 = True + assert ok_or_empty_76591, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 mroc.fut:57:12-22\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 mroc.fut:78:15-34\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:200:5-74\n #6 bfastfinal.fut:195:1-201:36\n" % ("Index [", + np.int64(1), + ":] out of bounds for array of shape [", + Nmk_76536, + "].")) + range_valid_c_76595 = True + assert valid_76538, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:72:13-18\n #2 mroc.fut:80:16-46\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_76536, + " is invalid.")) + segmap_group_sizze_111788 = self.sizes["main.segmap_group_size_111780"] + segmap_usable_groups_111789 = sdiv_up64(m_75136, + 
segmap_group_sizze_111788) + mem_124072 = opencl_alloc(self, bytes_120173, "mem_124072") + mem_124074 = opencl_alloc(self, bytes_120173, "mem_124074") + if ((1 * (np.int64(segmap_usable_groups_111789) * np.int64(segmap_group_sizze_111788))) != 0): + self.mainzisegmap_111778_var.set_args(self.global_failure, + np.int64(m_75136), + defunc_3_map_res_mem_124069, + mem_124072, mem_124074) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111778_var, + ((np.int64(segmap_usable_groups_111789) * np.int64(segmap_group_sizze_111788)),), + (np.int64(segmap_group_sizze_111788),)) + if synchronous: + sync(self) + segmap_group_sizze_111802 = self.sizes["main.segmap_group_size_111749"] + segmap_usable_groups_111803 = sdiv_up64(nest_sizze_111710, + segmap_group_sizze_111802) + mem_124078 = opencl_alloc(self, bytes_124022, "mem_124078") + if ((1 * (np.int64(segmap_usable_groups_111803) * np.int64(segmap_group_sizze_111802))) != 0): + self.mainzisegmap_111746_var.set_args(self.global_failure, + np.int64(m_75136), + np.float64(conf_75145), + np.int64(Nmk_76536), mem_124072, + mem_124074, mem_124078) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_111746_var, + ((np.int64(segmap_usable_groups_111803) * np.int64(segmap_group_sizze_111802)),), + (np.int64(segmap_group_sizze_111802),)) + if synchronous: + sync(self) + mem_124072 = None + mem_124074 = None + suff_outer_par_111827 = (self.sizes["main.suff_outer_par_21"] <= m_75136) + fits_112103 = sle64(num_recresids_padded_75809, max_group_sizze_103162) + suff_intra_par_112101 = (self.sizes["main.suff_intra_par_22"] <= num_recresids_padded_75809) + intra_suff_and_fits_112104 = (suff_intra_par_112101 and fits_112103) + segmap_group_sizze_111966 = self.sizes["main.segmap_group_size_111831"] + segmap_group_sizze_112417 = self.sizes["main.segmap_group_size_112411"] + segred_group_sizze_112424 = self.sizes["main.segred_group_size_112387"] + max_num_groups_128535 = self.sizes["main.segred_num_groups_112389"] + 
num_groups_112425 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_75810, + segred_group_sizze_112424), + sext_i32_i64(max_num_groups_128535)))) + segmap_group_sizze_112443 = self.sizes["main.segmap_group_size_112284"] + segred_group_sizze_112544 = self.sizes["main.segred_group_size_112262"] + max_num_groups_128536 = self.sizes["main.segred_num_groups_112264"] + num_groups_112545 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_75810, + segred_group_sizze_112544), + sext_i32_i64(max_num_groups_128536)))) + segmap_group_sizze_112560 = self.sizes["main.segmap_group_size_112244"] + bytes_124087 = (np.int64(8) * segmap_group_sizze_111966) + local_memory_capacity_128698 = self.max_local_memory + if (sle64(((bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8))) + (bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128698)) and suff_outer_par_111827): + segmap_usable_groups_111967 = sdiv_up64(m_75136, + segmap_group_sizze_111966) + mem_124081 = opencl_alloc(self, bytes_124022, "mem_124081") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124081, np.int64(0), + defunc_3_map_res_mem_124068, + np.int64(0), np.int64(1), + Nmk_76536, m_75136) + mem_124084 = opencl_alloc(self, bytes_124022, "mem_124084") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124084, np.int64(0), + mem_124078, np.int64(0), + np.int64(1), Nmk_76536, + m_75136) + num_whole_tiles_117920 = squot64(num_recresids_padded_75809, + segmap_group_sizze_111966) + residual_input_118032 = srem64(num_recresids_padded_75809, + segmap_group_sizze_111966) + cond_118033 = (residual_input_118032 == np.int64(0)) + mem_124113 = opencl_alloc(self, bytes_120173, "mem_124113") + if ((1 * (np.int64(segmap_usable_groups_111967) * np.int64(segmap_group_sizze_111966))) != 0): + self.mainzisegmap_intragroup_117900_var.set_args(self.global_failure, + 
cl.LocalMemory(np.int64(bytes_124087)), + cl.LocalMemory(np.int64(bytes_124087)), + np.int64(m_75136), + np.float64(level_75142), + np.int64(num_recresids_padded_75809), + np.int64(num_whole_tiles_117920), + np.int64(residual_input_118032), + np.byte(cond_118033), + defunc_3_map_res_mem_124069, + mem_124081, + mem_124084, + mem_124113) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_117900_var, + ((np.int64(segmap_usable_groups_111967) * np.int64(segmap_group_sizze_111966)),), + (np.int64(segmap_group_sizze_111966),)) + if synchronous: + sync(self) + mem_124081 = None + mem_124084 = None + defunc_1_map_res_mem_124135 = mem_124113 + else: + local_memory_capacity_128697 = self.max_local_memory + if (sle64((((np.int32(8) * num_recresids_padded_75809) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_75809), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_75809) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_75809), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128697)) and intra_suff_and_fits_112104): + mem_124118 = opencl_alloc(self, bytes_120173, "mem_124118") + if ((1 * (np.int64(m_75136) * np.int64(num_recresids_padded_75809))) != 0): + self.mainzisegmap_intragroup_111825_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_75809))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_75809))), + np.float64(level_75142), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + defunc_3_map_res_mem_124068, + defunc_3_map_res_mem_124069, + mem_124078, + mem_124118) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_111825_var, + ((np.int64(m_75136) * np.int64(num_recresids_padded_75809)),), + (np.int64(num_recresids_padded_75809),)) + if synchronous: + sync(self) + defunc_1_map_res_mem_124134 = mem_124118 + else: + segmap_usable_groups_112418 = sdiv_up64(m_75136, + 
segmap_group_sizze_112417) + mem_124121 = opencl_alloc(self, bytes_120173, "mem_124121") + if ((1 * (np.int64(segmap_usable_groups_112418) * np.int64(segmap_group_sizze_112417))) != 0): + self.mainzisegmap_112409_var.set_args(self.global_failure, + np.int64(m_75136), + defunc_3_map_res_mem_124069, + mem_124121) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112409_var, + ((np.int64(segmap_usable_groups_112418) * np.int64(segmap_group_sizze_112417)),), + (np.int64(segmap_group_sizze_112417),)) + if synchronous: + sync(self) + mem_124124 = opencl_alloc(self, bytes_120173, "mem_124124") + if slt64((num_recresids_padded_75809 * np.int64(2)), + segred_group_sizze_112424): + segment_sizze_nonzzero_128567 = smax64(np.int64(1), + num_recresids_padded_75809) + num_threads_128568 = (num_groups_112425 * segred_group_sizze_112424) + if ((1 * (np.int64(num_groups_112425) * np.int64(segred_group_sizze_112424))) != 0): + self.mainzisegred_small_112393_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112424))), + np.int64(m_75136), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(num_groups_112425), + np.int64(segment_sizze_nonzzero_128567), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_112393_var, + ((np.int64(num_groups_112425) * np.int64(segred_group_sizze_112424)),), + (np.int64(segred_group_sizze_112424),)) + if synchronous: + sync(self) + else: + groups_per_segment_128588 = sdiv_up64(num_groups_112425, + smax64(np.int64(1), m_75136)) + elements_per_thread_128589 = sdiv_up64(num_recresids_padded_75809, + (segred_group_sizze_112424 * groups_per_segment_128588)) + virt_num_groups_128590 = (groups_per_segment_128588 * m_75136) + num_threads_128591 = (num_groups_112425 * segred_group_sizze_112424) + threads_per_segment_128592 = (groups_per_segment_128588 * segred_group_sizze_112424) + group_res_arr_mem_128593 = 
opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_112424 * virt_num_groups_128590)), + "group_res_arr_mem_128593") + mainzicounter_mem_128595 = self.mainzicounter_mem_128595 + if ((1 * (np.int64(num_groups_112425) * np.int64(segred_group_sizze_112424))) != 0): + self.mainzisegred_large_112393_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112424))), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(num_groups_112425), + np.int64(groups_per_segment_128588), + np.int64(elements_per_thread_128589), + np.int64(virt_num_groups_128590), + np.int64(threads_per_segment_128592), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124, + group_res_arr_mem_128593, + mainzicounter_mem_128595) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_112393_var, + ((np.int64(num_groups_112425) * np.int64(segred_group_sizze_112424)),), + (np.int64(segred_group_sizze_112424),)) + if synchronous: + sync(self) + mem_124121 = None + segmap_usable_groups_112444 = sdiv_up64(m_75136, + segmap_group_sizze_112443) + mem_124127 = opencl_alloc(self, bytes_120173, "mem_124127") + if ((1 * (np.int64(segmap_usable_groups_112444) * np.int64(segmap_group_sizze_112443))) != 0): + self.mainzisegmap_112282_var.set_args(self.global_failure, + np.int64(m_75136), mem_124124, + mem_124127) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112282_var, + ((np.int64(segmap_usable_groups_112444) * np.int64(segmap_group_sizze_112443)),), + (np.int64(segmap_group_sizze_112443),)) + if synchronous: + sync(self) + mem_124124 = None + mem_124130 = opencl_alloc(self, bytes_120173, "mem_124130") + if slt64((num_recresids_padded_75809 * np.int64(2)), + segred_group_sizze_112544): + segment_sizze_nonzzero_128632 = smax64(np.int64(1), + num_recresids_padded_75809) + num_threads_128633 = (num_groups_112545 * segred_group_sizze_112544) + if ((1 * (np.int64(num_groups_112545) * 
np.int64(segred_group_sizze_112544))) != 0): + self.mainzisegred_small_112268_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112544))), + np.int64(m_75136), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(num_groups_112545), + np.int64(segment_sizze_nonzzero_128632), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_112268_var, + ((np.int64(num_groups_112545) * np.int64(segred_group_sizze_112544)),), + (np.int64(segred_group_sizze_112544),)) + if synchronous: + sync(self) + else: + groups_per_segment_128653 = sdiv_up64(num_groups_112545, + smax64(np.int64(1), m_75136)) + elements_per_thread_128654 = sdiv_up64(num_recresids_padded_75809, + (segred_group_sizze_112544 * groups_per_segment_128653)) + virt_num_groups_128655 = (groups_per_segment_128653 * m_75136) + num_threads_128656 = (num_groups_112545 * segred_group_sizze_112544) + threads_per_segment_128657 = (groups_per_segment_128653 * segred_group_sizze_112544) + group_res_arr_mem_128658 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_112544 * virt_num_groups_128655)), + "group_res_arr_mem_128658") + mainzicounter_mem_128660 = self.mainzicounter_mem_128660 + if ((1 * (np.int64(num_groups_112545) * np.int64(segred_group_sizze_112544))) != 0): + self.mainzisegred_large_112268_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112544))), + np.int64(num_recresids_padded_75809), + np.int64(Nmk_76536), + np.int64(num_groups_112545), + np.int64(groups_per_segment_128653), + np.int64(elements_per_thread_128654), + np.int64(virt_num_groups_128655), + np.int64(threads_per_segment_128657), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130, + group_res_arr_mem_128658, + mainzicounter_mem_128660) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_112268_var, + 
((np.int64(num_groups_112545) * np.int64(segred_group_sizze_112544)),), + (np.int64(segred_group_sizze_112544),)) + if synchronous: + sync(self) + segmap_usable_groups_112561 = sdiv_up64(m_75136, + segmap_group_sizze_112560) + mem_124133 = opencl_alloc(self, bytes_120173, "mem_124133") + if ((1 * (np.int64(segmap_usable_groups_112561) * np.int64(segmap_group_sizze_112560))) != 0): + self.mainzisegmap_112242_var.set_args(self.global_failure, + np.int64(m_75136), + np.float64(level_75142), + defunc_3_map_res_mem_124069, + mem_124127, mem_124130, + mem_124133) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112242_var, + ((np.int64(segmap_usable_groups_112561) * np.int64(segmap_group_sizze_112560)),), + (np.int64(segmap_group_sizze_112560),)) + if synchronous: + sync(self) + mem_124127 = None + mem_124130 = None + defunc_1_map_res_mem_124134 = mem_124133 + defunc_1_map_res_mem_124135 = defunc_1_map_res_mem_124134 + defunc_3_map_res_mem_124068 = None + defunc_3_map_res_mem_124069 = None + mem_124078 = None + hist_inds_mem_124138 = defunc_1_map_res_mem_124135 + else: + mem_124137 = opencl_alloc(self, bytes_120173, "mem_124137") + self.futhark_builtinzhreplicate_i64(mem_124137, m_75136, hist_75144) + hist_inds_mem_124138 = mem_124137 + segmap_group_sizze_112614 = self.sizes["main.segmap_group_size_112590"] + segmap_usable_groups_112615 = sdiv_up64(binop_x_120126, + segmap_group_sizze_112614) + mem_124142 = opencl_alloc(self, bytes_120125, "mem_124142") + if ((1 * (np.int64(segmap_usable_groups_112615) * np.int64(segmap_group_sizze_112614))) != 0): + self.mainzisegmap_112587_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + images_mem_120108, + hist_inds_mem_124138, mem_124142) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112587_var, + ((np.int64(segmap_usable_groups_112615) * np.int64(segmap_group_sizze_112614)),), + (np.int64(segmap_group_sizze_112614),)) + if synchronous: + sync(self) + suff_outer_par_112624 = 
(self.sizes["main.suff_outer_par_23"] <= m_75136) + segmap_group_sizze_112650 = self.sizes["main.segmap_group_size_112628"] + max_num_groups_128704 = self.sizes["main.segmap_num_groups_112630"] + num_groups_112651 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_112650), + sext_i32_i64(max_num_groups_128704)))) + segmap_group_sizze_112827 = self.sizes["main.segmap_group_size_112675"] + max_num_groups_128705 = self.sizes["main.segmap_num_groups_112677"] + num_groups_112828 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_112827), + sext_i32_i64(max_num_groups_128705)))) + suff_outer_par_112832 = (self.sizes["main.suff_outer_par_24"] <= binop_x_120244) + comparatee_112857 = (m_75136 * binop_x_120251) + suff_outer_par_112858 = (self.sizes["main.suff_outer_par_25"] <= comparatee_112857) + nest_sizze_112878 = (n_75139 * comparatee_112857) + segred_group_sizze_112879 = self.sizes["main.segred_group_size_112735"] + max_num_groups_128706 = self.sizes["main.segred_num_groups_112737"] + num_groups_112880 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_112878, + segred_group_sizze_112879), + sext_i32_i64(max_num_groups_128706)))) + Tx_118224 = self.sizes["main.Tx_118222"] + Ty_118225 = self.sizes["main.Ty_118223"] + Ty_118226 = smin64(k2p2zq_75151, Ty_118225) + Tx_118227 = smin64(k2p2zq_75151, Tx_118224) + gridDim_zz_118230 = sdiv_up64(m_75136, np.int64(30)) + group_sizze_tile3d_118233 = (Ty_118226 * Tx_118227) + bytes_124188 = (np.int64(8) * comparatee_112857) + binop_x_124222 = (np.int64(30) * group_sizze_tile3d_118233) + bytes_124220 = (np.int64(8) * binop_x_124222) + binop_x_125361 = (np.int64(8) * Ty_118226) + binop_x_125362 = (Tx_118227 * binop_x_125361) + sizze_125363 = (np.int64(30) * binop_x_125362) + num_threads_126278 = (segmap_group_sizze_112650 * num_groups_112651) + total_sizze_126279 = (bytes_120250 * num_threads_126278) + num_threads_126280 = 
(segmap_group_sizze_112827 * num_groups_112828) + total_sizze_126281 = (bytes_120247 * num_threads_126280) + local_memory_capacity_128828 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128828)) and suff_outer_par_112624): + mem_124145 = opencl_alloc(self, bytes_120125, "mem_124145") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124145, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_75135, m_75136) + mem_124191 = opencl_alloc(self, bytes_124188, "mem_124191") + mem_124149 = opencl_alloc(self, total_sizze_126279, "mem_124149") + if ((1 * (np.int64(num_groups_112651) * np.int64(segmap_group_sizze_112650))) != 0): + self.mainzisegmap_112626_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_112651), + np.int64(num_threads_126278), + binop_p_mem_120117, mem_120124, + mem_124145, mem_124149, + mem_124191) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112626_var, + ((np.int64(num_groups_112651) * np.int64(segmap_group_sizze_112650)),), + (np.int64(segmap_group_sizze_112650),)) + if synchronous: + sync(self) + mem_124145 = None + mem_124149 = None + mem_124292 = opencl_alloc(self, bytes_121997, "mem_124292") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124292, np.int64(0), + mem_124191, np.int64(0), + np.int64(1), m_75136, + (k2p2zq_75151 * k2p2zq_75151)) + mem_124191 = None + defunc_3_map_res_mem_124294 = mem_124292 + else: + local_memory_capacity_128827 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128827)) and suff_outer_par_112832): + mem_124210 = opencl_alloc(self, bytes_121997, "mem_124210") + mem_124194 = opencl_alloc(self, total_sizze_126281, "mem_124194") + if ((1 * (np.int64(num_groups_112828) * np.int64(segmap_group_sizze_112827))) != 0): + self.mainzisegmap_112672_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), 
+ np.int64(k2p2zq_75151), + np.int64(num_groups_112828), + np.int64(num_threads_126280), + mem_120120, mem_120124, + mem_124142, mem_124194, + mem_124210) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_112672_var, + ((np.int64(num_groups_112828) * np.int64(segmap_group_sizze_112827)),), + (np.int64(segmap_group_sizze_112827),)) + if synchronous: + sync(self) + mem_124194 = None + mem_124286 = opencl_alloc(self, bytes_121997, "mem_124286") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124286, np.int64(0), + mem_124210, np.int64(0), + np.int64(1), + (m_75136 * k2p2zq_75151), + k2p2zq_75151) + mem_124210 = None + defunc_3_map_res_mem_124288 = mem_124286 + else: + local_memory_capacity_128826 = self.max_local_memory + if (sle64(np.int64(240), + sext_i32_i64(local_memory_capacity_128826)) and suff_outer_par_112858): + mem_124213 = opencl_alloc(self, bytes_120125, "mem_124213") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124213, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_75135, + m_75136) + gridDim_x_118228 = sdiv_up64(k2p2zq_75151, Tx_118227) + gridDim_y_118229 = sdiv_up64(k2p2zq_75151, Ty_118226) + binop_x_118231 = (gridDim_y_118229 * gridDim_zz_118230) + grid_sizze_tile3d_118232 = (gridDim_x_118228 * binop_x_118231) + count_shmem_118234 = sdiv_up64(np.int64(30), + group_sizze_tile3d_118233) + mem_124273 = opencl_alloc(self, bytes_121997, "mem_124273") + if ((1 * (np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233))) != 0): + self.mainzisegmap_intragroup_118238_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int64(240))), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(Ty_118226), + np.int64(Tx_118227), + np.int64(gridDim_x_118228), + np.int64(gridDim_y_118229), + np.int64(group_sizze_tile3d_118233), + np.int64(count_shmem_118234), + mem_120120, + mem_120124, + mem_124213, + mem_124273) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainzisegmap_intragroup_118238_var, + ((np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233)),), + (np.int64(group_sizze_tile3d_118233),)) + if synchronous: + sync(self) + mem_124213 = None + defunc_3_map_res_mem_124282 = mem_124273 + else: + mem_124276 = opencl_alloc(self, bytes_120110, "mem_124276") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124276, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_75151, + N_75135) + mem_124281 = opencl_alloc(self, bytes_121997, "mem_124281") + if slt64((n_75139 * np.int64(2)), segred_group_sizze_112879): + segment_sizze_nonzzero_128766 = smax64(np.int64(1), n_75139) + num_threads_128767 = (num_groups_112880 * segred_group_sizze_112879) + if ((1 * (np.int64(num_groups_112880) * np.int64(segred_group_sizze_112879))) != 0): + self.mainzisegred_small_112741_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112879))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_112880), + np.int64(segment_sizze_nonzzero_128766), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_112741_var, + ((np.int64(num_groups_112880) * np.int64(segred_group_sizze_112879)),), + (np.int64(segred_group_sizze_112879),)) + if synchronous: + sync(self) + else: + groups_per_segment_128787 = sdiv_up64(num_groups_112880, + smax64(np.int64(1), + ((m_75136 * k2p2zq_75151) * k2p2zq_75151))) + elements_per_thread_128788 = sdiv_up64(n_75139, + (segred_group_sizze_112879 * groups_per_segment_128787)) + virt_num_groups_128789 = (groups_per_segment_128787 * ((m_75136 * k2p2zq_75151) * k2p2zq_75151)) + num_threads_128790 = (num_groups_112880 * segred_group_sizze_112879) + threads_per_segment_128791 = (groups_per_segment_128787 * segred_group_sizze_112879) + group_res_arr_mem_128792 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_112879 * 
virt_num_groups_128789)), + "group_res_arr_mem_128792") + mainzicounter_mem_128794 = self.mainzicounter_mem_128794 + if ((1 * (np.int64(num_groups_112880) * np.int64(segred_group_sizze_112879))) != 0): + self.mainzisegred_large_112741_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_112879))), + np.int64(N_75135), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_112880), + np.int64(groups_per_segment_128787), + np.int64(elements_per_thread_128788), + np.int64(virt_num_groups_128789), + np.int64(threads_per_segment_128791), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281, + group_res_arr_mem_128792, + mainzicounter_mem_128794) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_112741_var, + ((np.int64(num_groups_112880) * np.int64(segred_group_sizze_112879)),), + (np.int64(segred_group_sizze_112879),)) + if synchronous: + sync(self) + mem_124276 = None + defunc_3_map_res_mem_124282 = mem_124281 + defunc_3_map_res_mem_124288 = defunc_3_map_res_mem_124282 + defunc_3_map_res_mem_124294 = defunc_3_map_res_mem_124288 + m_76774 = (np.int64(2) * k2p2zq_75151) + nm_76775 = (k2p2zq_75151 * m_76774) + bounds_invalid_upwards_76776 = slt64(nm_76775, np.int64(0)) + valid_76777 = not(bounds_invalid_upwards_76776) + range_valid_c_76778 = True + assert valid_76777, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:73:21-27\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + nm_76775, + " is invalid.")) + zzero_76780 = (m_76774 == np.int64(0)) + nonzzero_76781 = not(zzero_76780) + nonzzero_cert_76782 = True + assert nonzzero_76781, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:68:41-47\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n" % 
("division by zero",)) + loop_nonempty_76783 = slt64(np.int64(0), k2p2zq_75151) + loop_not_taken_76784 = not(loop_nonempty_76783) + protect_assert_disj_76785 = (nonzzero_76781 or loop_not_taken_76784) + nonzzero_cert_76786 = True + assert protect_assert_disj_76785, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:54:43-49\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n" % ("division by zero",)) + j_m_i_76787 = (m_76774 - k2p2zq_75151) + empty_slice_76788 = (j_m_i_76787 == np.int64(0)) + m_76789 = (j_m_i_76787 - np.int64(1)) + i_p_m_t_s_76790 = (k2p2zq_75151 + m_76789) + zzero_leq_i_p_m_t_s_76791 = sle64(np.int64(0), i_p_m_t_s_76790) + i_p_m_t_s_leq_w_76792 = slt64(i_p_m_t_s_76790, m_76774) + i_lte_j_76793 = sle64(k2p2zq_75151, m_76774) + y_76794 = (i_lte_j_75226 and i_p_m_t_s_leq_w_76792) + y_76795 = (zzero_leq_i_p_m_t_s_76791 and y_76794) + y_76796 = (i_lte_j_76793 and y_76795) + forwards_ok_76797 = (i_lte_j_75226 and y_76796) + ok_or_empty_76798 = (empty_slice_76788 or forwards_ok_76797) + index_ok_76799 = (ok_or_empty_75229 and ok_or_empty_76798) + index_certs_76800 = True + assert index_ok_76799, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-30\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:200:5-74\n #3 bfastfinal.fut:195:1-201:36\n" % ("Index [", + np.int64(0), + ":", + k2p2zq_75151, + ", ", + k2p2zq_75151, + ":", + m_76774, + "] out of bounds for array of shape [", + k2p2zq_75151, + "][", + m_76774, + "].")) + dim_match_76801 = (k2p2zq_75151 == j_m_i_76787) + empty_or_match_cert_76802 = True + assert dim_match_76801, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-45\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:200:5-74\n #3 bfastfinal.fut:195:1-201:36\n" % ("Value of (core language) shape (", + k2p2zq_75151, + ", ", + j_m_i_76787, + ") cannot match shape of type `[", + k2p2zq_75151, + "][", + k2p2zq_75151, + 
"]f64`.")) + fits_112965 = sle64(nm_76775, max_group_sizze_103162) + suff_intra_par_112963 = (self.sizes["main.suff_intra_par_26"] <= nm_76775) + intra_suff_and_fits_112966 = (suff_intra_par_112963 and fits_112965) + nest_sizze_113396 = (m_75136 * nm_76775) + segmap_group_sizze_113397 = self.sizes["main.segmap_group_size_113349"] + suff_intra_par_113428 = (self.sizes["main.suff_intra_par_27"] <= nm_76775) + intra_suff_and_fits_113429 = (fits_112965 and suff_intra_par_113428) + segmap_group_sizze_113477 = self.sizes["main.segmap_group_size_113265"] + segmap_group_sizze_113490 = self.sizes["main.segmap_group_size_113176"] + segmap_group_sizze_113535 = self.sizes["main.segmap_group_size_113156"] + segmap_group_sizze_113545 = self.sizes["main.segmap_group_size_113046"] + segmap_usable_groups_113478 = sdiv_up_safe64(m_75136, + segmap_group_sizze_113477) + segmap_usable_groups_113491 = sdiv_up_safe64(nest_sizze_113396, + segmap_group_sizze_113490) + segmap_usable_groups_113536 = sdiv_up_safe64(nest_sizze_113396, + segmap_group_sizze_113535) + bytes_124297 = (np.int64(8) * nm_76775) + bytes_124320 = (np.int64(8) * nest_sizze_113396) + local_memory_capacity_128885 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128885)) and intra_suff_and_fits_112966): + mem_124318 = opencl_alloc(self, bytes_121997, "mem_124318") + if ((1 * (np.int64(m_75136) * np.int64(nm_76775))) != 0): + self.mainzisegmap_intragroup_112961_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_75151), + np.int64(m_76774), + np.int64(nm_76775), + defunc_3_map_res_mem_124294, + mem_124318) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainzisegmap_intragroup_112961_var, + ((np.int64(m_75136) * np.int64(nm_76775)),), + (np.int64(nm_76775),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_124372 = mem_124318 + else: + segmap_usable_groups_113398 = sdiv_up64(nest_sizze_113396, + segmap_group_sizze_113397) + mem_124322 = opencl_alloc(self, bytes_124320, "mem_124322") + if ((1 * (np.int64(segmap_usable_groups_113398) * np.int64(segmap_group_sizze_113397))) != 0): + self.mainzisegmap_113346_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_76774), + np.int64(nm_76775), + defunc_3_map_res_mem_124294, + mem_124322) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113346_var, + ((np.int64(segmap_usable_groups_113398) * np.int64(segmap_group_sizze_113397)),), + (np.int64(segmap_group_sizze_113397),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + ctx_param_ext_124323 = m_75136 + ctx_param_ext_124324 = nm_76775 + ctx_param_ext_124325 = np.int64(0) + ctx_param_ext_124326 = nm_76775 + ctx_param_ext_124327 = m_75136 + ctx_param_ext_124328 = np.int64(1) + ctx_param_ext_124329 = nm_76775 + mem_param_124330 = mem_124322 + i_113419 = np.int64(0) + one_129870 = np.int64(1) + for counter_129869 in range(k2p2zq_75151): + y_113421 = slt64(i_113419, nm_76775) + index_certs_113422 = True + assert y_113421, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:200:5-74\n #4 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_113419, + "] out of bounds for array of shape [", + nm_76775, + "].")) + local_memory_capacity_128851 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124351 = m_75136 + else: + gauss_jordan_res_ixfn_124351 = ctx_param_ext_124327 + local_memory_capacity_128852 = self.max_local_memory + if 
intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124352 = nm_76775 + else: + gauss_jordan_res_ixfn_124352 = ctx_param_ext_124329 + local_memory_capacity_128853 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124353 = m_75136 + else: + gauss_jordan_res_ixfn_124353 = ctx_param_ext_124323 + local_memory_capacity_128854 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124354 = nm_76775 + else: + gauss_jordan_res_ixfn_124354 = ctx_param_ext_124324 + local_memory_capacity_128855 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124355 = nm_76775 + else: + gauss_jordan_res_ixfn_124355 = ctx_param_ext_124326 + local_memory_capacity_128856 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124356 = np.int64(1) + else: + gauss_jordan_res_ixfn_124356 = ctx_param_ext_124328 + local_memory_capacity_128857 = self.max_local_memory + if intra_suff_and_fits_113429: + gauss_jordan_res_ixfn_124357 = np.int64(0) + else: + gauss_jordan_res_ixfn_124357 = ctx_param_ext_124325 + local_memory_capacity_128879 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128879)) and intra_suff_and_fits_113429): + mem_124342 = opencl_alloc(self, bytes_124320, "mem_124342") + if ((1 * (np.int64(m_75136) * np.int64(nm_76775))) != 0): + self.mainzisegmap_intragroup_113099_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_75151), + np.int64(m_76774), + np.int64(nm_76775), + np.int64(i_113419), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124342) + 
cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_113099_var, + ((np.int64(m_75136) * np.int64(nm_76775)),), + (np.int64(nm_76775),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + gauss_jordan_res_mem_124358 = mem_124342 + else: + mem_124345 = opencl_alloc(self, m_75136, "mem_124345") + if ((1 * (np.int64(segmap_usable_groups_113478) * np.int64(segmap_group_sizze_113477))) != 0): + self.mainzisegmap_113263_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(i_113419), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, mem_124345) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113263_var, + ((np.int64(segmap_usable_groups_113478) * np.int64(segmap_group_sizze_113477)),), + (np.int64(segmap_group_sizze_113477),)) + if synchronous: + sync(self) + mem_124349 = opencl_alloc(self, bytes_124320, "mem_124349") + if ((1 * (np.int64(segmap_usable_groups_113491) * np.int64(segmap_group_sizze_113490))) != 0): + self.mainzisegmap_113173_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_76774), + np.int64(nm_76775), + np.int64(i_113419), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, mem_124345, + mem_124349) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113173_var, + ((np.int64(segmap_usable_groups_113491) * np.int64(segmap_group_sizze_113490)),), + (np.int64(segmap_group_sizze_113490),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124345 = None + if ((1 * (np.int64(segmap_usable_groups_113536) * np.int64(segmap_group_sizze_113535))) != 0): + self.mainzisegmap_113153_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(nm_76775), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + 
np.int64(ctx_param_ext_124328), + mem_param_124330, mem_124349) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113153_var, + ((np.int64(segmap_usable_groups_113536) * np.int64(segmap_group_sizze_113535)),), + (np.int64(segmap_group_sizze_113535),)) + if synchronous: + sync(self) + mem_124349 = None + gauss_jordan_res_mem_124358 = mem_param_124330 + ctx_param_ext_tmp_128842 = gauss_jordan_res_ixfn_124353 + ctx_param_ext_tmp_128843 = gauss_jordan_res_ixfn_124354 + ctx_param_ext_tmp_128844 = gauss_jordan_res_ixfn_124357 + ctx_param_ext_tmp_128845 = gauss_jordan_res_ixfn_124355 + ctx_param_ext_tmp_128846 = gauss_jordan_res_ixfn_124351 + ctx_param_ext_tmp_128847 = gauss_jordan_res_ixfn_124356 + ctx_param_ext_tmp_128848 = gauss_jordan_res_ixfn_124352 + mem_param_tmp_128849 = gauss_jordan_res_mem_124358 + ctx_param_ext_124323 = ctx_param_ext_tmp_128842 + ctx_param_ext_124324 = ctx_param_ext_tmp_128843 + ctx_param_ext_124325 = ctx_param_ext_tmp_128844 + ctx_param_ext_124326 = ctx_param_ext_tmp_128845 + ctx_param_ext_124327 = ctx_param_ext_tmp_128846 + ctx_param_ext_124328 = ctx_param_ext_tmp_128847 + ctx_param_ext_124329 = ctx_param_ext_tmp_128848 + mem_param_124330 = mem_param_tmp_128849 + i_113419 += one_129870 + gauss_jordan_res_r_ixfn_124359 = ctx_param_ext_124323 + gauss_jordan_res_r_ixfn_124360 = ctx_param_ext_124324 + gauss_jordan_res_r_ixfn_124361 = ctx_param_ext_124325 + gauss_jordan_res_r_ixfn_124362 = ctx_param_ext_124326 + gauss_jordan_res_r_ixfn_124363 = ctx_param_ext_124327 + gauss_jordan_res_r_ixfn_124364 = ctx_param_ext_124328 + gauss_jordan_res_r_ixfn_124365 = ctx_param_ext_124329 + gauss_jordan_res_r_mem_124366 = mem_param_124330 + mem_124322 = None + segmap_usable_groups_113546 = sdiv_up64(comparatee_112857, + segmap_group_sizze_113545) + mem_124371 = opencl_alloc(self, bytes_121997, "mem_124371") + if ((1 * (np.int64(segmap_usable_groups_113546) * np.int64(segmap_group_sizze_113545))) != 0): + 
self.mainzisegmap_113042_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(m_76774), + np.int64(nm_76775), + np.int64(gauss_jordan_res_r_ixfn_124361), + np.int64(gauss_jordan_res_r_ixfn_124362), + np.int64(gauss_jordan_res_r_ixfn_124364), + gauss_jordan_res_r_mem_124366, + mem_124371) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113042_var, + ((np.int64(segmap_usable_groups_113546) * np.int64(segmap_group_sizze_113545)),), + (np.int64(segmap_group_sizze_113545),)) + if synchronous: + sync(self) + gauss_jordan_res_r_mem_124366 = None + defunc_3_map_res_mem_124372 = mem_124371 + defunc_3_map_res_mem_124294 = None + suff_outer_par_113553 = (self.sizes["main.suff_outer_par_28"] <= m_75136) + segmap_group_sizze_113575 = self.sizes["main.segmap_group_size_113557"] + max_num_groups_128886 = self.sizes["main.segmap_num_groups_113559"] + num_groups_113576 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_113575), + sext_i32_i64(max_num_groups_128886)))) + suff_outer_par_113660 = (self.sizes["main.suff_outer_par_29"] <= binop_x_120244) + nest_sizze_113676 = (n_75139 * binop_x_120244) + segred_group_sizze_113677 = self.sizes["main.segred_group_size_113619"] + max_num_groups_128887 = self.sizes["main.segred_num_groups_113621"] + num_groups_113678 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_113676, + segred_group_sizze_113677), + sext_i32_i64(max_num_groups_128887)))) + Ty_118372 = self.sizes["main.Ty_118369"] + Ry_118373 = self.sizes["main.Ry_118371"] + Tx_118374 = self.sizes["main.Tx_118368"] + Rx_118375 = self.sizes["main.Rx_118370"] + Tk_118376 = self.sizes["main.Tk_118367"] + TxRx_118379 = (Tx_118374 * Rx_118375) + TyRy_118380 = (Ty_118372 * Ry_118373) + a_loc_szz_118382 = (Tk_118376 * TyRy_118380) + binop_x_118383 = (Tx_118374 * Tk_118376) + b_loc_szz_118384 = (Rx_118375 * binop_x_118383) + group_sizze_118388 = (Ty_118372 * Tx_118374) + binop_x_124405 = 
(Ry_118373 * group_sizze_118388) + binop_x_124406 = (Rx_118375 * binop_x_124405) + bytes_124403 = (np.int64(8) * binop_x_124406) + binop_x_124397 = (Ry_118373 * Rx_118375) + bytes_124396 = (np.int64(8) * binop_x_124397) + bytes_124408 = (np.int64(8) * a_loc_szz_118382) + bytes_124410 = (np.int64(8) * b_loc_szz_118384) + bytes_124479 = (np.int64(8) * binop_x_124405) + binop_x_124485 = (Rx_118375 * group_sizze_118388) + bytes_124483 = (np.int64(8) * binop_x_124485) + bytes_124471 = (np.int64(8) * Ry_118373) + bytes_124473 = (np.int64(8) * Rx_118375) + binop_x_125378 = (np.int64(8) * Ty_118372) + binop_x_125379 = (Tx_118374 * binop_x_125378) + binop_x_125380 = (Ry_118373 * binop_x_125379) + sizze_125381 = (Rx_118375 * binop_x_125380) + num_threads_126291 = (segmap_group_sizze_113575 * num_groups_113576) + total_sizze_126292 = (bytes_120247 * num_threads_126291) + local_memory_capacity_129017 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129017)) and suff_outer_par_113553): + mem_124375 = opencl_alloc(self, bytes_120125, "mem_124375") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124375, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_75135, m_75136) + mem_124393 = opencl_alloc(self, bytes_121990, "mem_124393") + mem_124378 = opencl_alloc(self, total_sizze_126292, "mem_124378") + if ((1 * (np.int64(num_groups_113576) * np.int64(segmap_group_sizze_113575))) != 0): + self.mainzisegmap_113555_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_113576), + np.int64(num_threads_126291), + binop_p_mem_120117, mem_124375, + mem_124378, mem_124393) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113555_var, + ((np.int64(num_groups_113576) * np.int64(segmap_group_sizze_113575)),), + (np.int64(segmap_group_sizze_113575),)) + if synchronous: + sync(self) + mem_124375 = None + mem_124378 = None + mem_124591 = opencl_alloc(self, 
bytes_121990, "mem_124591") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124591, np.int64(0), + mem_124393, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_124393 = None + defunc_3_map_res_mem_124593 = mem_124591 + else: + local_memory_capacity_129016 = self.max_local_memory + if (sle64(((bytes_124408 + srem64((np.int64(8) - srem64(bytes_124408, + np.int64(8))), + np.int64(8))) + (bytes_124410 + srem64((np.int64(8) - srem64(bytes_124410, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129016)) and suff_outer_par_113660): + tk_div_tx_118377 = sdiv_up64(Tk_118376, Tx_118374) + tk_div_ty_118378 = sdiv_up64(Tk_118376, Ty_118372) + gridDim_x_118385 = sdiv_up64(k2p2zq_75151, TxRx_118379) + gridDim_y_118386 = sdiv_up64(m_75136, TyRy_118380) + grid_sizze_118387 = (gridDim_x_118385 * gridDim_y_118386) + full_tiles_118416 = squot64(n_75139, Tk_118376) + kk_118623 = (Tk_118376 * full_tiles_118416) + mem_124583 = opencl_alloc(self, bytes_121990, "mem_124583") + if ((1 * (np.int64(grid_sizze_118387) * np.int64(group_sizze_118388))) != 0): + self.mainzisegmap_intragroup_118391_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124410)), + cl.LocalMemory(np.int64(bytes_124408)), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(gridDim_x_118385), + np.int64(full_tiles_118416), + np.int64(kk_118623), + mem_120120, + mem_124142, + mem_124583) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_118391_var, + ((np.int64(grid_sizze_118387) * np.int64(group_sizze_118388)),), + (np.int64(group_sizze_118388),)) + if synchronous: + sync(self) + defunc_3_map_res_mem_124588 = mem_124583 + else: + mem_124587 = opencl_alloc(self, bytes_121990, "mem_124587") + if slt64((n_75139 * np.int64(2)), segred_group_sizze_113677): + segment_sizze_nonzzero_128956 = smax64(np.int64(1), n_75139) + num_threads_128957 = (num_groups_113678 * segred_group_sizze_113677) + if ((1 * 
(np.int64(num_groups_113678) * np.int64(segred_group_sizze_113677))) != 0): + self.mainzisegred_small_113625_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113677))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_113678), + np.int64(segment_sizze_nonzzero_128956), + binop_p_mem_120117, + mem_124142, mem_124587) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_113625_var, + ((np.int64(num_groups_113678) * np.int64(segred_group_sizze_113677)),), + (np.int64(segred_group_sizze_113677),)) + if synchronous: + sync(self) + else: + groups_per_segment_128977 = sdiv_up64(num_groups_113678, + smax64(np.int64(1), + (m_75136 * k2p2zq_75151))) + elements_per_thread_128978 = sdiv_up64(n_75139, + (segred_group_sizze_113677 * groups_per_segment_128977)) + virt_num_groups_128979 = (groups_per_segment_128977 * (m_75136 * k2p2zq_75151)) + num_threads_128980 = (num_groups_113678 * segred_group_sizze_113677) + threads_per_segment_128981 = (groups_per_segment_128977 * segred_group_sizze_113677) + group_res_arr_mem_128982 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_113677 * virt_num_groups_128979)), + "group_res_arr_mem_128982") + mainzicounter_mem_128984 = self.mainzicounter_mem_128984 + if ((1 * (np.int64(num_groups_113678) * np.int64(segred_group_sizze_113677))) != 0): + self.mainzisegred_large_113625_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113677))), + np.int64(N_75135), + np.int64(n_75139), + np.int64(k2p2zq_75151), + np.int64(num_groups_113678), + np.int64(groups_per_segment_128977), + np.int64(elements_per_thread_128978), + np.int64(virt_num_groups_128979), + np.int64(threads_per_segment_128981), + binop_p_mem_120117, + mem_124142, mem_124587, + group_res_arr_mem_128982, + mainzicounter_mem_128984) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainzisegred_large_113625_var, + ((np.int64(num_groups_113678) * np.int64(segred_group_sizze_113677)),), + (np.int64(segred_group_sizze_113677),)) + if synchronous: + sync(self) + defunc_3_map_res_mem_124588 = mem_124587 + defunc_3_map_res_mem_124593 = defunc_3_map_res_mem_124588 + binop_p_mem_120117 = None + mem_120120 = None + suff_outer_par_113694 = (self.sizes["main.suff_outer_par_30"] <= m_75136) + segmap_group_sizze_113715 = self.sizes["main.segmap_group_size_113698"] + max_num_groups_129018 = self.sizes["main.segmap_num_groups_113700"] + num_groups_113716 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_113715), + sext_i32_i64(max_num_groups_129018)))) + suff_outer_par_113795 = (self.sizes["main.suff_outer_par_31"] <= binop_x_120244) + segred_group_sizze_113810 = self.sizes["main.segred_group_size_113756"] + max_num_groups_129019 = self.sizes["main.segred_num_groups_113758"] + num_groups_113811 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_113810), + sext_i32_i64(max_num_groups_129019)))) + tile_sizze_118835 = self.sizes["main.tile_size_118834"] + group_sizze_118836 = (tile_sizze_118835 * tile_sizze_118835) + bytes_124625 = (np.int64(8) * group_sizze_118836) + binop_x_125394 = (np.int64(8) * tile_sizze_118835) + sizze_125395 = (tile_sizze_118835 * binop_x_125394) + num_threads_126295 = (segmap_group_sizze_113715 * num_groups_113716) + total_sizze_126296 = (bytes_120247 * num_threads_126295) + local_memory_capacity_129105 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129105)) and suff_outer_par_113694): + mem_124597 = opencl_alloc(self, bytes_124188, "mem_124597") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124597, np.int64(0), + defunc_3_map_res_mem_124372, + np.int64(0), np.int64(1), + (k2p2zq_75151 * k2p2zq_75151), + m_75136) + mem_124600 = opencl_alloc(self, bytes_121990, "mem_124600") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_124600, np.int64(0), + defunc_3_map_res_mem_124593, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + mem_124618 = opencl_alloc(self, bytes_121990, "mem_124618") + mem_124603 = opencl_alloc(self, total_sizze_126296, "mem_124603") + if ((1 * (np.int64(num_groups_113716) * np.int64(segmap_group_sizze_113715))) != 0): + self.mainzisegmap_113696_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_113716), + np.int64(num_threads_126295), + mem_124597, mem_124600, + mem_124603, mem_124618) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113696_var, + ((np.int64(num_groups_113716) * np.int64(segmap_group_sizze_113715)),), + (np.int64(segmap_group_sizze_113715),)) + if synchronous: + sync(self) + mem_124597 = None + mem_124600 = None + mem_124603 = None + mem_124657 = opencl_alloc(self, bytes_121990, "mem_124657") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124657, np.int64(0), + mem_124618, np.int64(0), + np.int64(1), m_75136, + k2p2zq_75151) + mem_124618 = None + defunc_4_map_res_mem_124659 = mem_124657 + else: + local_memory_capacity_129104 = self.max_local_memory + if (sle64(((bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8))) + (bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129104)) and suff_outer_par_113795): + mem_124622 = opencl_alloc(self, bytes_121997, "mem_124622") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124622, np.int64(0), + defunc_3_map_res_mem_124372, + np.int64(0), np.int64(1), + k2p2zq_75151, + (m_75136 * k2p2zq_75151)) + num_groups_x_118837 = sdiv_up64(m_75136, tile_sizze_118835) + num_groups_y_118838 = sdiv_up64(k2p2zq_75151, tile_sizze_118835) + num_groups_top_118839 = (num_groups_x_118837 * num_groups_y_118838) + num_whole_tiles_118856 = squot64(k2p2zq_75151, tile_sizze_118835) + residual_input_118983 
= srem64(k2p2zq_75151, tile_sizze_118835) + cond_118984 = (residual_input_118983 == np.int64(0)) + mem_124649 = opencl_alloc(self, bytes_121990, "mem_124649") + if ((1 * (np.int64(num_groups_top_118839) * np.int64(group_sizze_118836))) != 0): + self.mainzisegmap_intragroup_118840_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124625)), + cl.LocalMemory(np.int64(bytes_124625)), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_y_118838), + np.int64(num_whole_tiles_118856), + np.int64(residual_input_118983), + np.byte(cond_118984), + defunc_3_map_res_mem_124593, + mem_124622, + mem_124649) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_118840_var, + ((np.int64(num_groups_top_118839) * np.int64(group_sizze_118836)),), + (np.int64(group_sizze_118836),)) + if synchronous: + sync(self) + mem_124622 = None + defunc_4_map_res_mem_124654 = mem_124649 + else: + mem_124653 = opencl_alloc(self, bytes_121990, "mem_124653") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_113810): + segment_sizze_nonzzero_129044 = smax64(np.int64(1), k2p2zq_75151) + num_threads_129045 = (num_groups_113811 * segred_group_sizze_113810) + if ((1 * (np.int64(num_groups_113811) * np.int64(segred_group_sizze_113810))) != 0): + self.mainzisegred_small_113762_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113810))), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_113811), + np.int64(segment_sizze_nonzzero_129044), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_113762_var, + ((np.int64(num_groups_113811) * np.int64(segred_group_sizze_113810)),), + (np.int64(segred_group_sizze_113810),)) + if synchronous: + sync(self) + else: + groups_per_segment_129065 = sdiv_up64(num_groups_113811, + smax64(np.int64(1), + (m_75136 * k2p2zq_75151))) + elements_per_thread_129066 = 
sdiv_up64(k2p2zq_75151, + (segred_group_sizze_113810 * groups_per_segment_129065)) + virt_num_groups_129067 = (groups_per_segment_129065 * (m_75136 * k2p2zq_75151)) + num_threads_129068 = (num_groups_113811 * segred_group_sizze_113810) + threads_per_segment_129069 = (groups_per_segment_129065 * segred_group_sizze_113810) + group_res_arr_mem_129070 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_113810 * virt_num_groups_129067)), + "group_res_arr_mem_129070") + mainzicounter_mem_129072 = self.mainzicounter_mem_129072 + if ((1 * (np.int64(num_groups_113811) * np.int64(segred_group_sizze_113810))) != 0): + self.mainzisegred_large_113762_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113810))), + np.int64(k2p2zq_75151), + np.int64(num_groups_113811), + np.int64(groups_per_segment_129065), + np.int64(elements_per_thread_129066), + np.int64(virt_num_groups_129067), + np.int64(threads_per_segment_129069), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653, + group_res_arr_mem_129070, + mainzicounter_mem_129072) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_113762_var, + ((np.int64(num_groups_113811) * np.int64(segred_group_sizze_113810)),), + (np.int64(segred_group_sizze_113810),)) + if synchronous: + sync(self) + defunc_4_map_res_mem_124654 = mem_124653 + defunc_4_map_res_mem_124659 = defunc_4_map_res_mem_124654 + defunc_3_map_res_mem_124372 = None + defunc_3_map_res_mem_124593 = None + suff_outer_par_113826 = (self.sizes["main.suff_outer_par_32"] <= m_75136) + segmap_group_sizze_113846 = self.sizes["main.segmap_group_size_113830"] + max_num_groups_129106 = self.sizes["main.segmap_num_groups_113832"] + num_groups_113847 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_113846), + sext_i32_i64(max_num_groups_129106)))) + suff_outer_par_113923 = (self.sizes["main.suff_outer_par_33"] <= 
binop_x_120126) + nest_sizze_113937 = (k2p2zq_75151 * binop_x_120126) + segred_group_sizze_113938 = self.sizes["main.segred_group_size_113886"] + max_num_groups_129107 = self.sizes["main.segred_num_groups_113888"] + num_groups_113939 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_113937, + segred_group_sizze_113938), + sext_i32_i64(max_num_groups_129107)))) + Ty_119113 = self.sizes["main.Ty_119110"] + Ry_119114 = self.sizes["main.Ry_119112"] + Tx_119115 = self.sizes["main.Tx_119109"] + Rx_119116 = self.sizes["main.Rx_119111"] + Tk_119117 = self.sizes["main.Tk_119108"] + TxRx_119120 = (Tx_119115 * Rx_119116) + TyRy_119121 = (Ty_119113 * Ry_119114) + a_loc_szz_119123 = (Tk_119117 * TyRy_119121) + binop_x_119124 = (Tx_119115 * Tk_119117) + b_loc_szz_119125 = (Rx_119116 * binop_x_119124) + group_sizze_119129 = (Ty_119113 * Tx_119115) + bytes_124664 = (np.int64(8) * N_75135) + binop_x_124695 = (Ry_119114 * group_sizze_119129) + binop_x_124696 = (Rx_119116 * binop_x_124695) + bytes_124693 = (np.int64(8) * binop_x_124696) + binop_x_124687 = (Ry_119114 * Rx_119116) + bytes_124686 = (np.int64(8) * binop_x_124687) + bytes_124698 = (np.int64(8) * a_loc_szz_119123) + bytes_124700 = (np.int64(8) * b_loc_szz_119125) + bytes_124769 = (np.int64(8) * binop_x_124695) + binop_x_124775 = (Rx_119116 * group_sizze_119129) + bytes_124773 = (np.int64(8) * binop_x_124775) + bytes_124761 = (np.int64(8) * Ry_119114) + bytes_124763 = (np.int64(8) * Rx_119116) + binop_x_125408 = (np.int64(8) * Ty_119113) + binop_x_125409 = (Tx_119115 * binop_x_125408) + binop_x_125410 = (Ry_119114 * binop_x_125409) + sizze_125411 = (Rx_119116 * binop_x_125410) + num_threads_126299 = (segmap_group_sizze_113846 * num_groups_113847) + total_sizze_126300 = (bytes_124664 * num_threads_126299) + local_memory_capacity_129237 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129237)) and suff_outer_par_113826): + mem_124662 = opencl_alloc(self, bytes_121990, 
"mem_124662") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124662, np.int64(0), + defunc_4_map_res_mem_124659, + np.int64(0), np.int64(1), + k2p2zq_75151, m_75136) + mem_124680 = opencl_alloc(self, bytes_120125, "mem_124680") + mem_124665 = opencl_alloc(self, total_sizze_126300, "mem_124665") + if ((1 * (np.int64(num_groups_113847) * np.int64(segmap_group_sizze_113846))) != 0): + self.mainzisegmap_113828_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_113847), + np.int64(num_threads_126299), + mem_120124, mem_124662, + mem_124665, mem_124680) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_113828_var, + ((np.int64(num_groups_113847) * np.int64(segmap_group_sizze_113846)),), + (np.int64(segmap_group_sizze_113846),)) + if synchronous: + sync(self) + mem_124662 = None + mem_124665 = None + mem_124881 = opencl_alloc(self, bytes_120125, "mem_124881") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124881, np.int64(0), + mem_124680, np.int64(0), + np.int64(1), m_75136, N_75135) + mem_124680 = None + defunc_3_map_res_mem_124883 = mem_124881 + else: + local_memory_capacity_129236 = self.max_local_memory + if (sle64(((bytes_124698 + srem64((np.int64(8) - srem64(bytes_124698, + np.int64(8))), + np.int64(8))) + (bytes_124700 + srem64((np.int64(8) - srem64(bytes_124700, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129236)) and suff_outer_par_113923): + mem_124683 = opencl_alloc(self, bytes_120110, "mem_124683") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124683, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_75151, + N_75135) + tk_div_tx_119118 = sdiv_up64(Tk_119117, Tx_119115) + tk_div_ty_119119 = sdiv_up64(Tk_119117, Ty_119113) + gridDim_x_119126 = sdiv_up64(N_75135, TxRx_119120) + gridDim_y_119127 = sdiv_up64(m_75136, TyRy_119121) + grid_sizze_119128 = (gridDim_x_119126 * gridDim_y_119127) + full_tiles_119157 = squot64(k2p2zq_75151, 
Tk_119117) + kk_119360 = (Tk_119117 * full_tiles_119157) + mem_124873 = opencl_alloc(self, bytes_120125, "mem_124873") + if ((1 * (np.int64(grid_sizze_119128) * np.int64(group_sizze_119129))) != 0): + self.mainzisegmap_intragroup_119132_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124700)), + cl.LocalMemory(np.int64(bytes_124698)), + np.int64(N_75135), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(gridDim_x_119126), + np.int64(full_tiles_119157), + np.int64(kk_119360), + defunc_4_map_res_mem_124659, + mem_124683, + mem_124873) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_119132_var, + ((np.int64(grid_sizze_119128) * np.int64(group_sizze_119129)),), + (np.int64(group_sizze_119129),)) + if synchronous: + sync(self) + mem_124683 = None + defunc_3_map_res_mem_124878 = mem_124873 + else: + mem_124877 = opencl_alloc(self, bytes_120125, "mem_124877") + if slt64((k2p2zq_75151 * np.int64(2)), segred_group_sizze_113938): + segment_sizze_nonzzero_129176 = smax64(np.int64(1), k2p2zq_75151) + num_threads_129177 = (num_groups_113939 * segred_group_sizze_113938) + if ((1 * (np.int64(num_groups_113939) * np.int64(segred_group_sizze_113938))) != 0): + self.mainzisegred_small_113892_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113938))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(k2p2zq_75151), + np.int64(num_groups_113939), + np.int64(segment_sizze_nonzzero_129176), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_113892_var, + ((np.int64(num_groups_113939) * np.int64(segred_group_sizze_113938)),), + (np.int64(segred_group_sizze_113938),)) + if synchronous: + sync(self) + else: + groups_per_segment_129197 = sdiv_up64(num_groups_113939, + smax64(np.int64(1), + (m_75136 * N_75135))) + elements_per_thread_129198 = sdiv_up64(k2p2zq_75151, + (segred_group_sizze_113938 * 
groups_per_segment_129197)) + virt_num_groups_129199 = (groups_per_segment_129197 * (m_75136 * N_75135)) + num_threads_129200 = (num_groups_113939 * segred_group_sizze_113938) + threads_per_segment_129201 = (groups_per_segment_129197 * segred_group_sizze_113938) + group_res_arr_mem_129202 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_113938 * virt_num_groups_129199)), + "group_res_arr_mem_129202") + mainzicounter_mem_129204 = self.mainzicounter_mem_129204 + if ((1 * (np.int64(num_groups_113939) * np.int64(segred_group_sizze_113938))) != 0): + self.mainzisegred_large_113892_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_113938))), + np.int64(N_75135), + np.int64(k2p2zq_75151), + np.int64(num_groups_113939), + np.int64(groups_per_segment_129197), + np.int64(elements_per_thread_129198), + np.int64(virt_num_groups_129199), + np.int64(threads_per_segment_129201), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877, + group_res_arr_mem_129202, + mainzicounter_mem_129204) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_113892_var, + ((np.int64(num_groups_113939) * np.int64(segred_group_sizze_113938)),), + (np.int64(segred_group_sizze_113938),)) + if synchronous: + sync(self) + defunc_3_map_res_mem_124878 = mem_124877 + defunc_3_map_res_mem_124883 = defunc_3_map_res_mem_124878 + mem_120124 = None + defunc_4_map_res_mem_124659 = None + i_76911 = (N_75135 - np.int64(1)) + x_76912 = sle64(np.int64(0), i_76911) + y_76913 = slt64(i_76911, N_75135) + bounds_check_76914 = (x_76912 and y_76913) + index_certs_76915 = True + assert bounds_check_76914, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 bfastfinal.fut:87:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:82:5-88:25\n #6 bfastfinal.fut:200:5-74\n #7 bfastfinal.fut:195:1-201:36\n" % ("Index [", + i_76911, + "] 
out of bounds for array of shape [", + N_75135, + "].")) + fits_113965 = sle64(N_75135, max_group_sizze_103162) + suff_intra_par_113963 = (self.sizes["main.suff_intra_par_34"] <= N_75135) + intra_suff_and_fits_113966 = (suff_intra_par_113963 and fits_113965) + segscan_group_sizze_114101 = self.sizes["main.segscan_group_size_114078"] + max_num_groups_129238 = self.sizes["main.segscan_num_groups_114080"] + num_groups_114102 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120126, + segscan_group_sizze_114101), + sext_i32_i64(max_num_groups_129238)))) + segmap_group_sizze_114150 = self.sizes["main.segmap_group_size_114009"] + local_memory_capacity_129322 = self.max_local_memory + if (sle64(((((bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129322)) and intra_suff_and_fits_113966): + mem_124896 = opencl_alloc(self, bytes_120173, "mem_124896") + mem_124899 = opencl_alloc(self, bytes_120125, "mem_124899") + mem_124902 = opencl_alloc(self, bytes_120125, "mem_124902") + if ((1 * (np.int64(m_75136) * np.int64(N_75135))) != 0): + self.mainzisegmap_intragroup_113961_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + np.int64(N_75135), + np.int64(i_76911), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124896, mem_124899, + mem_124902) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_113961_var, + ((np.int64(m_75136) * np.int64(N_75135)),), + (np.int64(N_75135),)) + if synchronous: + sync(self) + defunc_4_map_res_mem_124919 = 
mem_124896 + defunc_4_map_res_mem_124920 = mem_124899 + defunc_4_map_res_mem_124921 = mem_124902 + else: + mem_124906 = opencl_alloc(self, bytes_120125, "mem_124906") + mem_124909 = opencl_alloc(self, bytes_120125, "mem_124909") + if slt64(np.int64(0), (m_75136 * N_75135)): + stage1_max_num_groups_129256 = self.max_group_size + stage1_num_groups_129257 = smin64(stage1_max_num_groups_129256, + num_groups_114102) + num_threads_129258 = sext_i64_i32((stage1_num_groups_129257 * segscan_group_sizze_114101)) + if ((1 * (np.int64(stage1_num_groups_129257) * np.int64(segscan_group_sizze_114101))) != 0): + self.mainziscan_stage1_114084_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_114101)))), + np.int64(N_75135), + np.int64(m_75136), + np.int32(num_threads_129258), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124906, mem_124909) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage1_114084_var, + ((np.int64(stage1_num_groups_129257) * np.int64(segscan_group_sizze_114101)),), + (np.int64(segscan_group_sizze_114101),)) + if synchronous: + sync(self) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129257))) != 0): + self.mainziscan_stage2_114084_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129257)))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(stage1_num_groups_129257), + np.int32(num_threads_129258), + mem_124906) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage2_114084_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129257)),), + (np.int64(stage1_num_groups_129257),)) + if synchronous: + sync(self) + required_groups_129300 = sext_i64_i32(sdiv_up64((m_75136 * N_75135), + segscan_group_sizze_114101)) + if ((1 * (np.int64(num_groups_114102) * np.int64(segscan_group_sizze_114101))) != 0): + self.mainziscan_stage3_114084_var.set_args(self.global_failure, + 
np.int64(N_75135), + np.int64(m_75136), + np.int64(num_groups_114102), + np.int32(num_threads_129258), + np.int32(required_groups_129300), + mem_124906) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage3_114084_var, + ((np.int64(num_groups_114102) * np.int64(segscan_group_sizze_114101)),), + (np.int64(segscan_group_sizze_114101),)) + if synchronous: + sync(self) + mem_124911 = opencl_alloc(self, bytes_120173, "mem_124911") + group_sizze_129315 = self.sizes["main.group_size_129315"] + num_groups_129316 = sdiv_up64(m_75136, group_sizze_129315) + if ((1 * (np.int64(num_groups_129316) * np.int64(group_sizze_129315))) != 0): + self.mainzicopy_129312_var.set_args(np.int64(N_75135), + np.int64(m_75136), + np.int64(i_76911), mem_124906, + mem_124911) + cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_129312_var, + ((np.int64(num_groups_129316) * np.int64(group_sizze_129315)),), + (np.int64(group_sizze_129315),)) + if synchronous: + sync(self) + mem_124914 = opencl_alloc(self, bytes_120125, "mem_124914") + self.futhark_builtinzhreplicate_f64(mem_124914, (m_75136 * N_75135), + np.nan) + mem_124917 = opencl_alloc(self, bytes_120125, "mem_124917") + self.futhark_builtinzhreplicate_i64(mem_124917, (m_75136 * N_75135), + np.int64(0)) + segmap_usable_groups_114151 = sdiv_up64(binop_x_120126, + segmap_group_sizze_114150) + if ((1 * (np.int64(segmap_usable_groups_114151) * np.int64(segmap_group_sizze_114150))) != 0): + self.mainzisegmap_114006_var.set_args(self.global_failure, + np.int64(N_75135), + np.int64(m_75136), mem_124906, + mem_124909, mem_124914, + mem_124917) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114006_var, + ((np.int64(segmap_usable_groups_114151) * np.int64(segmap_group_sizze_114150)),), + (np.int64(segmap_group_sizze_114150),)) + if synchronous: + sync(self) + mem_124906 = None + mem_124909 = None + defunc_4_map_res_mem_124919 = mem_124911 + defunc_4_map_res_mem_124920 = mem_124914 + defunc_4_map_res_mem_124921 = mem_124917 + 
defunc_3_map_res_mem_124883 = None + suff_outer_par_114175 = (self.sizes["main.suff_outer_par_35"] <= m_75136) + suff_intra_par_114249 = (self.sizes["main.suff_intra_par_36"] <= n_75139) + intra_suff_and_fits_114252 = (fits_103163 and suff_intra_par_114249) + segmap_group_sizze_114213 = self.sizes["main.segmap_group_size_114179"] + segred_group_sizze_114348 = self.sizes["main.segred_group_size_114331"] + max_num_groups_129323 = self.sizes["main.segred_num_groups_114333"] + num_groups_114349 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_103196, + segred_group_sizze_114348), + sext_i32_i64(max_num_groups_129323)))) + segred_group_sizze_114362 = self.sizes["main.segred_group_size_114307"] + max_num_groups_129324 = self.sizes["main.segred_num_groups_114309"] + num_groups_114363 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_103196, + segred_group_sizze_114362), + sext_i32_i64(max_num_groups_129324)))) + segmap_group_sizze_114381 = self.sizes["main.segmap_group_size_114292"] + local_memory_capacity_129472 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129472)) and suff_outer_par_114175): + segmap_usable_groups_114214 = sdiv_up64(m_75136, + segmap_group_sizze_114213) + mem_124924 = opencl_alloc(self, bytes_120125, "mem_124924") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124924, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_75135, m_75136) + mem_124927 = opencl_alloc(self, bytes_120125, "mem_124927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124927, np.int64(0), + defunc_4_map_res_mem_124920, + np.int64(0), np.int64(1), + N_75135, m_75136) + mem_124930 = opencl_alloc(self, bytes_120173, "mem_124930") + mem_124932 = opencl_alloc(self, bytes_120173, "mem_124932") + mem_124934 = opencl_alloc(self, bytes_120173, "mem_124934") + if ((1 * (np.int64(segmap_usable_groups_114214) * np.int64(segmap_group_sizze_114213))) != 0): + 
self.mainzisegmap_114177_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.float64(hfrac_75141), + np.int64(k2p2_75149), mem_124924, + mem_124927, mem_124930, + mem_124932, mem_124934) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114177_var, + ((np.int64(segmap_usable_groups_114214) * np.int64(segmap_group_sizze_114213)),), + (np.int64(segmap_group_sizze_114213),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124924 = None + mem_124927 = None + defunc_3_map_res_mem_124958 = mem_124930 + defunc_3_map_res_mem_124959 = mem_124932 + defunc_3_map_res_mem_124960 = mem_124934 + else: + local_memory_capacity_129471 = self.max_local_memory + if (sle64((((np.int32(8) * n_75139) + srem64((np.int64(8) - srem64((np.int32(8) * n_75139), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * n_75139) + srem64((np.int64(8) - srem64((np.int32(8) * n_75139), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129471)) and intra_suff_and_fits_114252): + mem_124939 = opencl_alloc(self, bytes_120173, "mem_124939") + mem_124941 = opencl_alloc(self, bytes_120173, "mem_124941") + mem_124943 = opencl_alloc(self, bytes_120173, "mem_124943") + if ((1 * (np.int64(m_75136) * np.int64(n_75139))) != 0): + self.mainzisegmap_intragroup_114173_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * n_75139))), + cl.LocalMemory(np.int64((np.int32(8) * n_75139))), + np.int64(N_75135), + np.int64(n_75139), + np.float64(hfrac_75141), + np.int64(k2p2_75149), + mem_124142, + defunc_4_map_res_mem_124920, + mem_124939, + mem_124941, + mem_124943) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_114173_var, + ((np.int64(m_75136) * np.int64(n_75139)),), + (np.int64(n_75139),)) + if synchronous: + sync(self) + self.failure_is_an_option = 
np.int32(1) + defunc_3_map_res_mem_124955 = mem_124939 + defunc_3_map_res_mem_124956 = mem_124941 + defunc_3_map_res_mem_124957 = mem_124943 + else: + mem_124946 = opencl_alloc(self, bytes_120173, "mem_124946") + if slt64((n_75139 * np.int64(2)), segred_group_sizze_114348): + segment_sizze_nonzzero_129346 = smax64(np.int64(1), n_75139) + num_threads_129347 = (num_groups_114349 * segred_group_sizze_114348) + if ((1 * (np.int64(num_groups_114349) * np.int64(segred_group_sizze_114348))) != 0): + self.mainzisegred_small_114337_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114348))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(num_groups_114349), + np.int64(segment_sizze_nonzzero_129346), + mem_124142, mem_124946) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_114337_var, + ((np.int64(num_groups_114349) * np.int64(segred_group_sizze_114348)),), + (np.int64(segred_group_sizze_114348),)) + if synchronous: + sync(self) + else: + groups_per_segment_129367 = sdiv_up64(num_groups_114349, + smax64(np.int64(1), m_75136)) + elements_per_thread_129368 = sdiv_up64(n_75139, + (segred_group_sizze_114348 * groups_per_segment_129367)) + virt_num_groups_129369 = (groups_per_segment_129367 * m_75136) + num_threads_129370 = (num_groups_114349 * segred_group_sizze_114348) + threads_per_segment_129371 = (groups_per_segment_129367 * segred_group_sizze_114348) + group_res_arr_mem_129372 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_114348 * virt_num_groups_129369)), + "group_res_arr_mem_129372") + mainzicounter_mem_129374 = self.mainzicounter_mem_129374 + if ((1 * (np.int64(num_groups_114349) * np.int64(segred_group_sizze_114348))) != 0): + self.mainzisegred_large_114337_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114348))), + np.int64(N_75135), + np.int64(n_75139), + 
np.int64(num_groups_114349), + np.int64(groups_per_segment_129367), + np.int64(elements_per_thread_129368), + np.int64(virt_num_groups_129369), + np.int64(threads_per_segment_129371), + mem_124142, mem_124946, + group_res_arr_mem_129372, + mainzicounter_mem_129374) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_114337_var, + ((np.int64(num_groups_114349) * np.int64(segred_group_sizze_114348)),), + (np.int64(segred_group_sizze_114348),)) + if synchronous: + sync(self) + mem_124949 = opencl_alloc(self, bytes_120173, "mem_124949") + if slt64((n_75139 * np.int64(2)), segred_group_sizze_114362): + segment_sizze_nonzzero_129406 = smax64(np.int64(1), n_75139) + num_threads_129407 = (num_groups_114363 * segred_group_sizze_114362) + if ((1 * (np.int64(num_groups_114363) * np.int64(segred_group_sizze_114362))) != 0): + self.mainzisegred_small_114313_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114362))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + np.int64(num_groups_114363), + np.int64(segment_sizze_nonzzero_129406), + defunc_4_map_res_mem_124920, + mem_124946, mem_124949) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_114313_var, + ((np.int64(num_groups_114363) * np.int64(segred_group_sizze_114362)),), + (np.int64(segred_group_sizze_114362),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + else: + groups_per_segment_129427 = sdiv_up64(num_groups_114363, + smax64(np.int64(1), m_75136)) + elements_per_thread_129428 = sdiv_up64(n_75139, + (segred_group_sizze_114362 * groups_per_segment_129427)) + virt_num_groups_129429 = (groups_per_segment_129427 * m_75136) + num_threads_129430 = (num_groups_114363 * segred_group_sizze_114362) + threads_per_segment_129431 = (groups_per_segment_129427 * segred_group_sizze_114362) + group_res_arr_mem_129432 = opencl_alloc(self, + (np.int32(8) * 
(segred_group_sizze_114362 * virt_num_groups_129429)), + "group_res_arr_mem_129432") + mainzicounter_mem_129434 = self.mainzicounter_mem_129434 + if ((1 * (np.int64(num_groups_114363) * np.int64(segred_group_sizze_114362))) != 0): + self.mainzisegred_large_114313_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114362))), + np.int64(N_75135), + np.int64(n_75139), + np.int64(num_groups_114363), + np.int64(groups_per_segment_129427), + np.int64(elements_per_thread_129428), + np.int64(virt_num_groups_129429), + np.int64(threads_per_segment_129431), + defunc_4_map_res_mem_124920, + mem_124946, mem_124949, + group_res_arr_mem_129432, + mainzicounter_mem_129434) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_114313_var, + ((np.int64(num_groups_114363) * np.int64(segred_group_sizze_114362)),), + (np.int64(segred_group_sizze_114362),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + segmap_usable_groups_114382 = sdiv_up64(m_75136, + segmap_group_sizze_114381) + mem_124952 = opencl_alloc(self, bytes_120173, "mem_124952") + mem_124954 = opencl_alloc(self, bytes_120173, "mem_124954") + if ((1 * (np.int64(segmap_usable_groups_114382) * np.int64(segmap_group_sizze_114381))) != 0): + self.mainzisegmap_114290_var.set_args(self.global_failure, + np.int64(m_75136), + np.float64(hfrac_75141), + np.int64(k2p2_75149), + mem_124946, mem_124949, + mem_124952, mem_124954) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114290_var, + ((np.int64(segmap_usable_groups_114382) * np.int64(segmap_group_sizze_114381)),), + (np.int64(segmap_group_sizze_114381),)) + if synchronous: + sync(self) + mem_124949 = None + defunc_3_map_res_mem_124955 = mem_124952 + defunc_3_map_res_mem_124956 = mem_124946 + defunc_3_map_res_mem_124957 = mem_124954 + defunc_3_map_res_mem_124958 = defunc_3_map_res_mem_124955 
+ defunc_3_map_res_mem_124959 = defunc_3_map_res_mem_124956 + defunc_3_map_res_mem_124960 = defunc_3_map_res_mem_124957 + mem_124142 = None + segred_group_sizze_114402 = self.sizes["main.segred_group_size_114401"] + max_num_groups_129473 = self.sizes["main.segred_num_groups_114403"] + num_groups_114404 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segred_group_sizze_114402), + sext_i32_i64(max_num_groups_129473)))) + mem_124963 = opencl_alloc(self, np.int64(8), "mem_124963") + mainzicounter_mem_129474 = self.mainzicounter_mem_129474 + group_res_arr_mem_129476 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_114402 * num_groups_114404)), + "group_res_arr_mem_129476") + num_threads_129478 = (num_groups_114404 * segred_group_sizze_114402) + if ((1 * (np.int64(num_groups_114404) * np.int64(segred_group_sizze_114402))) != 0): + self.mainzisegred_nonseg_114409_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114402))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_75136), + np.int64(num_groups_114404), + np.int64(num_threads_129478), + defunc_3_map_res_mem_124958, + mem_124963, + mainzicounter_mem_129474, + group_res_arr_mem_129476) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_nonseg_114409_var, + ((np.int64(num_groups_114404) * np.int64(segred_group_sizze_114402)),), + (np.int64(segred_group_sizze_114402),)) + if synchronous: + sync(self) + read_res_129877 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129877, mem_124963, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_comm_res_76995 = read_res_129877[0] + mem_124963 = None + bounds_invalid_upwards_77000 = slt64(defunc_2_reduce_comm_res_76995, + np.int64(0)) + valid_77001 = not(bounds_invalid_upwards_77000) + range_valid_c_77002 = True + assert valid_77001, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 
bfastfinal.fut:113:34-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + defunc_2_reduce_comm_res_76995, + " is invalid.")) + suff_outer_par_114411 = (self.sizes["main.suff_outer_par_37"] <= m_75136) + segmap_group_sizze_114437 = self.sizes["main.segmap_group_size_114415"] + nest_sizze_114486 = (m_75136 * defunc_2_reduce_comm_res_76995) + segred_group_sizze_114487 = self.sizes["main.segred_group_size_114461"] + max_num_groups_129504 = self.sizes["main.segred_num_groups_114463"] + num_groups_114488 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_114486, + segred_group_sizze_114487), + sext_i32_i64(max_num_groups_129504)))) + local_memory_capacity_129571 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129571)) and suff_outer_par_114411): + segmap_usable_groups_114438 = sdiv_up64(m_75136, + segmap_group_sizze_114437) + mem_124966 = opencl_alloc(self, bytes_120173, "mem_124966") + if ((1 * (np.int64(segmap_usable_groups_114438) * np.int64(segmap_group_sizze_114437))) != 0): + self.mainzisegmap_114413_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_75135), + np.int64(m_75136), + np.int64(defunc_2_reduce_comm_res_76995), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124966) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114413_var, + ((np.int64(segmap_usable_groups_114438) * np.int64(segmap_group_sizze_114437)),), + (np.int64(segmap_group_sizze_114437),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_0_f_res_mem_124970 = mem_124966 + else: + mem_124969 = opencl_alloc(self, bytes_120173, "mem_124969") + if slt64((defunc_2_reduce_comm_res_76995 * np.int64(2)), + segred_group_sizze_114487): + 
segment_sizze_nonzzero_129511 = smax64(np.int64(1), + defunc_2_reduce_comm_res_76995) + num_threads_129512 = (num_groups_114488 * segred_group_sizze_114487) + if ((1 * (np.int64(num_groups_114488) * np.int64(segred_group_sizze_114487))) != 0): + self.mainzisegred_small_114467_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114487))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(defunc_2_reduce_comm_res_76995), + np.int64(num_groups_114488), + np.int64(segment_sizze_nonzzero_129511), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124969) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_114467_var, + ((np.int64(num_groups_114488) * np.int64(segred_group_sizze_114487)),), + (np.int64(segred_group_sizze_114487),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + else: + groups_per_segment_129532 = sdiv_up64(num_groups_114488, + smax64(np.int64(1), m_75136)) + elements_per_thread_129533 = sdiv_up64(defunc_2_reduce_comm_res_76995, + (segred_group_sizze_114487 * groups_per_segment_129532)) + virt_num_groups_129534 = (groups_per_segment_129532 * m_75136) + num_threads_129535 = (num_groups_114488 * segred_group_sizze_114487) + threads_per_segment_129536 = (groups_per_segment_129532 * segred_group_sizze_114487) + group_res_arr_mem_129537 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_114487 * virt_num_groups_129534)), + "group_res_arr_mem_129537") + mainzicounter_mem_129539 = self.mainzicounter_mem_129539 + if ((1 * (np.int64(num_groups_114488) * np.int64(segred_group_sizze_114487))) != 0): + self.mainzisegred_large_114467_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114487))), + np.int64(N_75135), + 
np.int64(defunc_2_reduce_comm_res_76995), + np.int64(num_groups_114488), + np.int64(groups_per_segment_129532), + np.int64(elements_per_thread_129533), + np.int64(virt_num_groups_129534), + np.int64(threads_per_segment_129536), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124969, + group_res_arr_mem_129537, + mainzicounter_mem_129539) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_114467_var, + ((np.int64(num_groups_114488) * np.int64(segred_group_sizze_114487)),), + (np.int64(segred_group_sizze_114487),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_0_f_res_mem_124970 = mem_124969 + iota_arg_77024 = (N_75135 - n_75139) + bounds_invalid_upwards_77025 = slt64(iota_arg_77024, np.int64(0)) + valid_77026 = not(bounds_invalid_upwards_77025) + range_valid_c_77027 = True + assert valid_77026, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:120:22-31\n #2 bfastfinal.fut:200:5-74\n #3 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_77024, + " is invalid.")) + index_certs_77029 = True + assert y_75235, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:118:72-92\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:200:5-74\n #3 bfastfinal.fut:195:1-201:36\n" % ("Index [", + m_75231, + "] out of bounds for array of shape [", + N_75135, + "].")) + read_res_129879 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129879, mappingindices_mem_120107, + device_offset=(np.int64(m_75231) * 8), + is_blocking=synchronous) + sync(self) + i64_arg_77030 = read_res_129879[0] + i64_res_77031 = sitofp_i64_f64(i64_arg_77030) + segmap_group_sizze_114530 = self.sizes["main.segmap_group_size_114512"] + segmap_usable_groups_114531 = sdiv_up64(iota_arg_77024, + segmap_group_sizze_114530) + bytes_124972 = (np.int64(8) * iota_arg_77024) + mem_124973 = 
opencl_alloc(self, bytes_124972, "mem_124973") + if ((1 * (np.int64(segmap_usable_groups_114531) * np.int64(segmap_group_sizze_114530))) != 0): + self.mainzisegmap_114510_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_75135), + np.int64(n_75139), + np.float64(lam_75143), + np.int64(iota_arg_77024), + np.float64(i64_res_77031), + mappingindices_mem_120107, + mem_124973) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114510_var, + ((np.int64(segmap_usable_groups_114531) * np.int64(segmap_group_sizze_114530)),), + (np.int64(segmap_group_sizze_114530),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + range_valid_c_77048 = True + assert valid_77026, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:130:20-31\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:200:5-74\n #5 bfastfinal.fut:195:1-201:36\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_77024, + " is invalid.")) + fits_114560 = sle64(iota_arg_77024, max_group_sizze_103162) + suff_intra_par_114558 = (self.sizes["main.suff_intra_par_38"] <= iota_arg_77024) + intra_suff_and_fits_114561 = (suff_intra_par_114558 and fits_114560) + segmap_group_sizze_114854 = self.sizes["main.segmap_group_size_114834"] + max_num_groups_129577 = self.sizes["main.segmap_num_groups_114836"] + num_groups_114855 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_75136, + segmap_group_sizze_114854), + sext_i32_i64(max_num_groups_129577)))) + nest_sizze_114876 = (m_75136 * iota_arg_77024) + segscan_group_sizze_114877 = self.sizes["main.segscan_group_size_114787"] + max_num_groups_129578 = self.sizes["main.segscan_num_groups_114789"] + num_groups_114878 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_114876, + segscan_group_sizze_114877), + sext_i32_i64(max_num_groups_129578)))) + segred_group_sizze_114921 = 
self.sizes["main.segred_group_size_114733"] + max_num_groups_129579 = self.sizes["main.segred_num_groups_114735"] + num_groups_114922 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_114876, + segred_group_sizze_114921), + sext_i32_i64(max_num_groups_129579)))) + segmap_group_sizze_114960 = self.sizes["main.segmap_group_size_114683"] + bytes_124989 = (np.int64(8) * nest_sizze_114876) + local_memory_capacity_129772 = self.max_local_memory + if (sle64(((((bytes_124972 + srem64((np.int64(8) - srem64(bytes_124972, + np.int64(8))), + np.int64(8))) + ((np.int32(1) * iota_arg_77024) + srem64((np.int64(8) - srem64((np.int32(1) * iota_arg_77024), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_77024) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_77024), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_77024) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_77024), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129772)) and intra_suff_and_fits_114561): + mem_124980 = opencl_alloc(self, bytes_120173, "mem_124980") + mem_124982 = opencl_alloc(self, bytes_120173, "mem_124982") + if ((1 * (np.int64(m_75136) * np.int64(iota_arg_77024))) != 0): + self.mainzisegmap_intragroup_114556_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_77024))), + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_77024))), + cl.LocalMemory(np.int64((np.int32(1) * iota_arg_77024))), + cl.LocalMemory(np.int64(bytes_124972)), + np.int64(N_75135), + np.int64(n_75139), + np.int64(iota_arg_77024), + defunc_4_map_res_mem_124919, + defunc_4_map_res_mem_124920, + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + defunc_3_map_res_mem_124960, + defunc_0_f_res_mem_124970, + mem_124973, mem_124980, + mem_124982) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegmap_intragroup_114556_var, + 
((np.int64(m_75136) * np.int64(iota_arg_77024)),), + (np.int64(iota_arg_77024),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_0_f_res_mem_125004 = mem_124980 + defunc_0_f_res_mem_125005 = mem_124982 + else: + mem_124985 = opencl_alloc(self, bytes_120173, "mem_124985") + mem_124987 = opencl_alloc(self, bytes_120173, "mem_124987") + if ((1 * (np.int64(num_groups_114855) * np.int64(segmap_group_sizze_114854))) != 0): + self.mainzisegmap_114832_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(num_groups_114855), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124959, + defunc_3_map_res_mem_124960, + mem_124985, mem_124987) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114832_var, + ((np.int64(num_groups_114855) * np.int64(segmap_group_sizze_114854)),), + (np.int64(segmap_group_sizze_114854),)) + if synchronous: + sync(self) + mem_124991 = opencl_alloc(self, bytes_124989, "mem_124991") + if slt64(np.int64(0), (m_75136 * iota_arg_77024)): + stage1_max_num_groups_129613 = self.max_group_size + stage1_num_groups_129614 = smin64(stage1_max_num_groups_129613, + num_groups_114878) + num_threads_129615 = sext_i64_i32((stage1_num_groups_129614 * segscan_group_sizze_114877)) + if ((1 * (np.int64(stage1_num_groups_129614) * np.int64(segscan_group_sizze_114877))) != 0): + self.mainziscan_stage1_114793_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_114877)))), + np.int64(N_75135), + np.int64(m_75136), + np.int64(iota_arg_77024), + np.int32(num_threads_129615), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + defunc_0_f_res_mem_124970, + mem_124987, mem_124991) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage1_114793_var, + ((np.int64(stage1_num_groups_129614) * np.int64(segscan_group_sizze_114877)),), + 
(np.int64(segscan_group_sizze_114877),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129614))) != 0): + self.mainziscan_stage2_114793_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129614)))), + np.int64(m_75136), + np.int64(iota_arg_77024), + np.int64(stage1_num_groups_129614), + np.int32(num_threads_129615), + mem_124991) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage2_114793_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129614)),), + (np.int64(stage1_num_groups_129614),)) + if synchronous: + sync(self) + required_groups_129657 = sext_i64_i32(sdiv_up64((m_75136 * iota_arg_77024), + segscan_group_sizze_114877)) + if ((1 * (np.int64(num_groups_114878) * np.int64(segscan_group_sizze_114877))) != 0): + self.mainziscan_stage3_114793_var.set_args(self.global_failure, + np.int64(m_75136), + np.int64(iota_arg_77024), + np.int64(num_groups_114878), + np.int32(num_threads_129615), + np.int32(required_groups_129657), + mem_124991) + cl.enqueue_nd_range_kernel(self.queue, + self.mainziscan_stage3_114793_var, + ((np.int64(num_groups_114878) * np.int64(segscan_group_sizze_114877)),), + (np.int64(segscan_group_sizze_114877),)) + if synchronous: + sync(self) + mem_124994 = opencl_alloc(self, m_75136, "mem_124994") + mem_124996 = opencl_alloc(self, bytes_120173, "mem_124996") + mem_124998 = opencl_alloc(self, bytes_120173, "mem_124998") + if slt64((iota_arg_77024 * np.int64(2)), segred_group_sizze_114921): + segment_sizze_nonzzero_129669 = smax64(np.int64(1), iota_arg_77024) + num_threads_129670 = (num_groups_114922 * segred_group_sizze_114921) + if ((1 * (np.int64(num_groups_114922) * np.int64(segred_group_sizze_114921))) != 0): + self.mainzisegred_small_114739_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114921))), + 
cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114921))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_114921))), + np.int64(m_75136), + np.int64(iota_arg_77024), + np.int64(num_groups_114922), + np.int64(segment_sizze_nonzzero_129669), + mem_124973, mem_124985, + mem_124987, mem_124991, + mem_124994, mem_124996, + mem_124998) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_small_114739_var, + ((np.int64(num_groups_114922) * np.int64(segred_group_sizze_114921)),), + (np.int64(segred_group_sizze_114921),)) + if synchronous: + sync(self) + else: + groups_per_segment_129705 = sdiv_up64(num_groups_114922, + smax64(np.int64(1), m_75136)) + elements_per_thread_129706 = sdiv_up64(iota_arg_77024, + (segred_group_sizze_114921 * groups_per_segment_129705)) + virt_num_groups_129707 = (groups_per_segment_129705 * m_75136) + num_threads_129708 = (num_groups_114922 * segred_group_sizze_114921) + threads_per_segment_129709 = (groups_per_segment_129705 * segred_group_sizze_114921) + group_res_arr_mem_129710 = opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_114921 * virt_num_groups_129707)), + "group_res_arr_mem_129710") + group_res_arr_mem_129712 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_114921 * virt_num_groups_129707)), + "group_res_arr_mem_129712") + group_res_arr_mem_129714 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_114921 * virt_num_groups_129707)), + "group_res_arr_mem_129714") + mainzicounter_mem_129716 = self.mainzicounter_mem_129716 + if ((1 * (np.int64(num_groups_114922) * np.int64(segred_group_sizze_114921))) != 0): + self.mainzisegred_large_114739_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114921))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_114921))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_114921))), + np.int64(iota_arg_77024), + np.int64(num_groups_114922), + 
np.int64(groups_per_segment_129705), + np.int64(elements_per_thread_129706), + np.int64(virt_num_groups_129707), + mem_124973, mem_124985, + mem_124987, mem_124991, + mem_124994, mem_124996, + mem_124998, + group_res_arr_mem_129710, + group_res_arr_mem_129712, + group_res_arr_mem_129714, + mainzicounter_mem_129716) + cl.enqueue_nd_range_kernel(self.queue, + self.mainzisegred_large_114739_var, + ((np.int64(num_groups_114922) * np.int64(segred_group_sizze_114921)),), + (np.int64(segred_group_sizze_114921),)) + if synchronous: + sync(self) + mem_124985 = None + mem_124991 = None + segmap_usable_groups_114961 = sdiv_up64(m_75136, + segmap_group_sizze_114960) + mem_125001 = opencl_alloc(self, bytes_120173, "mem_125001") + mem_125003 = opencl_alloc(self, bytes_120173, "mem_125003") + if ((1 * (np.int64(segmap_usable_groups_114961) * np.int64(segmap_group_sizze_114960))) != 0): + self.mainzisegmap_114681_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_75135), + np.int64(m_75136), + np.int64(n_75139), + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124959, + mem_124987, mem_124994, + mem_124996, mem_124998, + mem_125001, mem_125003) + cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_114681_var, + ((np.int64(segmap_usable_groups_114961) * np.int64(segmap_group_sizze_114960)),), + (np.int64(segmap_group_sizze_114960),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124987 = None + mem_124994 = None + mem_124996 = None + mem_124998 = None + defunc_0_f_res_mem_125004 = mem_125001 + defunc_0_f_res_mem_125005 = mem_125003 + defunc_4_map_res_mem_124920 = None + defunc_4_map_res_mem_124921 = None + defunc_3_map_res_mem_124958 = None + defunc_3_map_res_mem_124959 = None + defunc_3_map_res_mem_124960 = None + defunc_0_f_res_mem_124970 = None + mem_124973 = None + out_mem_126320 = defunc_4_map_res_mem_124919 + out_mem_126321 = defunc_0_f_res_mem_125004 + out_mem_126322 = 
defunc_0_f_res_mem_125005 + out_mem_126323 = hist_inds_mem_124138 + return (out_mem_126320, out_mem_126321, out_mem_126322, out_mem_126323) + def futhark_mainDetailed(self, mappingindices_mem_120107, images_mem_120108, + N_70860, m_70861, trend_70862, k_70863, n_70864, + freq_70865, hfrac_70866, level_70867, lam_70868, + hist_70869, conf_70870): + x_70873 = (np.int64(2) * k_70863) + k2p2_70874 = (np.int64(2) + x_70873) + cond_70875 = slt64(np.int64(0), trend_70862) + if cond_70875: + k2p2zq_70876 = k2p2_70874 + else: + k2p2zq_f_res_70877 = (k2p2_70874 - np.int64(1)) + k2p2zq_70876 = k2p2zq_f_res_70877 + binop_x_120111 = (N_70860 * k2p2zq_70876) + bytes_120110 = (np.int64(8) * binop_x_120111) + if cond_70875: + bounds_invalid_upwards_70879 = slt64(k2p2zq_70876, np.int64(0)) + valid_70880 = not(bounds_invalid_upwards_70879) + range_valid_c_70881 = True + assert valid_70880, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:31:10-18\n #2 bfastfinal.fut:29:17-58\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_70876, + " is invalid.")) + segmap_group_sizze_77322 = self.sizes["mainDetailed.segmap_group_size_77258"] + segmap_usable_groups_77323 = sdiv_up64(binop_x_120111, + segmap_group_sizze_77322) + mem_120112 = opencl_alloc(self, bytes_120110, "mem_120112") + if ((1 * (np.int64(segmap_usable_groups_77323) * np.int64(segmap_group_sizze_77322))) != 0): + self.mainDetailedzisegmap_77255_var.set_args(self.global_failure, + np.int64(N_70860), + np.float64(freq_70865), + np.int64(k2p2zq_70876), + mappingindices_mem_120107, + mem_120112) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77255_var, + ((np.int64(segmap_usable_groups_77323) * np.int64(segmap_group_sizze_77322)),), + (np.int64(segmap_group_sizze_77322),)) + if synchronous: + sync(self) + binop_p_mem_120117 = mem_120112 + else: + bounds_invalid_upwards_70904 = 
slt64(k2p2zq_70876, np.int64(0)) + valid_70905 = not(bounds_invalid_upwards_70904) + range_valid_c_70906 = True + assert valid_70905, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:44:10-20\n #2 bfastfinal.fut:30:17-56\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_70876, + " is invalid.")) + segmap_group_sizze_77418 = self.sizes["mainDetailed.segmap_group_size_77358"] + segmap_usable_groups_77419 = sdiv_up64(binop_x_120111, + segmap_group_sizze_77418) + mem_120116 = opencl_alloc(self, bytes_120110, "mem_120116") + if ((1 * (np.int64(segmap_usable_groups_77419) * np.int64(segmap_group_sizze_77418))) != 0): + self.mainDetailedzisegmap_77355_var.set_args(self.global_failure, + np.int64(N_70860), + np.float64(freq_70865), + np.int64(k2p2zq_70876), + mappingindices_mem_120107, + mem_120116) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77355_var, + ((np.int64(segmap_usable_groups_77419) * np.int64(segmap_group_sizze_77418)),), + (np.int64(segmap_group_sizze_77418),)) + if synchronous: + sync(self) + binop_p_mem_120117 = mem_120116 + x_70928 = (N_70860 * N_70860) + y_70929 = (np.int64(2) * N_70860) + x_70930 = (x_70928 + y_70929) + x_70931 = (np.int64(1) + x_70930) + y_70932 = (np.int64(1) + N_70860) + zzero_70933 = (y_70932 == np.int64(0)) + nonzzero_70934 = not(zzero_70933) + nonzzero_cert_70935 = True + assert nonzzero_70934, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:25-53\n #1 bfastfinal.fut:185:3-72\n #2 bfastfinal.fut:181:1-185:72\n" % ("division by zero",)) + x_70936 = sdiv64(x_70931, y_70932) + x_70937 = (x_70936 - N_70860) + binop_p_70938 = (x_70937 - np.int64(1)) + defunc_0_f_res_70939 = sitofp_i64_f64(binop_p_70938) + segmap_group_sizze_77469 = self.sizes["mainDetailed.segmap_group_size_77451"] + segmap_usable_groups_77470 = sdiv_up64(binop_x_120111, + segmap_group_sizze_77469) + mem_120120 = 
opencl_alloc(self, bytes_120110, "mem_120120") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120120, np.int64(0), + binop_p_mem_120117, np.int64(0), + np.int64(1), N_70860, + k2p2zq_70876) + mem_120124 = opencl_alloc(self, bytes_120110, "mem_120124") + if ((1 * (np.int64(segmap_usable_groups_77470) * np.int64(segmap_group_sizze_77469))) != 0): + self.mainDetailedzisegmap_77448_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(k2p2zq_70876), + np.float64(defunc_0_f_res_70939), + mem_120120, mem_120124) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77448_var, + ((np.int64(segmap_usable_groups_77470) * np.int64(segmap_group_sizze_77469)),), + (np.int64(segmap_group_sizze_77469),)) + if synchronous: + sync(self) + empty_slice_70947 = (k2p2zq_70876 == np.int64(0)) + m_70948 = (k2p2zq_70876 - np.int64(1)) + zzero_leq_i_p_m_t_s_70949 = sle64(np.int64(0), m_70948) + i_p_m_t_s_leq_w_70950 = slt64(m_70948, k2p2zq_70876) + i_lte_j_70951 = sle64(np.int64(0), k2p2zq_70876) + y_70952 = (zzero_leq_i_p_m_t_s_70949 and i_p_m_t_s_leq_w_70950) + y_70953 = (i_lte_j_70951 and y_70952) + ok_or_empty_70954 = (empty_slice_70947 or y_70953) + empty_slice_70955 = (n_70864 == np.int64(0)) + m_70956 = (n_70864 - np.int64(1)) + zzero_leq_i_p_m_t_s_70957 = sle64(np.int64(0), m_70956) + i_p_m_t_s_leq_w_70958 = slt64(m_70956, N_70860) + i_lte_j_70959 = sle64(np.int64(0), n_70864) + y_70960 = (zzero_leq_i_p_m_t_s_70957 and i_p_m_t_s_leq_w_70958) + y_70961 = (i_lte_j_70959 and y_70960) + ok_or_empty_70962 = (empty_slice_70955 or y_70961) + index_ok_70963 = (ok_or_empty_70954 and ok_or_empty_70962) + index_certs_70964 = True + assert index_ok_70963, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-19\n #1 bfastfinal.fut:185:3-72\n #2 bfastfinal.fut:181:1-185:72\n" % ("Index [", + np.int64(0), + ":, :", + n_70864, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + N_70860, + "].")) + index_certs_70966 = True + assert 
index_ok_70963, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-20\n #1 bfastfinal.fut:185:3-72\n #2 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + n_70864, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + N_70860, + "][", + k2p2zq_70876, + "].")) + empty_slice_70968 = (m_70861 == np.int64(0)) + m_70969 = (m_70861 - np.int64(1)) + zzero_leq_i_p_m_t_s_70970 = sle64(np.int64(0), m_70969) + i_p_m_t_s_leq_w_70971 = slt64(m_70969, m_70861) + i_lte_j_70972 = sle64(np.int64(0), m_70861) + y_70973 = (zzero_leq_i_p_m_t_s_70970 and i_p_m_t_s_leq_w_70971) + y_70974 = (i_lte_j_70972 and y_70973) + ok_or_empty_70975 = (empty_slice_70968 or y_70974) + index_ok_70976 = (ok_or_empty_70962 and ok_or_empty_70975) + index_certs_70977 = True + assert index_ok_70976, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-24\n #1 bfastfinal.fut:185:3-72\n #2 bfastfinal.fut:181:1-185:72\n" % ("Index [", + np.int64(0), + ":, :", + n_70864, + "] out of bounds for array of shape [", + m_70861, + "][", + N_70860, + "].")) + cond_70978 = (hist_70869 == np.int64(-1)) + y_70979 = slt64(m_70956, n_70864) + bounds_check_70980 = (zzero_leq_i_p_m_t_s_70957 and y_70979) + suff_outer_redomap_77476 = (self.sizes["mainDetailed.suff_outer_redomap_0"] <= m_70861) + segred_group_sizze_77491 = self.sizes["mainDetailed.segred_group_size_77478"] + max_num_groups_126351 = self.sizes["mainDetailed.segred_num_groups_77480"] + num_groups_77492 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segred_group_sizze_77491), + sext_i32_i64(max_num_groups_126351)))) + max_group_sizze_77729 = self.max_group_size + fits_77730 = sle64(n_70864, max_group_sizze_77729) + suff_intra_par_77732 = (self.sizes["mainDetailed.suff_intra_par_1"] <= n_70864) + intra_suff_and_fits_77733 = (fits_77730 and suff_intra_par_77732) + nest_sizze_77763 = (m_70861 * n_70864) + segscan_group_sizze_77764 = self.sizes["mainDetailed.segscan_group_size_77644"] + 
max_num_groups_126352 = self.sizes["mainDetailed.segscan_num_groups_77646"] + num_groups_77765 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_77763, + segscan_group_sizze_77764), + sext_i32_i64(max_num_groups_126352)))) + segmap_group_sizze_77806 = self.sizes["mainDetailed.segmap_group_size_77575"] + segred_group_sizze_77822 = self.sizes["mainDetailed.segred_group_size_77718"] + max_num_groups_126353 = self.sizes["mainDetailed.segred_num_groups_77720"] + num_groups_77823 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segred_group_sizze_77822), + sext_i32_i64(max_num_groups_126353)))) + segmap_group_sizze_80689 = self.sizes["mainDetailed.segmap_group_size_80428"] + max_num_groups_126354 = self.sizes["mainDetailed.segmap_num_groups_80430"] + num_groups_80690 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_80689), + sext_i32_i64(max_num_groups_126354)))) + num_threads_115425 = (segmap_group_sizze_80689 * num_groups_80690) + y_115427 = smod_safe64(m_70861, num_threads_115425) + x_115428 = (num_threads_115425 - y_115427) + y_115429 = smod_safe64(x_115428, num_threads_115425) + segmap_group_sizze_84579 = self.sizes["mainDetailed.segmap_group_size_84194"] + max_num_groups_126355 = self.sizes["mainDetailed.segmap_num_groups_84196"] + num_groups_84580 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_84579), + sext_i32_i64(max_num_groups_126355)))) + num_threads_115503 = (segmap_group_sizze_84579 * num_groups_84580) + y_115505 = smod_safe64(m_70861, num_threads_115503) + x_115506 = (num_threads_115503 - y_115505) + y_115507 = smod_safe64(x_115506, num_threads_115503) + binop_x_120126 = (N_70860 * m_70861) + bytes_120125 = (np.int64(8) * binop_x_120126) + bytes_120173 = (np.int64(8) * m_70861) + bytes_120175 = (np.int64(8) * nest_sizze_77763) + bytes_120129 = (np.int64(8) * n_70864) + binop_x_120244 = (m_70861 * k2p2zq_70876) + bytes_120247 = (np.int64(8) * k2p2zq_70876) 
+ binop_x_120251 = (k2p2zq_70876 * k2p2zq_70876) + bytes_120250 = (np.int64(8) * binop_x_120251) + bytes_120253 = (np.int64(8) * y_115429) + bytes_121947 = (np.int64(8) * y_115507) + bytes_121990 = (np.int64(8) * binop_x_120244) + binop_x_121994 = (np.int64(2) * m_70861) + binop_x_121995 = (k2p2zq_70876 * binop_x_121994) + bytes_121993 = (np.int64(8) * binop_x_121995) + binop_x_121999 = (k2p2zq_70876 * binop_x_120244) + bytes_121997 = (np.int64(8) * binop_x_121999) + num_threads_125631 = (segred_group_sizze_77491 * num_groups_77492) + total_sizze_125632 = (bytes_120129 * num_threads_125631) + total_sizze_125633 = (bytes_120129 * num_threads_125631) + total_sizze_125634 = (bytes_120129 * num_threads_125631) + segmap_group_sizze_77952 = self.sizes["mainDetailed.segmap_group_size_77853"] + if cond_70978: + index_certs_70984 = True + assert bounds_check_70980, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 recresid.fut:89:39-59\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:185:3-72\n #7 bfastfinal.fut:181:1-185:72\n" % ("Index [", + m_70956, + "] out of bounds for array of shape [", + n_70864, + "].")) + local_memory_capacity_126537 = self.max_local_memory + if (((sle64(((np.int32(1) + srem64((np.int64(8) - srem64(np.int32(1), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * segred_group_sizze_77491) + srem64((np.int64(8) - srem64((np.int32(8) * segred_group_sizze_77491), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126537)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126537))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126537))) and suff_outer_redomap_77476): + mem_120127 = opencl_alloc(self, bytes_120125, "mem_120127") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120127, np.int64(0), + images_mem_120108, + np.int64(0), np.int64(1), + N_70860, m_70861) + mem_120172 = opencl_alloc(self, np.int64(8), 
"mem_120172") + mem_120174 = opencl_alloc(self, bytes_120173, "mem_120174") + mem_120177 = opencl_alloc(self, bytes_120175, "mem_120177") + mem_120180 = opencl_alloc(self, bytes_120175, "mem_120180") + mem_120130 = opencl_alloc(self, total_sizze_125632, "mem_120130") + mem_120144 = opencl_alloc(self, total_sizze_125633, "mem_120144") + mem_120146 = opencl_alloc(self, total_sizze_125634, "mem_120146") + mainDetailedzicounter_mem_126356 = self.mainDetailedzicounter_mem_126356 + group_res_arr_mem_126358 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_77491 * num_groups_77492)), + "group_res_arr_mem_126358") + num_threads_126360 = (num_groups_77492 * segred_group_sizze_77491) + if ((1 * (np.int64(num_groups_77492) * np.int64(segred_group_sizze_77491))) != 0): + self.mainDetailedzisegred_nonseg_77489_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_77491))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_70861), + np.int64(n_70864), + np.int64(m_70956), + np.int64(num_groups_77492), + np.int64(num_threads_125631), + np.int64(num_threads_126360), + mem_120127, + mem_120130, + mem_120144, + mem_120146, + mem_120172, + mem_120174, + mem_120177, + mem_120180, + mainDetailedzicounter_mem_126356, + group_res_arr_mem_126358) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_nonseg_77489_var, + ((np.int64(num_groups_77492) * np.int64(segred_group_sizze_77491)),), + (np.int64(segred_group_sizze_77491),)) + if synchronous: + sync(self) + mem_120127 = None + mem_120130 = None + mem_120144 = None + mem_120146 = None + read_res_129882 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129882, mem_120172, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_77522 = read_res_129882[0] + mem_120172 = None + mem_120220 = opencl_alloc(self, bytes_120173, "mem_120220") + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, 
mem_120220, mem_120174, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * np.int32(8)))) + if synchronous: + sync(self) + mem_120174 = None + mem_120224 = opencl_alloc(self, bytes_120175, "mem_120224") + group_sizze_126398 = self.sizes["mainDetailed.group_size_126398"] + num_groups_126399 = sdiv_up64((m_70861 * n_70864), group_sizze_126398) + if ((1 * (np.int64(num_groups_126399) * np.int64(group_sizze_126398))) != 0): + self.mainDetailedzicopy_126395_var.set_args(np.int64(m_70861), + np.int64(n_70864), + mem_120177, mem_120224) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126395_var, + ((np.int64(num_groups_126399) * np.int64(group_sizze_126398)),), + (np.int64(group_sizze_126398),)) + if synchronous: + sync(self) + mem_120177 = None + mem_120228 = opencl_alloc(self, bytes_120175, "mem_120228") + group_sizze_126403 = self.sizes["mainDetailed.group_size_126403"] + num_groups_126404 = sdiv_up64((m_70861 * n_70864), group_sizze_126403) + if ((1 * (np.int64(num_groups_126404) * np.int64(group_sizze_126403))) != 0): + self.mainDetailedzicopy_126400_var.set_args(np.int64(m_70861), + np.int64(n_70864), + mem_120180, mem_120228) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126400_var, + ((np.int64(num_groups_126404) * np.int64(group_sizze_126403)),), + (np.int64(group_sizze_126403),)) + if synchronous: + sync(self) + mem_120180 = None + defunc_3_map_res_mem_120230 = mem_120220 + defunc_3_map_res_mem_120231 = mem_120224 + defunc_3_map_res_mem_120232 = mem_120228 + defunc_2_reduce_res_70985 = defunc_2_reduce_res_77522 + else: + local_memory_capacity_126506 = self.max_local_memory + if (sle64((((bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))), + 
sext_i32_i64(local_memory_capacity_126506)) and intra_suff_and_fits_77733): + mem_120191 = opencl_alloc(self, bytes_120173, "mem_120191") + mem_120194 = opencl_alloc(self, bytes_120175, "mem_120194") + mem_120197 = opencl_alloc(self, bytes_120175, "mem_120197") + if ((1 * (np.int64(m_70861) * np.int64(n_70864))) != 0): + self.mainDetailedzisegmap_intragroup_77536_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + np.int64(N_70860), + np.int64(n_70864), + np.int64(m_70956), + images_mem_120108, + mem_120191, + mem_120194, + mem_120197) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_77536_var, + ((np.int64(m_70861) * np.int64(n_70864)),), + (np.int64(n_70864),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = mem_120191 + defunc_3_map_res_mem_120212 = mem_120194 + defunc_3_map_res_mem_120213 = mem_120197 + else: + mem_120201 = opencl_alloc(self, bytes_120175, "mem_120201") + if slt64(np.int64(0), (m_70861 * n_70864)): + stage1_max_num_groups_126422 = self.max_group_size + stage1_num_groups_126423 = smin64(stage1_max_num_groups_126422, + num_groups_77765) + num_threads_126424 = sext_i64_i32((stage1_num_groups_126423 * segscan_group_sizze_77764)) + if ((1 * (np.int64(stage1_num_groups_126423) * np.int64(segscan_group_sizze_77764))) != 0): + self.mainDetailedziscan_stage1_77650_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_77764)))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(m_70956), + np.int32(num_threads_126424), + images_mem_120108, + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage1_77650_var, + ((np.int64(stage1_num_groups_126423) * np.int64(segscan_group_sizze_77764)),), + (np.int64(segscan_group_sizze_77764),)) + if synchronous: + sync(self) + if ((1 * 
(np.int64(np.int64(1)) * np.int64(stage1_num_groups_126423))) != 0): + self.mainDetailedziscan_stage2_77650_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_126423)))), + np.int64(m_70861), + np.int64(n_70864), + np.int64(stage1_num_groups_126423), + np.int32(num_threads_126424), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage2_77650_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_126423)),), + (np.int64(stage1_num_groups_126423),)) + if synchronous: + sync(self) + required_groups_126466 = sext_i64_i32(sdiv_up64((m_70861 * n_70864), + segscan_group_sizze_77764)) + if ((1 * (np.int64(num_groups_77765) * np.int64(segscan_group_sizze_77764))) != 0): + self.mainDetailedziscan_stage3_77650_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(n_70864), + np.int64(num_groups_77765), + np.int32(num_threads_126424), + np.int32(required_groups_126466), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage3_77650_var, + ((np.int64(num_groups_77765) * np.int64(segscan_group_sizze_77764)),), + (np.int64(segscan_group_sizze_77764),)) + if synchronous: + sync(self) + mem_120203 = opencl_alloc(self, bytes_120173, "mem_120203") + group_sizze_126481 = self.sizes["mainDetailed.group_size_126481"] + num_groups_126482 = sdiv_up64(m_70861, group_sizze_126481) + if ((1 * (np.int64(num_groups_126482) * np.int64(group_sizze_126481))) != 0): + self.mainDetailedzicopy_126478_var.set_args(np.int64(m_70861), + np.int64(n_70864), + np.int64(m_70956), + mem_120201, mem_120203) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126478_var, + ((np.int64(num_groups_126482) * np.int64(group_sizze_126481)),), + (np.int64(group_sizze_126481),)) + if synchronous: + sync(self) + mem_120206 = opencl_alloc(self, bytes_120175, "mem_120206") + self.futhark_builtinzhreplicate_f64(mem_120206, (m_70861 * n_70864), + np.nan) + 
mem_120209 = opencl_alloc(self, bytes_120175, "mem_120209") + self.futhark_builtinzhreplicate_i64(mem_120209, (m_70861 * n_70864), + np.int64(0)) + segmap_usable_groups_77807 = sdiv_up64(nest_sizze_77763, + segmap_group_sizze_77806) + if ((1 * (np.int64(segmap_usable_groups_77807) * np.int64(segmap_group_sizze_77806))) != 0): + self.mainDetailedzisegmap_77572_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(m_70956), + images_mem_120108, + mem_120201, mem_120206, + mem_120209) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77572_var, + ((np.int64(segmap_usable_groups_77807) * np.int64(segmap_group_sizze_77806)),), + (np.int64(segmap_group_sizze_77806),)) + if synchronous: + sync(self) + mem_120201 = None + defunc_2_reduce_res_map_acc_mem_120211 = mem_120203 + defunc_3_map_res_mem_120212 = mem_120206 + defunc_3_map_res_mem_120213 = mem_120209 + mem_120215 = opencl_alloc(self, bytes_120173, "mem_120215") + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120215, + defunc_2_reduce_res_map_acc_mem_120211, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * np.int32(8)))) + if synchronous: + sync(self) + mem_120218 = opencl_alloc(self, np.int64(8), "mem_120218") + mainDetailedzicounter_mem_126507 = self.mainDetailedzicounter_mem_126507 + group_res_arr_mem_126509 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_77822 * num_groups_77823)), + "group_res_arr_mem_126509") + num_threads_126511 = (num_groups_77823 * segred_group_sizze_77822) + if ((1 * (np.int64(num_groups_77823) * np.int64(segred_group_sizze_77822))) != 0): + self.mainDetailedzisegred_nonseg_77726_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_77822))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_70861), + np.int64(num_groups_77823), + np.int64(num_threads_126511), + 
defunc_2_reduce_res_map_acc_mem_120211, + mem_120218, + mainDetailedzicounter_mem_126507, + group_res_arr_mem_126509) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_nonseg_77726_var, + ((np.int64(num_groups_77823) * np.int64(segred_group_sizze_77822)),), + (np.int64(segred_group_sizze_77822),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = None + read_res_129884 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129884, mem_120218, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_77830 = read_res_129884[0] + mem_120218 = None + defunc_3_map_res_mem_120230 = mem_120215 + defunc_3_map_res_mem_120231 = defunc_3_map_res_mem_120212 + defunc_3_map_res_mem_120232 = defunc_3_map_res_mem_120213 + defunc_2_reduce_res_70985 = defunc_2_reduce_res_77830 + empty_slice_71014 = (defunc_2_reduce_res_70985 == np.int64(0)) + m_71015 = (defunc_2_reduce_res_70985 - np.int64(1)) + zzero_leq_i_p_m_t_s_71016 = sle64(np.int64(0), m_71015) + i_p_m_t_s_leq_w_71017 = slt64(m_71015, n_70864) + i_lte_j_71018 = sle64(np.int64(0), defunc_2_reduce_res_70985) + y_71019 = (zzero_leq_i_p_m_t_s_71016 and i_p_m_t_s_leq_w_71017) + y_71020 = (i_lte_j_71018 and y_71019) + ok_or_empty_71021 = (empty_slice_71014 or y_71020) + nest_sizze_77951 = (m_70861 * defunc_2_reduce_res_70985) + max_num_groups_126538 = self.sizes["mainDetailed.segmap_num_groups_77855"] + num_groups_77953 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_77951, + segmap_group_sizze_77952), + sext_i32_i64(max_num_groups_126538)))) + mem_120235 = opencl_alloc(self, bytes_120110, "mem_120235") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120235, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_70876, + N_70860) + binop_x_120245 = (defunc_2_reduce_res_70985 * binop_x_120244) + bytes_120243 = (np.int64(8) * binop_x_120245) + mem_120246 = opencl_alloc(self, bytes_120243, "mem_120246") 
+ num_threads_125639 = (segmap_group_sizze_77952 * num_groups_77953) + total_sizze_125640 = (bytes_120247 * num_threads_125639) + mem_120238 = opencl_alloc(self, total_sizze_125640, "mem_120238") + total_sizze_125641 = (bytes_120247 * num_threads_125639) + mem_125145 = opencl_alloc(self, total_sizze_125641, "mem_125145") + if ((1 * (np.int64(num_groups_77953) * np.int64(segmap_group_sizze_77952))) != 0): + self.mainDetailedzisegmap_77850_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(m_70956), + np.int64(defunc_2_reduce_res_70985), + np.int64(num_groups_77953), + np.int64(num_threads_125639), + defunc_3_map_res_mem_120232, + mem_120235, mem_120238, + mem_120246, mem_125145) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77850_var, + ((np.int64(num_groups_77953) * np.int64(segmap_group_sizze_77952)),), + (np.int64(segmap_group_sizze_77952),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_120232 = None + mem_120235 = None + mem_120238 = None + mem_125145 = None + index_ok_71050 = (ok_or_empty_70975 and ok_or_empty_71021) + index_certs_71051 = True + assert index_ok_71050, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:98:15-28\n #1 mroc.fut:27:25-38\n #2 mroc.fut:77:27-61\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("Index [", + np.int64(0), + ":, :", + defunc_2_reduce_res_70985, + "] out of bounds for array of shape [", + m_70861, + "][", + n_70864, + "].")) + i64_res_71053 = sitofp_i64_f64(k2p2zq_70876) + tol_71054 = (np.float64(1.4901161193847656e-8) / i64_res_71053) + i_p_m_t_s_leq_w_71055 = slt64(m_70948, defunc_2_reduce_res_70985) + y_71056 = (zzero_leq_i_p_m_t_s_70949 and i_p_m_t_s_leq_w_71055) + y_71057 = (i_lte_j_70951 and y_71056) + ok_or_empty_71058 = (empty_slice_70947 or 
y_71057) + index_certs_71059 = True + assert ok_or_empty_71058, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:56-63\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + k2p2zq_70876, + "] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "].")) + index_ok_71060 = (ok_or_empty_70954 and ok_or_empty_71058) + index_certs_71061 = True + assert index_ok_71060, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:43-53\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + k2p2zq_70876, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "][", + k2p2zq_70876, + "].")) + replicate_arg_71062 = (np.int64(2) * k2p2zq_70876) + bounds_invalid_upwards_71063 = slt64(replicate_arg_71062, np.int64(0)) + valid_71064 = not(bounds_invalid_upwards_71063) + range_valid_c_71065 = True + assert valid_71064, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:23:25-63\n #5 /prelude/soacs.fut:67:19-23\n #6 /prelude/soacs.fut:67:3-37\n #7 recresid.fut:22:5-25:22\n #8 recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:185:3-72\n #13 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_71062, + " is invalid.")) + min_res_71066 = smin64(k2p2zq_70876, k2p2zq_70876) + k_71067 = 
(np.int64(1) + k2p2zq_70876) + mem_120248 = opencl_alloc(self, bytes_120247, "mem_120248") + self.futhark_builtinzhiota_i64(mem_120248, k2p2zq_70876, np.int64(0), + np.int64(1)) + segmap_group_sizze_78011 = self.sizes["mainDetailed.segmap_group_size_77989"] + segmap_usable_groups_78012 = sdiv_up64(binop_x_120251, + segmap_group_sizze_78011) + mem_120252 = opencl_alloc(self, bytes_120250, "mem_120252") + if ((1 * (np.int64(segmap_usable_groups_78012) * np.int64(segmap_group_sizze_78011))) != 0): + self.mainDetailedzisegmap_77986_var.set_args(self.global_failure, + np.int64(k2p2zq_70876), + mem_120252) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_77986_var, + ((np.int64(segmap_usable_groups_78012) * np.int64(segmap_group_sizze_78011)),), + (np.int64(segmap_group_sizze_78011),)) + if synchronous: + sync(self) + suff_outer_par_78115 = (self.sizes["mainDetailed.suff_outer_par_2"] <= m_70861) + segmap_group_sizze_78572 = self.sizes["mainDetailed.segmap_group_size_78119"] + max_num_groups_126566 = self.sizes["mainDetailed.segmap_num_groups_78121"] + num_groups_78573 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_78572), + sext_i32_i64(max_num_groups_126566)))) + suff_outer_par_80640 = (self.sizes["mainDetailed.suff_outer_par_8"] <= m_70861) + segmap_group_sizze_80644 = self.sizes["mainDetailed.segmap_group_size_80383"] + max_num_groups_126567 = self.sizes["mainDetailed.segmap_num_groups_80385"] + num_groups_80645 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_80644), + sext_i32_i64(max_num_groups_126567)))) + segred_group_sizze_80666 = self.sizes["mainDetailed.segred_group_size_80460"] + max_num_groups_126568 = self.sizes["mainDetailed.segred_num_groups_80462"] + num_groups_80667 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_80666), + sext_i32_i64(max_num_groups_126568)))) + segmap_group_sizze_80677 = 
self.sizes["mainDetailed.segmap_group_size_80452"] + segmap_group_sizze_80684 = self.sizes["mainDetailed.segmap_group_size_80444"] + segmap_group_sizze_80702 = self.sizes["mainDetailed.segmap_group_size_80095"] + max_num_groups_126569 = self.sizes["mainDetailed.segmap_num_groups_80097"] + num_groups_80703 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_80702), + sext_i32_i64(max_num_groups_126569)))) + segmap_group_sizze_80988 = self.sizes["mainDetailed.segmap_group_size_80051"] + nest_sizze_81002 = (m_70861 * binop_x_120251) + segmap_group_sizze_81003 = self.sizes["mainDetailed.segmap_group_size_80017"] + suff_outer_par_81027 = (self.sizes["mainDetailed.suff_outer_par_5"] <= binop_x_120244) + suff_outer_par_81122 = (self.sizes["mainDetailed.suff_outer_par_6"] <= m_70861) + segmap_group_sizze_81125 = self.sizes["mainDetailed.segmap_group_size_79589"] + max_num_groups_126570 = self.sizes["mainDetailed.segmap_num_groups_79591"] + num_groups_81126 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_81125), + sext_i32_i64(max_num_groups_126570)))) + suff_outer_par_81156 = (self.sizes["mainDetailed.suff_outer_par_7"] <= binop_x_120244) + segred_group_sizze_81179 = self.sizes["mainDetailed.segred_group_size_79703"] + segmap_group_sizze_81196 = self.sizes["mainDetailed.segmap_group_size_79690"] + segmap_group_sizze_81207 = self.sizes["mainDetailed.segmap_group_size_79679"] + segmap_group_sizze_81218 = self.sizes["mainDetailed.segmap_group_size_79374"] + max_num_groups_126571 = self.sizes["mainDetailed.segmap_num_groups_79376"] + num_groups_81219 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_81218), + sext_i32_i64(max_num_groups_126571)))) + suff_outer_par_81223 = (self.sizes["mainDetailed.suff_outer_par_3"] <= binop_x_120244) + suff_outer_par_81245 = (self.sizes["mainDetailed.suff_outer_par_4"] <= nest_sizze_81002) + nest_sizze_81260 = (k2p2zq_70876 * 
nest_sizze_81002) + segred_group_sizze_81261 = self.sizes["mainDetailed.segred_group_size_79425"] + max_num_groups_126572 = self.sizes["mainDetailed.segred_num_groups_79427"] + num_groups_81262 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_81260, + segred_group_sizze_81261), + sext_i32_i64(max_num_groups_126572)))) + segmap_group_sizze_81277 = self.sizes["mainDetailed.segmap_group_size_79311"] + max_num_groups_126573 = self.sizes["mainDetailed.segmap_num_groups_79313"] + num_groups_81278 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_81277), + sext_i32_i64(max_num_groups_126573)))) + segmap_group_sizze_81347 = self.sizes["mainDetailed.segmap_group_size_79269"] + segmap_group_sizze_81418 = self.sizes["mainDetailed.segmap_group_size_79112"] + segmap_group_sizze_81437 = self.sizes["mainDetailed.segmap_group_size_79065"] + segmap_group_sizze_81446 = self.sizes["mainDetailed.segmap_group_size_79042"] + segmap_usable_groups_80678 = sdiv_up_safe64(m_70861, + segmap_group_sizze_80677) + segmap_usable_groups_80685 = sdiv_up_safe64(m_70861, + segmap_group_sizze_80684) + segmap_usable_groups_81197 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_81196) + segmap_usable_groups_81208 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_81207) + tile_sizze_115656 = self.sizes["mainDetailed.tile_size_115655"] + group_sizze_115657 = (tile_sizze_115656 * tile_sizze_115656) + tile_sizze_116018 = self.sizes["mainDetailed.tile_size_116017"] + group_sizze_116019 = (tile_sizze_116018 * tile_sizze_116018) + Ty_116322 = self.sizes["mainDetailed.Ty_116319"] + Ry_116323 = self.sizes["mainDetailed.Ry_116321"] + Tx_116324 = self.sizes["mainDetailed.Tx_116318"] + Rx_116325 = self.sizes["mainDetailed.Rx_116320"] + Tk_116326 = self.sizes["mainDetailed.Tk_116317"] + TxRx_116329 = (Tx_116324 * Rx_116325) + TyRy_116330 = (Ty_116322 * Ry_116323) + a_loc_szz_116332 = (Tk_116326 * TyRy_116330) + binop_x_116333 = (Tx_116324 * Tk_116326) + 
b_loc_szz_116334 = (Rx_116325 * binop_x_116333) + group_sizze_116339 = (Ty_116322 * Tx_116324) + num_groups_x_116020 = sdiv_up_safe64(m_70861, tile_sizze_116018) + num_groups_y_116021 = sdiv_up_safe64(k2p2zq_70876, tile_sizze_116018) + num_groups_top_116022 = (num_groups_x_116020 * num_groups_y_116021) + padded_sizze_115430 = (m_70861 + y_115429) + mem_120254 = opencl_alloc(self, bytes_120253, "mem_120254") + per_chunk_115432 = squot_safe64(padded_sizze_115430, num_threads_115425) + bytes_120258 = (np.int64(8) * nest_sizze_81002) + bytes_120269 = (np.int64(8) * replicate_arg_71062) + binop_x_120926 = (m_70861 * replicate_arg_71062) + bytes_120924 = (np.int64(8) * binop_x_120926) + bytes_120947 = (np.int64(8) * padded_sizze_115430) + binop_x_120950 = (num_threads_115425 * per_chunk_115432) + bytes_120949 = (np.int64(8) * binop_x_120950) + binop_x_121376 = (k2p2zq_70876 * group_sizze_115657) + bytes_121374 = (np.int64(8) * binop_x_121376) + ctx_val_121390 = (k2p2zq_70876 * tile_sizze_115656) + bytes_121393 = (np.int64(8) * group_sizze_115657) + binop_x_125185 = (np.int64(8) * tile_sizze_115656) + sizze_125186 = (tile_sizze_115656 * binop_x_125185) + bytes_121515 = (np.int64(8) * group_sizze_116019) + binop_x_125210 = (np.int64(8) * tile_sizze_116018) + sizze_125211 = (tile_sizze_116018 * binop_x_125210) + binop_x_121546 = (k2p2zq_70876 * group_sizze_116019) + bytes_121544 = (np.int64(8) * binop_x_121546) + binop_x_121648 = (Ry_116323 * group_sizze_116339) + binop_x_121649 = (Rx_116325 * binop_x_121648) + bytes_121646 = (np.int64(8) * binop_x_121649) + binop_x_121640 = (Ry_116323 * Rx_116325) + bytes_121639 = (np.int64(8) * binop_x_121640) + bytes_121651 = (np.int64(8) * a_loc_szz_116332) + bytes_121653 = (np.int64(8) * b_loc_szz_116334) + bytes_121722 = (np.int64(8) * binop_x_121648) + binop_x_121728 = (Rx_116325 * group_sizze_116339) + bytes_121726 = (np.int64(8) * binop_x_121728) + bytes_121714 = (np.int64(8) * Ry_116323) + bytes_121716 = (np.int64(8) * Rx_116325) 
+ binop_x_125231 = (np.int64(8) * Ty_116322) + binop_x_125232 = (Tx_116324 * binop_x_125231) + binop_x_125233 = (Ry_116323 * binop_x_125232) + sizze_125234 = (Rx_116325 * binop_x_125233) + sizze_125149 = (np.int64(16) * k2p2zq_70876) + sizze_125420 = (k2p2zq_70876 * bytes_120247) + binop_x_125541 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125542 = (k2p2zq_70876 * binop_x_125541) + double_buffer_sizze_125543 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125544 = (np.int64(16) * k2p2zq_70876) + double_buffer_sizze_125550 = (np.int64(8) * k2p2zq_70876) + binop_x_125558 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125559 = (k2p2zq_70876 * binop_x_125558) + double_buffer_sizze_125560 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125561 = (np.int64(16) * k2p2zq_70876) + double_buffer_sizze_125567 = (np.int64(8) * k2p2zq_70876) + num_threads_125643 = (segmap_group_sizze_78572 * num_groups_78573) + total_sizze_125644 = (bytes_120247 * num_threads_125643) + total_sizze_125645 = (bytes_120269 * num_threads_125643) + total_sizze_125646 = (bytes_120247 * num_threads_125643) + total_sizze_125647 = (bytes_120250 * num_threads_125643) + total_sizze_125648 = (bytes_120250 * num_threads_125643) + total_sizze_125649 = (bytes_120247 * num_threads_125643) + total_sizze_125650 = (bytes_120250 * num_threads_125643) + total_sizze_125651 = (bytes_120247 * num_threads_125643) + total_sizze_125652 = (bytes_120250 * num_threads_125643) + total_sizze_125653 = (bytes_120247 * num_threads_125643) + total_sizze_125654 = (bytes_120250 * num_threads_125643) + total_sizze_125655 = (bytes_120247 * num_threads_125643) + total_sizze_125656 = (bytes_120250 * num_threads_125643) + total_sizze_125657 = (sizze_125149 * num_threads_125643) + total_sizze_125658 = (bytes_120247 * num_threads_125643) + total_sizze_125659 = (bytes_120247 * num_threads_125643) + total_sizze_125660 = (sizze_125420 * num_threads_125643) + total_sizze_125661 = (sizze_125149 * num_threads_125643) + 
total_sizze_125662 = (bytes_120247 * num_threads_125643) + total_sizze_125663 = (sizze_125420 * num_threads_125643) + total_sizze_125664 = (double_buffer_sizze_125542 * num_threads_125643) + total_sizze_125665 = (double_buffer_sizze_125543 * num_threads_125643) + total_sizze_125666 = (double_buffer_sizze_125544 * num_threads_125643) + total_sizze_125667 = (double_buffer_sizze_125550 * num_threads_125643) + num_threads_125673 = (segmap_group_sizze_80702 * num_groups_80703) + total_sizze_125674 = (bytes_120247 * num_threads_125673) + total_sizze_125675 = (sizze_125149 * num_threads_125673) + total_sizze_125676 = (bytes_120247 * num_threads_125673) + total_sizze_125677 = (sizze_125420 * num_threads_125673) + total_sizze_125678 = (sizze_125149 * num_threads_125673) + total_sizze_125679 = (bytes_120247 * num_threads_125673) + total_sizze_125680 = (sizze_125420 * num_threads_125673) + total_sizze_125681 = (double_buffer_sizze_125559 * num_threads_125673) + total_sizze_125682 = (double_buffer_sizze_125560 * num_threads_125673) + total_sizze_125683 = (double_buffer_sizze_125561 * num_threads_125673) + num_threads_125688 = (segmap_group_sizze_81125 * num_groups_81126) + total_sizze_125689 = (bytes_120250 * num_threads_125688) + total_sizze_125690 = (bytes_120247 * num_threads_125688) + num_threads_125691 = (group_sizze_116019 * num_groups_top_116022) + total_sizze_125692 = (bytes_120247 * num_threads_125691) + num_threads_125696 = (segmap_group_sizze_81218 * num_groups_81219) + total_sizze_125697 = (bytes_120247 * num_threads_125696) + num_threads_125700 = (segmap_group_sizze_81277 * num_groups_81278) + total_sizze_125701 = (bytes_120247 * num_threads_125700) + total_sizze_125702 = (double_buffer_sizze_125567 * num_threads_125700) + local_memory_capacity_127193 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127193)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127193))) and sle64(np.int64(0), + 
sext_i32_i64(local_memory_capacity_127193))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127193))) and suff_outer_par_78115): + mem_120257 = opencl_alloc(self, bytes_121990, "mem_120257") + group_sizze_126577 = self.sizes["mainDetailed.group_size_126577"] + num_groups_126578 = sdiv_up64((m_70861 * k2p2zq_70876), + group_sizze_126577) + if ((1 * (np.int64(num_groups_126578) * np.int64(group_sizze_126577))) != 0): + self.mainDetailedzicopy_126574_var.set_args(np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + defunc_3_map_res_mem_120231, + mem_120257) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126574_var, + ((np.int64(num_groups_126578) * np.int64(group_sizze_126577)),), + (np.int64(group_sizze_126577),)) + if synchronous: + sync(self) + mem_120261 = opencl_alloc(self, bytes_120258, "mem_120261") + group_sizze_126582 = self.sizes["mainDetailed.group_size_126582"] + num_groups_126583 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_126582) + if ((1 * (np.int64(num_groups_126583) * np.int64(group_sizze_126582))) != 0): + self.mainDetailedzicopy_126579_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + mem_120246, mem_120261) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126579_var, + ((np.int64(num_groups_126583) * np.int64(group_sizze_126582)),), + (np.int64(group_sizze_126582),)) + if synchronous: + sync(self) + mem_120265 = opencl_alloc(self, bytes_121997, "mem_120265") + group_sizze_126587 = self.sizes["mainDetailed.group_size_126587"] + num_groups_126588 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_126587) + if ((1 * (np.int64(num_groups_126588) * np.int64(group_sizze_126587))) != 0): + self.mainDetailedzicopy_126584_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + mem_120246, mem_120265) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainDetailedzicopy_126584_var, + ((np.int64(num_groups_126588) * np.int64(group_sizze_126587)),), + (np.int64(group_sizze_126587),)) + if synchronous: + sync(self) + mem_120878 = opencl_alloc(self, bytes_120258, "mem_120878") + mem_120881 = opencl_alloc(self, bytes_121990, "mem_120881") + mem_120883 = opencl_alloc(self, bytes_120173, "mem_120883") + mem_120268 = opencl_alloc(self, total_sizze_125644, "mem_120268") + mem_120271 = opencl_alloc(self, total_sizze_125645, "mem_120271") + mem_120273 = opencl_alloc(self, total_sizze_125646, "mem_120273") + mem_120608 = opencl_alloc(self, total_sizze_125647, "mem_120608") + mem_120649 = opencl_alloc(self, total_sizze_125648, "mem_120649") + mem_120661 = opencl_alloc(self, total_sizze_125649, "mem_120661") + mem_120690 = opencl_alloc(self, total_sizze_125650, "mem_120690") + mem_120763 = opencl_alloc(self, total_sizze_125651, "mem_120763") + mem_120778 = opencl_alloc(self, total_sizze_125652, "mem_120778") + mem_120790 = opencl_alloc(self, total_sizze_125653, "mem_120790") + mem_120801 = opencl_alloc(self, total_sizze_125654, "mem_120801") + mem_120821 = opencl_alloc(self, total_sizze_125655, "mem_120821") + mem_120824 = opencl_alloc(self, total_sizze_125656, "mem_120824") + mem_125150 = opencl_alloc(self, total_sizze_125657, "mem_125150") + mem_125152 = opencl_alloc(self, total_sizze_125658, "mem_125152") + mem_125160 = opencl_alloc(self, total_sizze_125659, "mem_125160") + mem_125421 = opencl_alloc(self, total_sizze_125660, "mem_125421") + mem_125429 = opencl_alloc(self, total_sizze_125661, "mem_125429") + mem_125431 = opencl_alloc(self, total_sizze_125662, "mem_125431") + mem_125491 = opencl_alloc(self, total_sizze_125663, "mem_125491") + double_buffer_mem_125535 = opencl_alloc(self, total_sizze_125664, + "double_buffer_mem_125535") + double_buffer_mem_125536 = opencl_alloc(self, total_sizze_125665, + "double_buffer_mem_125536") + double_buffer_mem_125537 = opencl_alloc(self, total_sizze_125666, + 
"double_buffer_mem_125537") + double_buffer_mem_125548 = opencl_alloc(self, total_sizze_125667, + "double_buffer_mem_125548") + if ((1 * (np.int64(num_groups_78573) * np.int64(segmap_group_sizze_78572))) != 0): + self.mainDetailedzisegmap_78117_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_70948), + np.byte(y_70952), + np.byte(ok_or_empty_70954), + np.int64(min_res_71066), + np.int64(k_71067), + np.int64(num_groups_78573), + np.int64(binop_x_120251), + np.int64(num_threads_125643), + mem_120252, mem_120257, + mem_120261, mem_120265, + mem_120268, mem_120271, + mem_120273, mem_120608, + mem_120649, mem_120661, + mem_120690, mem_120763, + mem_120778, mem_120790, + mem_120801, mem_120821, + mem_120824, mem_120878, + mem_120881, mem_120883, + mem_125150, mem_125152, + mem_125160, mem_125421, + mem_125429, mem_125431, + mem_125491, + double_buffer_mem_125535, + double_buffer_mem_125536, + double_buffer_mem_125537, + double_buffer_mem_125548) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_78117_var, + ((np.int64(num_groups_78573) * np.int64(segmap_group_sizze_78572)),), + (np.int64(segmap_group_sizze_78572),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_120257 = None + mem_120261 = None + mem_120265 = None + mem_120268 = None + mem_120271 = None + mem_120273 = None + mem_120608 = None + mem_120649 = None + mem_120661 = None + mem_120690 = None + mem_120763 = None + mem_120778 = None + mem_120790 = None + mem_120801 = None + mem_120821 = None + mem_120824 = None + mem_125150 = None + mem_125152 = None + mem_125160 = None + mem_125421 = None + mem_125429 = None + mem_125431 = None + mem_125491 = None + double_buffer_mem_125535 = None + double_buffer_mem_125536 = None + double_buffer_mem_125537 = None + double_buffer_mem_125548 = None + mem_121923 = opencl_alloc(self, bytes_121997, "mem_121923") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_121923, np.int64(0), + mem_120878, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_120878 = None + mem_121927 = opencl_alloc(self, bytes_121990, "mem_121927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121927, np.int64(0), + mem_120881, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_120881 = None + defunc_5_map_res_mem_121929 = mem_121923 + defunc_5_map_res_mem_121930 = mem_121927 + defunc_5_map_res_mem_121931 = mem_120883 + else: + mem_120886 = opencl_alloc(self, bytes_121990, "mem_120886") + self.futhark_builtinzhreplicate_f64(mem_120886, + (m_70861 * k2p2zq_70876), + np.float64(0.0)) + mem_120890 = opencl_alloc(self, bytes_121993, "mem_120890") + self.futhark_builtinzhreplicate_f64(mem_120890, + ((m_70861 * np.int64(2)) * k2p2zq_70876), + np.float64(0.0)) + mem_120894 = opencl_alloc(self, bytes_121997, "mem_120894") + group_sizze_126697 = self.sizes["mainDetailed.group_size_126697"] + num_groups_126698 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_126697) + if ((1 * (np.int64(num_groups_126698) * np.int64(group_sizze_126697))) != 0): + self.mainDetailedzicopy_126694_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + mem_120246, mem_120894) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126694_var, + ((np.int64(num_groups_126698) * np.int64(group_sizze_126697)),), + (np.int64(group_sizze_126697),)) + if synchronous: + sync(self) + mem_param_120902 = mem_120886 + mem_param_120913 = mem_120890 + j_80634 = np.int64(0) + one_129887 = np.int64(1) + for counter_129886 in range(k2p2zq_70876): + index_certs_80637 = True + assert ok_or_empty_70954, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 
recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n" % ("Index [", + j_80634, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_70876, + "][", + k2p2zq_70876, + "].")) + local_memory_capacity_126797 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126797)) and suff_outer_par_80640): + mem_120923 = opencl_alloc(self, bytes_121990, "mem_120923") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120923, np.int64(0), + mem_param_120902, + np.int64(0), + np.int64(1), + k2p2zq_70876, m_70861) + mem_120927 = opencl_alloc(self, bytes_120924, "mem_120927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120927, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + mem_120931 = opencl_alloc(self, bytes_121990, "mem_120931") + mem_120935 = opencl_alloc(self, bytes_120924, "mem_120935") + if ((1 * (np.int64(num_groups_80645) * np.int64(segmap_group_sizze_80644))) != 0): + self.mainDetailedzisegmap_80381_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(j_80634), + np.int64(num_groups_80645), + mem_120894, + mem_120923, + mem_120927, + mem_120931, + mem_120935) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80381_var, + ((np.int64(num_groups_80645) * np.int64(segmap_group_sizze_80644)),), + (np.int64(segmap_group_sizze_80644),)) + if synchronous: + sync(self) + mem_120923 = None + mem_120927 = None + mem_120959 = opencl_alloc(self, bytes_121990, "mem_120959") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120959, np.int64(0), + mem_120931, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_120931 = None + dqrdc2_res_mem_120965 = mem_120959 + dqrdc2_res_mem_120966 = mem_120935 + else: + mem_120938 = opencl_alloc(self, bytes_120173, "mem_120938") + if 
slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_80666): + segment_sizze_nonzzero_126715 = smax64(np.int64(1), k2p2zq_70876) + num_threads_126716 = (num_groups_80667 * segred_group_sizze_80666) + if ((1 * (np.int64(num_groups_80667) * np.int64(segred_group_sizze_80666))) != 0): + self.mainDetailedzisegred_small_80466_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_80666))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(j_80634), + np.int64(num_groups_80667), + np.int64(segment_sizze_nonzzero_126715), + mem_120246, + mem_120938) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_80466_var, + ((np.int64(num_groups_80667) * np.int64(segred_group_sizze_80666)),), + (np.int64(segred_group_sizze_80666),)) + if synchronous: + sync(self) + else: + groups_per_segment_126736 = sdiv_up64(num_groups_80667, + smax64(np.int64(1), + m_70861)) + elements_per_thread_126737 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_80666 * groups_per_segment_126736)) + virt_num_groups_126738 = (groups_per_segment_126736 * m_70861) + num_threads_126739 = (num_groups_80667 * segred_group_sizze_80666) + threads_per_segment_126740 = (groups_per_segment_126736 * segred_group_sizze_80666) + group_res_arr_mem_126741 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_80666 * virt_num_groups_126738)), + "group_res_arr_mem_126741") + mainDetailedzicounter_mem_126743 = self.mainDetailedzicounter_mem_126743 + if ((1 * (np.int64(num_groups_80667) * np.int64(segred_group_sizze_80666))) != 0): + self.mainDetailedzisegred_large_80466_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_80666))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(j_80634), + np.int64(num_groups_80667), + np.int64(groups_per_segment_126736), + 
np.int64(elements_per_thread_126737), + np.int64(virt_num_groups_126738), + np.int64(threads_per_segment_126740), + mem_120246, + mem_120938, + group_res_arr_mem_126741, + mainDetailedzicounter_mem_126743) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_80466_var, + ((np.int64(num_groups_80667) * np.int64(segred_group_sizze_80666)),), + (np.int64(segred_group_sizze_80666),)) + if synchronous: + sync(self) + mem_120941 = opencl_alloc(self, bytes_120173, "mem_120941") + if ((1 * (np.int64(segmap_usable_groups_80678) * np.int64(segmap_group_sizze_80677))) != 0): + self.mainDetailedzisegmap_80450_var.set_args(self.global_failure, + np.int64(m_70861), + mem_120938, + mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80450_var, + ((np.int64(segmap_usable_groups_80678) * np.int64(segmap_group_sizze_80677)),), + (np.int64(segmap_group_sizze_80677),)) + if synchronous: + sync(self) + mem_120938 = None + if ((1 * (np.int64(segmap_usable_groups_80685) * np.int64(segmap_group_sizze_80684))) != 0): + self.mainDetailedzisegmap_80441_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(j_80634), + mem_param_120902, + mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80441_var, + ((np.int64(segmap_usable_groups_80685) * np.int64(segmap_group_sizze_80684)),), + (np.int64(segmap_group_sizze_80684),)) + if synchronous: + sync(self) + mem_120946 = opencl_alloc(self, bytes_120924, "mem_120946") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120946, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + mem_120948 = opencl_alloc(self, bytes_120947, "mem_120948") + tmp_offs_126785 = np.int64(0) + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120941, + dest_offset=np.int64((tmp_offs_126785 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * 
np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126785 = (tmp_offs_126785 + m_70861) + if ((y_115429 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120254, + dest_offset=np.int64((tmp_offs_126785 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115429 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126785 = (tmp_offs_126785 + y_115429) + mem_120951 = opencl_alloc(self, bytes_120949, "mem_120951") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120951, np.int64(0), + mem_120948, np.int64(0), + np.int64(1), + per_chunk_115432, + num_threads_115425) + mem_120948 = None + mem_120956 = opencl_alloc(self, bytes_120924, "mem_120956") + if ((1 * (np.int64(num_groups_80690) * np.int64(segmap_group_sizze_80689))) != 0): + self.mainDetailedzisegmap_80426_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(j_80634), + np.int64(num_groups_80690), + np.int64(num_threads_115425), + np.int64(per_chunk_115432), + mem_120941, + mem_120946, + mem_120951, + mem_120956) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80426_var, + ((np.int64(num_groups_80690) * np.int64(segmap_group_sizze_80689)),), + (np.int64(segmap_group_sizze_80689),)) + if synchronous: + sync(self) + mem_120941 = None + mem_120946 = None + mem_120951 = None + mem_120963 = opencl_alloc(self, bytes_121990, "mem_120963") + if (((m_70861 * k2p2zq_70876) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120963, mem_param_120902, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_70861 * k2p2zq_70876) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_120965 = mem_120963 + dqrdc2_res_mem_120966 = mem_120956 + mem_120972 = opencl_alloc(self, bytes_121993, "mem_120972") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120972, np.int64(0), + dqrdc2_res_mem_120966, + np.int64(0), np.int64(1), + m_70861, + (np.int64(2) * 
k2p2zq_70876)) + dqrdc2_res_mem_120966 = None + mem_param_tmp_126699 = dqrdc2_res_mem_120965 + mem_param_tmp_126700 = mem_120972 + mem_param_120902 = mem_param_tmp_126699 + mem_param_120913 = mem_param_tmp_126700 + j_80634 += one_129887 + dqrdc2_res_r_mem_120986 = mem_param_120902 + dqrdc2_res_r_mem_120997 = mem_param_120913 + mem_120886 = None + mem_120890 = None + mem_120894 = None + mem_121001 = opencl_alloc(self, bytes_120258, "mem_121001") + group_sizze_126801 = self.sizes["mainDetailed.group_size_126801"] + num_groups_126802 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_126801) + if ((1 * (np.int64(num_groups_126802) * np.int64(group_sizze_126801))) != 0): + self.mainDetailedzicopy_126798_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + mem_120246, mem_121001) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126798_var, + ((np.int64(num_groups_126802) * np.int64(group_sizze_126801)),), + (np.int64(group_sizze_126801),)) + if synchronous: + sync(self) + mem_121004 = opencl_alloc(self, bytes_121990, "mem_121004") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121004, np.int64(0), + dqrdc2_res_r_mem_120986, + np.int64(0), np.int64(1), + k2p2zq_70876, m_70861) + dqrdc2_res_r_mem_120986 = None + mem_121008 = opencl_alloc(self, bytes_120924, "mem_121008") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121008, np.int64(0), + dqrdc2_res_r_mem_120997, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + dqrdc2_res_r_mem_120997 = None + mem_121335 = opencl_alloc(self, bytes_120258, "mem_121335") + mem_121338 = opencl_alloc(self, bytes_121990, "mem_121338") + mem_121341 = opencl_alloc(self, bytes_121990, "mem_121341") + mem_121343 = opencl_alloc(self, bytes_120173, "mem_121343") + mem_121011 = opencl_alloc(self, total_sizze_125674, "mem_121011") + mem_125167 = opencl_alloc(self, total_sizze_125675, "mem_125167") + mem_125169 = opencl_alloc(self, 
total_sizze_125676, "mem_125169") + mem_125438 = opencl_alloc(self, total_sizze_125677, "mem_125438") + mem_125446 = opencl_alloc(self, total_sizze_125678, "mem_125446") + mem_125448 = opencl_alloc(self, total_sizze_125679, "mem_125448") + mem_125498 = opencl_alloc(self, total_sizze_125680, "mem_125498") + double_buffer_mem_125552 = opencl_alloc(self, total_sizze_125681, + "double_buffer_mem_125552") + double_buffer_mem_125553 = opencl_alloc(self, total_sizze_125682, + "double_buffer_mem_125553") + double_buffer_mem_125554 = opencl_alloc(self, total_sizze_125683, + "double_buffer_mem_125554") + if ((1 * (np.int64(num_groups_80703) * np.int64(segmap_group_sizze_80702))) != 0): + self.mainDetailedzisegmap_80093_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_70948), + np.byte(y_70952), + np.int64(min_res_71066), + np.int64(k_71067), + np.int64(num_groups_80703), + np.int64(num_threads_125673), + mem_120248, mem_121001, + mem_121004, mem_121008, + mem_121011, mem_121335, + mem_121338, mem_121341, + mem_121343, mem_125167, + mem_125169, mem_125438, + mem_125446, mem_125448, + mem_125498, + double_buffer_mem_125552, + double_buffer_mem_125553, + double_buffer_mem_125554) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80093_var, + ((np.int64(num_groups_80703) * np.int64(segmap_group_sizze_80702)),), + (np.int64(segmap_group_sizze_80702),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121001 = None + mem_121004 = None + mem_121008 = None + mem_121011 = None + mem_125167 = None + mem_125169 = None + mem_125438 = None + mem_125446 = None + mem_125448 = None + mem_125498 = None + double_buffer_mem_125552 = None + double_buffer_mem_125553 = None + double_buffer_mem_125554 = None + segmap_usable_groups_80989 = sdiv_up64(binop_x_120244, + segmap_group_sizze_80988) + mem_121346 = opencl_alloc(self, binop_x_120244, 
"mem_121346") + if ((1 * (np.int64(segmap_usable_groups_80989) * np.int64(segmap_group_sizze_80988))) != 0): + self.mainDetailedzisegmap_80048_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121343, mem_121346) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80048_var, + ((np.int64(segmap_usable_groups_80989) * np.int64(segmap_group_sizze_80988)),), + (np.int64(segmap_group_sizze_80988),)) + if synchronous: + sync(self) + segmap_usable_groups_81004 = sdiv_up64(nest_sizze_81002, + segmap_group_sizze_81003) + mem_121351 = opencl_alloc(self, bytes_121997, "mem_121351") + if ((1 * (np.int64(segmap_usable_groups_81004) * np.int64(segmap_group_sizze_81003))) != 0): + self.mainDetailedzisegmap_80013_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121335, mem_121343, + mem_121346, mem_121351) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_80013_var, + ((np.int64(segmap_usable_groups_81004) * np.int64(segmap_group_sizze_81003)),), + (np.int64(segmap_group_sizze_81003),)) + if synchronous: + sync(self) + mem_121346 = None + local_memory_capacity_127012 = self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127012)) and sle64((((((bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127012))) and suff_outer_par_81027): + mem_121355 = opencl_alloc(self, bytes_121997, "mem_121355") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121355, np.int64(0), + mem_121351, np.int64(0), + 
m_70861, k2p2zq_70876, + k2p2zq_70876) + mem_121359 = opencl_alloc(self, bytes_121997, "mem_121359") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121359, np.int64(0), + mem_121355, np.int64(0), + np.int64(1), k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_121355 = None + mem_121363 = opencl_alloc(self, bytes_121997, "mem_121363") + group_sizze_126885 = self.sizes["mainDetailed.group_size_126885"] + num_groups_126886 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_126885) + if ((1 * (np.int64(num_groups_126886) * np.int64(group_sizze_126885))) != 0): + self.mainDetailedzicopy_126882_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121351, mem_121363) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_126882_var, + ((np.int64(num_groups_126886) * np.int64(group_sizze_126885)),), + (np.int64(group_sizze_126885),)) + if synchronous: + sync(self) + mem_121366 = opencl_alloc(self, bytes_120250, "mem_121366") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121366, np.int64(0), + mem_120252, np.int64(0), + np.int64(1), k2p2zq_70876, + k2p2zq_70876) + num_groups_x_115658 = sdiv_up64(m_70861, tile_sizze_115656) + num_groups_y_115659 = sdiv_up64(k2p2zq_70876, tile_sizze_115656) + num_groups_top_115660 = (num_groups_x_115658 * num_groups_y_115659) + mem_121368 = opencl_alloc(self, bytes_120247, "mem_121368") + self.futhark_builtinzhreplicate_f64(mem_121368, k2p2zq_70876, + np.float64(0.0)) + mem_121446 = opencl_alloc(self, bytes_121997, "mem_121446") + num_threads_125686 = (group_sizze_115657 * num_groups_top_115660) + total_sizze_125687 = (bytes_120247 * num_threads_125686) + mem_125177 = opencl_alloc(self, total_sizze_125687, "mem_125177") + if ((1 * (np.int64(num_groups_top_115660) * np.int64(group_sizze_115657))) != 0): + self.mainDetailedzisegmap_intragroup_115661_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + 
cl.LocalMemory(np.int64(bytes_121374)), + cl.LocalMemory(np.int64(bytes_121393)), + cl.LocalMemory(np.int64(bytes_121393)), + cl.LocalMemory(np.int64(bytes_121374)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_y_115659), + np.int64(ctx_val_121390), + np.int64(num_threads_125686), + mem_121359, + mem_121363, + mem_121366, + mem_121368, + mem_121446, + mem_125177) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_115661_var, + ((np.int64(num_groups_top_115660) * np.int64(group_sizze_115657)),), + (np.int64(group_sizze_115657),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121359 = None + mem_121363 = None + mem_121366 = None + mem_121368 = None + mem_125177 = None + defunc_3_map_res_r_mem_121609 = mem_121446 else: - for (out_vname, val) in zip(out_vnames, vals): - self._vars[out_vname] = val - - def _cmd_store(self, args): - fname = self._get_arg(args, 0) - - with open(fname, 'wb') as f: - for i in range(1, len(args)): - vname = args[i] - value = self._get_var(vname) - # In case we are using the PyOpenCL backend, we first - # need to convert OpenCL arrays to ordinary NumPy - # arrays. We do this in a nasty way. - if isinstance(value, np.number) or isinstance(value, np.bool) or isinstance(value, np.bool_) or isinstance(value, np.ndarray): - # Ordinary NumPy value. 
- f.write(construct_binary_value(self._vars[vname])) + mem_121450 = opencl_alloc(self, bytes_121997, "mem_121450") + self.futhark_builtinzhreplicate_f64(mem_121450, + ((m_70861 * k2p2zq_70876) * k2p2zq_70876), + np.float64(0.0)) + mem_121454 = opencl_alloc(self, bytes_121997, "mem_121454") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121454, np.int64(0), + mem_121351, np.int64(0), + m_70861, k2p2zq_70876, + k2p2zq_70876) + mem_121458 = opencl_alloc(self, bytes_121997, "mem_121458") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121458, np.int64(0), + mem_121454, np.int64(0), + np.int64(1), k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_121454 = None + mem_param_121469 = mem_121450 + i_81091 = np.int64(0) + one_129890 = np.int64(1) + for counter_129889 in range(k2p2zq_70876): + x_81093 = (k2p2zq_70876 - i_81091) + i_81094 = (x_81093 - np.int64(1)) + x_81095 = sle64(np.int64(0), i_81094) + y_81096 = slt64(i_81094, k2p2zq_70876) + bounds_check_81097 = (x_81095 and y_81096) + j_m_i_81098 = (k2p2zq_70876 - x_81093) + empty_slice_81099 = (j_m_i_81098 == np.int64(0)) + m_81100 = (j_m_i_81098 - np.int64(1)) + i_p_m_t_s_81101 = (x_81093 + m_81100) + zzero_leq_i_p_m_t_s_81102 = sle64(np.int64(0), i_p_m_t_s_81101) + i_p_m_t_s_leq_w_81103 = slt64(i_p_m_t_s_81101, k2p2zq_70876) + zzero_lte_i_81104 = sle64(np.int64(0), x_81093) + i_lte_j_81105 = sle64(x_81093, k2p2zq_70876) + y_81106 = (i_p_m_t_s_leq_w_81103 and zzero_lte_i_81104) + y_81107 = (zzero_leq_i_p_m_t_s_81102 and y_81106) + y_81108 = (i_lte_j_81105 and y_81107) + forwards_ok_81109 = (zzero_lte_i_81104 and y_81108) + ok_or_empty_81110 = (empty_slice_81099 or forwards_ok_81109) + index_ok_81111 = (bounds_check_81097 and ok_or_empty_81110) + index_certs_81112 = True + assert index_ok_81111, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 
/prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_81094, + ", ", + x_81093, + ":", + k2p2zq_70876, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + k2p2zq_70876, + "].")) + index_certs_81113 = True + assert ok_or_empty_81110, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + x_81093, + ":", + k2p2zq_70876, + "] out of bounds for array of shape [", + k2p2zq_70876, + "].")) + index_ok_81114 = (bounds_check_81097 and bounds_check_81097) + index_certs_81115 = True + assert index_ok_81114, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_81094, + ", ", + i_81094, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + k2p2zq_70876, + "].")) + index_certs_81116 = True + assert bounds_check_81097, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 
recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_81094, + "] out of bounds for array of shape [", + k2p2zq_70876, + "].")) + nest_sizze_81178 = (j_m_i_81098 * binop_x_120244) + max_num_groups_126909 = self.sizes["mainDetailed.segred_num_groups_79705"] + num_groups_81180 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_81178, + segred_group_sizze_81179), + sext_i32_i64(max_num_groups_126909)))) + local_memory_capacity_127011 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127011)) and suff_outer_par_81122): + mem_121476 = opencl_alloc(self, bytes_120258, "mem_121476") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121476, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_121504 = opencl_alloc(self, bytes_120258, "mem_121504") + mem_121480 = opencl_alloc(self, total_sizze_125689, "mem_121480") + mem_121492 = opencl_alloc(self, total_sizze_125690, "mem_121492") + if ((1 * (np.int64(num_groups_81126) * np.int64(segmap_group_sizze_81125))) != 0): + self.mainDetailedzisegmap_79587_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_81093), + np.int64(i_81094), + np.int64(j_m_i_81098), + np.int64(num_groups_81126), + np.int64(num_threads_125688), + mem_120252, + mem_121351, + mem_121458, + mem_121476, + mem_121480, + mem_121492, + mem_121504) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79587_var, + ((np.int64(num_groups_81126) * np.int64(segmap_group_sizze_81125)),), + (np.int64(segmap_group_sizze_81125),)) + if synchronous: + sync(self) + mem_121476 = None + mem_121480 = None + mem_121492 = None + mem_121576 = opencl_alloc(self, 
bytes_121997, "mem_121576") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121576, + np.int64(0), + mem_121504, + np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_121504 = None + defunc_3_map_res_mem_121583 = mem_121576 + else: + local_memory_capacity_127010 = self.max_local_memory + if (sle64((((bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8))) + (bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8)))) + (bytes_121544 + srem64((np.int64(8) - srem64(bytes_121544, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127010)) and suff_outer_par_81156): + mem_121508 = opencl_alloc(self, bytes_121997, "mem_121508") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121508, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_121512 = opencl_alloc(self, bytes_121997, "mem_121512") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121512, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + num_whole_tiles_116039 = squot64(j_m_i_81098, tile_sizze_116018) + residual_input_116172 = srem64(j_m_i_81098, tile_sizze_116018) + cond_116173 = (residual_input_116172 == np.int64(0)) + mem_121551 = opencl_alloc(self, bytes_121997, "mem_121551") + mem_125219 = opencl_alloc(self, total_sizze_125692, + "mem_125219") + if ((1 * (np.int64(num_groups_top_116022) * np.int64(group_sizze_116019))) != 0): + self.mainDetailedzisegmap_intragroup_116023_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121544)), + cl.LocalMemory(np.int64(bytes_121515)), + cl.LocalMemory(np.int64(bytes_121515)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_81093), + np.int64(i_81094), + np.int64(j_m_i_81098), + np.int64(num_groups_y_116021), + np.int64(num_whole_tiles_116039), + np.int64(residual_input_116172), + np.byte(cond_116173), + 
np.int64(num_threads_125691), + mem_120252, + mem_121351, + mem_121458, + mem_121508, + mem_121512, + mem_121551, + mem_125219) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_116023_var, + ((np.int64(num_groups_top_116022) * np.int64(group_sizze_116019)),), + (np.int64(group_sizze_116019),)) + if synchronous: + sync(self) + mem_121508 = None + mem_121512 = None + mem_125219 = None + defunc_3_map_res_mem_121572 = mem_121551 + else: + mem_121555 = opencl_alloc(self, bytes_121990, "mem_121555") + if slt64((j_m_i_81098 * np.int64(2)), segred_group_sizze_81179): + segment_sizze_nonzzero_126940 = smax64(np.int64(1), + j_m_i_81098) + num_threads_126941 = (num_groups_81180 * segred_group_sizze_81179) + if ((1 * (np.int64(num_groups_81180) * np.int64(segred_group_sizze_81179))) != 0): + self.mainDetailedzisegred_small_79709_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_81179))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_81093), + np.int64(i_81094), + np.int64(j_m_i_81098), + np.int64(num_groups_81180), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_126940), + mem_121351, + mem_param_121469, + mem_121555) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_79709_var, + ((np.int64(num_groups_81180) * np.int64(segred_group_sizze_81179)),), + (np.int64(segred_group_sizze_81179),)) + if synchronous: + sync(self) else: - # Assuming PyOpenCL array. 
- f.write(construct_binary_value(self._vars[vname].get())) - - def _cmd_restore(self, args): - if len(args) % 2 == 0: - raise self.Failure('Invalid argument count') - - fname = args[0] - args = args[1:] - - with open(fname, 'rb') as f: - reader = ReaderInput(f) - while args != []: - vname = args[0] - typename = args[1] - args = args[2:] - - if vname in self._vars: - raise self.Failure('Variable already exists: %s' % vname) - - try: - self._vars[vname] = read_value(typename, reader) - except ValueError: - raise self.Failure('Failed to restore variable %s.\n' - 'Possibly malformed data in %s.\n' - % (vname, fname)) - - skip_spaces(reader) - if reader.get_char() != b'': - raise self.Failure('Expected EOF after reading values') - - _commands = { 'inputs': _cmd_inputs, - 'outputs': _cmd_outputs, - 'call': _cmd_call, - 'restore': _cmd_restore, - 'store': _cmd_store, - 'free': _cmd_free, - 'clear': _cmd_dummy, - 'pause_profiling': _cmd_dummy, - 'unpause_profiling': _cmd_dummy, - 'report': _cmd_dummy - } - - def _process_line(self, line): - words = line.split() - if words == []: - raise self.Failure('Empty line') + groups_per_segment_126961 = sdiv_up64(num_groups_81180, + smax64(np.int64(1), + (m_70861 * k2p2zq_70876))) + elements_per_thread_126962 = sdiv_up64(j_m_i_81098, + (segred_group_sizze_81179 * groups_per_segment_126961)) + virt_num_groups_126963 = (groups_per_segment_126961 * (m_70861 * k2p2zq_70876)) + num_threads_126964 = (num_groups_81180 * segred_group_sizze_81179) + threads_per_segment_126965 = (groups_per_segment_126961 * segred_group_sizze_81179) + group_res_arr_mem_126966 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_81179 * virt_num_groups_126963)), + "group_res_arr_mem_126966") + mainDetailedzicounter_mem_126968 = self.mainDetailedzicounter_mem_126968 + if ((1 * (np.int64(num_groups_81180) * np.int64(segred_group_sizze_81179))) != 0): + self.mainDetailedzisegred_large_79709_var.set_args(self.global_failure, + 
cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_81179))), + np.int64(k2p2zq_70876), + np.int64(x_81093), + np.int64(i_81094), + np.int64(j_m_i_81098), + np.int64(num_groups_81180), + np.int64(binop_x_120251), + np.int64(groups_per_segment_126961), + np.int64(elements_per_thread_126962), + np.int64(virt_num_groups_126963), + np.int64(threads_per_segment_126965), + mem_121351, + mem_param_121469, + mem_121555, + group_res_arr_mem_126966, + mainDetailedzicounter_mem_126968) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_79709_var, + ((np.int64(num_groups_81180) * np.int64(segred_group_sizze_81179)),), + (np.int64(segred_group_sizze_81179),)) + if synchronous: + sync(self) + mem_121559 = opencl_alloc(self, bytes_121990, "mem_121559") + if ((1 * (np.int64(segmap_usable_groups_81197) * np.int64(segmap_group_sizze_81196))) != 0): + self.mainDetailedzisegmap_79687_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(i_81094), + mem_120252, + mem_121351, + mem_121555, + mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79687_var, + ((np.int64(segmap_usable_groups_81197) * np.int64(segmap_group_sizze_81196)),), + (np.int64(segmap_group_sizze_81196),)) + if synchronous: + sync(self) + mem_121555 = None + if ((1 * (np.int64(segmap_usable_groups_81208) * np.int64(segmap_group_sizze_81207))) != 0): + self.mainDetailedzisegmap_79675_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(i_81094), + np.int64(binop_x_120251), + mem_param_121469, + mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79675_var, + ((np.int64(segmap_usable_groups_81208) * np.int64(segmap_group_sizze_81207)),), + (np.int64(segmap_group_sizze_81207),)) + if synchronous: + sync(self) + mem_121559 = None + defunc_3_map_res_mem_121572 = mem_param_121469 + mem_121581 = opencl_alloc(self, 
bytes_121997, "mem_121581") + if ((((m_70861 * k2p2zq_70876) * k2p2zq_70876) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121581, + defunc_3_map_res_mem_121572, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_70861 * k2p2zq_70876) * k2p2zq_70876) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_121572 = None + defunc_3_map_res_mem_121583 = mem_121581 + mem_param_tmp_126907 = defunc_3_map_res_mem_121583 + mem_param_121469 = mem_param_tmp_126907 + i_81091 += one_129890 + defunc_3_map_res_r_mem_121597 = mem_param_121469 + mem_121450 = None + mem_121458 = None + defunc_3_map_res_r_mem_121609 = defunc_3_map_res_r_mem_121597 + mem_121351 = None + local_memory_capacity_127145 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127145)) and suff_outer_par_81223): + mem_121613 = opencl_alloc(self, bytes_121997, "mem_121613") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121613, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + mem_121632 = opencl_alloc(self, bytes_121997, "mem_121632") + mem_121616 = opencl_alloc(self, total_sizze_125697, "mem_121616") + if ((1 * (np.int64(num_groups_81219) * np.int64(segmap_group_sizze_81218))) != 0): + self.mainDetailedzisegmap_79371_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_81219), + np.int64(binop_x_120251), + np.int64(num_threads_125696), + defunc_3_map_res_r_mem_121609, + mem_121613, mem_121616, + mem_121632) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79371_var, + ((np.int64(num_groups_81219) * np.int64(segmap_group_sizze_81218)),), + (np.int64(segmap_group_sizze_81218),)) + if synchronous: + sync(self) + mem_121613 = None + mem_121616 = None + mem_121845 = opencl_alloc(self, bytes_121997, "mem_121845") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121845, np.int64(0), + 
mem_121632, np.int64(0), + np.int64(1), + (m_70861 * k2p2zq_70876), + k2p2zq_70876) + mem_121632 = None + defunc_3_map_res_r_mem_121847 = mem_121845 else: - cmd = words[0] - args = words[1:] - if cmd in self._commands: - self._commands[cmd](self, args) + local_memory_capacity_127144 = self.max_local_memory + if (sle64(((bytes_121651 + srem64((np.int64(8) - srem64(bytes_121651, + np.int64(8))), + np.int64(8))) + (bytes_121653 + srem64((np.int64(8) - srem64(bytes_121653, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127144)) and suff_outer_par_81245): + mem_121636 = opencl_alloc(self, bytes_121997, "mem_121636") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121636, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + tk_div_tx_116327 = sdiv_up64(Tk_116326, Tx_116324) + tk_div_ty_116328 = sdiv_up64(Tk_116326, Ty_116322) + gridDim_x_116335 = sdiv_up64(k2p2zq_70876, TxRx_116329) + gridDim_y_116336 = sdiv_up64(k2p2zq_70876, TyRy_116330) + binop_y_116337 = (gridDim_x_116335 * gridDim_y_116336) + grid_sizze_116338 = (m_70861 * binop_y_116337) + full_tiles_116367 = squot64(k2p2zq_70876, Tk_116326) + kk_116570 = (Tk_116326 * full_tiles_116367) + mem_121827 = opencl_alloc(self, bytes_121997, "mem_121827") + if ((1 * (np.int64(grid_sizze_116338) * np.int64(group_sizze_116339))) != 0): + self.mainDetailedzisegmap_intragroup_116342_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121653)), + cl.LocalMemory(np.int64(bytes_121651)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(gridDim_x_116335), + np.int64(gridDim_y_116336), + np.int64(full_tiles_116367), + np.int64(kk_116570), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121609, + mem_121636, + mem_121827) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_116342_var, + ((np.int64(grid_sizze_116338) * np.int64(group_sizze_116339)),), + (np.int64(group_sizze_116339),)) + if 
synchronous: + sync(self) + mem_121636 = None + defunc_3_map_res_r_mem_121841 = mem_121827 + else: + mem_121831 = opencl_alloc(self, bytes_121997, "mem_121831") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121831, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_121835 = opencl_alloc(self, bytes_121997, "mem_121835") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121835, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + mem_121840 = opencl_alloc(self, bytes_121997, "mem_121840") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_81261): + segment_sizze_nonzzero_127084 = smax64(np.int64(1), k2p2zq_70876) + num_threads_127085 = (num_groups_81262 * segred_group_sizze_81261) + if ((1 * (np.int64(num_groups_81262) * np.int64(segred_group_sizze_81261))) != 0): + self.mainDetailedzisegred_small_79431_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_81261))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_81262), + np.int64(segment_sizze_nonzzero_127084), + mem_121831, + mem_121835, + mem_121840) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_79431_var, + ((np.int64(num_groups_81262) * np.int64(segred_group_sizze_81261)),), + (np.int64(segred_group_sizze_81261),)) + if synchronous: + sync(self) else: - raise self.Failure('Unknown command: %s' % cmd) - - def run(self): - while True: - line = sys.stdin.readline() - if line == '': - return - try: - self._process_line(line) - except self.Failure as e: - print('%%% FAILURE') - print(e.msg) - print('%%% OK', flush=True) - - -# End of server.py -class bfastfinal: - entry_points = {"convertToFloat": (["i16", "[][][]i16"], ["[][][]f32"]), - "main": (["i32", "i32", "i32", "f32", "f32", "f32", "[]i32", - "[][]f32"], ["[]i32", "[]i32", "[]f32"]), - "mainDetailed": (["i32", "i32", "i32", "f32", 
"f32", "f32", - "[]i32", "[][]f32"], ["[]f32", "[]i32", - "[]i32", "[]f32", - "[][]f32", "[][]f32", - "[]f32", "[]i32", - "[]f32", "[]f32", - "[][]f32", - "[][]f32"]), - "mainMagnitude": (["i32", "i32", "i32", "f32", "f32", "f32", - "[]i32", "[][]f32"], ["[]i32", "[]i32", - "[]f32", "[]f32"]), - "reshapeTransp": (["[][][]f32"], ["[][]f32"])} - def __init__(self, command_queue=None, interactive=False, - platform_pref=preferred_platform, device_pref=preferred_device, - default_group_size=default_group_size, - default_num_groups=default_num_groups, - default_tile_size=default_tile_size, - default_reg_tile_size=default_reg_tile_size, - default_threshold=default_threshold, sizes=sizes): - size_heuristics=[("NVIDIA CUDA", cl.device_type.GPU, "lockstep_width", - lambda device: np.int32(32)), ("AMD Accelerated Parallel Processing", - cl.device_type.GPU, "lockstep_width", - lambda device: np.int32(32)), ("", - cl.device_type.GPU, - "lockstep_width", - lambda device: np.int32(1)), - ("", cl.device_type.GPU, "num_groups", - lambda device: (np.int32(4) * device.get_info(getattr(cl.device_info, - "MAX_COMPUTE_UNITS")))), - ("", cl.device_type.GPU, "group_size", lambda device: np.int32(256)), ("", - cl.device_type.GPU, - "tile_size", - lambda device: np.int32(32)), - ("", cl.device_type.GPU, "reg_tile_size", lambda device: np.int32(2)), ("", - cl.device_type.GPU, - "threshold", - lambda device: np.int32(32768)), - ("", cl.device_type.CPU, "lockstep_width", lambda device: np.int32(1)), ("", - cl.device_type.CPU, - "num_groups", - lambda device: device.get_info(getattr(cl.device_info, - "MAX_COMPUTE_UNITS"))), - ("", cl.device_type.CPU, "group_size", lambda device: np.int32(32)), ("", - cl.device_type.CPU, - "tile_size", - lambda device: np.int32(4)), - ("", cl.device_type.CPU, "reg_tile_size", lambda device: np.int32(1)), ("", - cl.device_type.CPU, - "threshold", - lambda device: device.get_info(getattr(cl.device_info, - "MAX_COMPUTE_UNITS")))] - self.global_failure_args_max = 4 
- self.failure_msgs=["Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n 
#5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds 
for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:106:37-57\n #1 bfastfinal.fut:106:15-109:36\n #2 bfastfinal.fut:189:5-58\n #3 bfastfinal.fut:184:1-190:25\n", - 
"Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n", - "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 
helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 
bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 
bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:106:37-57\n #1 bfastfinal.fut:106:15-109:36\n #2 bfastfinal.fut:174:3-56\n #3 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:116:54-68\n #1 bfastfinal.fut:116:13-119:36\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:114:20-131:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:117:29-43\n #1 bfastfinal.fut:116:13-119:36\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:114:20-131:9\n #4 
bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:8:17-21\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:116:13-121:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:114:20-131:9\n #6 bfastfinal.fut:174:3-56\n #7 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:9:20-24\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:116:13-121:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:114:20-131:9\n #6 bfastfinal.fut:174:3-56\n #7 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 
/prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:129:39-43\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:129:47-51\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:130:38-42\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:161:29-83\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 
bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:161:29-83\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:161:29-83\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 
bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n", - "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}, {}] out of bounds for array of shape [{}][{}].\n-> #0 helpers.fut:75:45-50\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 
bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:61:50-71\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:62:37-48\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:38-65\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:64:69-95\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:181:5-58\n #6 
bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:86:51-60\n #1 bfastfinal.fut:86:25-83\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 bfastfinal.fut:83:5-91:22\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:100:43-65\n #1 bfastfinal.fut:99:27-102:47\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:97:17-104:24\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:106:37-57\n #1 bfastfinal.fut:106:15-109:36\n #2 
bfastfinal.fut:181:5-58\n #3 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:116:54-68\n #1 bfastfinal.fut:116:13-119:36\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:114:20-131:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:117:29-43\n #1 bfastfinal.fut:116:13-119:36\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:114:20-131:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:8:17-21\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:116:13-121:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:114:20-131:9\n #6 bfastfinal.fut:181:5-58\n #7 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:9:20-24\n #1 lib/github.com/diku-dk/sorts/insertion_sort.fut:20:28-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:116:13-121:42\n #4 /prelude/functional.fut:9:42-44\n #5 bfastfinal.fut:114:20-131:9\n #6 bfastfinal.fut:181:5-58\n #7 bfastfinal.fut:176:1-182:37\n", - "Index 
[{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:72-76\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:19:59-65\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:129:39-43\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:129:47-51\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:130:38-42\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-130:43\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 
bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:38-52\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 bfastfinal.fut:141:56-74\n #1 bfastfinal.fut:139:22-142:45\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n", - "Index [{}] out of bounds for array of shape [{}].\n-> #0 helpers.fut:12:27-44\n #1 bfastfinal.fut:158:35-74\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:136:38-164:9\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n"] - program = initialise_opencl_object(self, - program_src=fut_opencl_src, - command_queue=command_queue, - interactive=interactive, - platform_pref=platform_pref, - device_pref=device_pref, - default_group_size=default_group_size, - default_num_groups=default_num_groups, - default_tile_size=default_tile_size, - default_reg_tile_size=default_reg_tile_size, - default_threshold=default_threshold, - size_heuristics=size_heuristics, - required_types=["i16", "i32", "i64", "f32", "bool", "cert"], - user_sizes=sizes, - all_sizes={"builtin#replicate_f32.group_size_46321": {"class": "group_size", - "value": None}, - "builtin#replicate_i32.group_size_46330": {"class": "group_size", - "value": None}, - "convertToFloat.segmap_group_size_29820": {"class": "group_size", - "value": None}, - "main.Rx_42673": {"class": "reg_tile_size", "value": None}, - "main.Rx_43414": {"class": "reg_tile_size", "value": None}, - "main.Ry_42674": {"class": "reg_tile_size", "value": 
None}, - "main.Ry_43415": {"class": "reg_tile_size", "value": None}, - "main.Tk_42670": {"class": "tile_size", "value": None}, - "main.Tk_43411": {"class": "tile_size", "value": None}, - "main.Tx_42525": {"class": "tile_size", "value": None}, - "main.Tx_42671": {"class": "tile_size", "value": None}, - "main.Tx_43412": {"class": "tile_size", "value": None}, - "main.Ty_42526": {"class": "tile_size", "value": None}, - "main.Ty_42672": {"class": "tile_size", "value": None}, - "main.Ty_43413": {"class": "tile_size", "value": None}, - "main.group_size_45852": {"class": "group_size", "value": None}, - "main.segmap_group_size_38664": {"class": "group_size", "value": None}, - "main.segmap_group_size_38842": {"class": "group_size", "value": None}, - "main.segmap_group_size_38970": {"class": "group_size", "value": None}, - "main.segmap_group_size_39002": {"class": "group_size", "value": None}, - "main.segmap_group_size_39049": {"class": "group_size", "value": None}, - "main.segmap_group_size_39642": {"class": "group_size", "value": None}, - "main.segmap_group_size_39871": {"class": "group_size", "value": None}, - "main.segmap_group_size_39941": {"class": "group_size", "value": None}, - "main.segmap_group_size_40040": {"class": "group_size", "value": None}, - "main.segmap_group_size_40158": {"class": "group_size", "value": None}, - "main.segmap_group_size_40398": {"class": "group_size", "value": None}, - "main.segmap_group_size_40539": {"class": "group_size", "value": None}, - "main.segmap_group_size_40671": {"class": "group_size", "value": None}, - "main.segmap_group_size_40952": {"class": "group_size", "value": None}, - "main.segmap_group_size_41027": {"class": "group_size", "value": None}, - "main.segmap_group_size_41176": {"class": "group_size", "value": None}, - "main.segmap_group_size_41290": {"class": "group_size", "value": None}, - "main.segmap_group_size_41445": {"class": "group_size", "value": None}, - "main.segmap_group_size_41591": {"class": "group_size", "value": 
None}, - "main.segmap_group_size_42003": {"class": "group_size", "value": None}, - "main.segmap_group_size_42157": {"class": "group_size", "value": None}, - "main.segmap_num_groups_39004": {"class": "num_groups", "value": None}, - "main.segmap_num_groups_39051": {"class": "num_groups", "value": None}, - "main.segmap_num_groups_40400": {"class": "num_groups", "value": None}, - "main.segmap_num_groups_40541": {"class": "num_groups", "value": None}, - "main.segmap_num_groups_40673": {"class": "num_groups", "value": None}, - "main.segmap_num_groups_42159": {"class": "num_groups", "value": None}, - "main.segred_group_size_39109": {"class": "group_size", "value": None}, - "main.segred_group_size_40460": {"class": "group_size", "value": None}, - "main.segred_group_size_40597": {"class": "group_size", "value": None}, - "main.segred_group_size_40727": {"class": "group_size", "value": None}, - "main.segred_group_size_41305": {"class": "group_size", "value": None}, - "main.segred_group_size_41330": {"class": "group_size", "value": None}, - "main.segred_group_size_41401": {"class": "group_size", "value": None}, - "main.segred_group_size_41493": {"class": "group_size", "value": None}, - "main.segred_group_size_42054": {"class": "group_size", "value": None}, - "main.segred_num_groups_39111": {"class": "num_groups", "value": None}, - "main.segred_num_groups_40462": {"class": "num_groups", "value": None}, - "main.segred_num_groups_40599": {"class": "num_groups", "value": None}, - "main.segred_num_groups_40729": {"class": "num_groups", "value": None}, - "main.segred_num_groups_41307": {"class": "num_groups", "value": None}, - "main.segred_num_groups_41332": {"class": "num_groups", "value": None}, - "main.segred_num_groups_41403": {"class": "num_groups", "value": None}, - "main.segred_num_groups_41495": {"class": "num_groups", "value": None}, - "main.segred_num_groups_42056": {"class": "num_groups", "value": None}, - "main.segscan_group_size_41036": {"class": "group_size", "value": 
None}, - "main.segscan_group_size_42108": {"class": "group_size", "value": None}, - "main.segscan_num_groups_41038": {"class": "num_groups", "value": None}, - "main.segscan_num_groups_42110": {"class": "num_groups", "value": None}, - "main.suff_intra_par_11": {"class": "threshold ()", "value": 32}, - "main.suff_intra_par_13": {"class": "threshold (!main.suff_intra_par_11)", - "value": 32}, - "main.suff_intra_par_24": {"class": "threshold ()", "value": 32}, - "main.suff_intra_par_29": {"class": "threshold (!main.suff_outer_par_28)", - "value": 32}, - "main.suff_intra_par_34": {"class": "threshold ()", "value": 32}, - "main.suff_outer_par_16": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_17": {"class": "threshold (!main.suff_outer_par_16)", - "value": None}, - "main.suff_outer_par_18": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_19": {"class": "threshold (!main.suff_outer_par_18)", - "value": None}, - "main.suff_outer_par_20": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_21": {"class": "threshold (!main.suff_outer_par_20)", - "value": None}, - "main.suff_outer_par_28": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_31": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_6": {"class": "threshold ()", "value": None}, - "main.suff_outer_par_7": {"class": "threshold (!main.suff_outer_par_6)", - "value": None}, - "main.suff_outer_par_8": {"class": "threshold (!main.suff_outer_par_7 !main.suff_outer_par_6)", - "value": None}, - "main.tile_size_43137": {"class": "tile_size", "value": None}, - "mainDetailed.Rx_42673": {"class": "reg_tile_size", "value": None}, - "mainDetailed.Rx_43414": {"class": "reg_tile_size", "value": None}, - "mainDetailed.Ry_42674": {"class": "reg_tile_size", "value": None}, - "mainDetailed.Ry_43415": {"class": "reg_tile_size", "value": None}, - "mainDetailed.Tk_42670": {"class": "tile_size", "value": None}, - "mainDetailed.Tk_43411": {"class": "tile_size", 
"value": None}, - "mainDetailed.Tx_42525": {"class": "tile_size", "value": None}, - "mainDetailed.Tx_42671": {"class": "tile_size", "value": None}, - "mainDetailed.Tx_43412": {"class": "tile_size", "value": None}, - "mainDetailed.Ty_42526": {"class": "tile_size", "value": None}, - "mainDetailed.Ty_42672": {"class": "tile_size", "value": None}, - "mainDetailed.Ty_43413": {"class": "tile_size", "value": None}, - "mainDetailed.group_size_45864": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_29978": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_30156": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_30284": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_30316": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_30363": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_30956": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31185": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31255": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31354": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31472": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31712": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31853": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_31985": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_32266": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_32341": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_32490": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_32604": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_32759": {"class": "group_size", "value": None}, - 
"mainDetailed.segmap_group_size_32904": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33190": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33312": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33369": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33547": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33896": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_33952": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_34028": {"class": "group_size", "value": None}, - "mainDetailed.segmap_group_size_34078": {"class": "group_size", "value": None}, - "mainDetailed.segmap_num_groups_30318": {"class": "num_groups", "value": None}, - "mainDetailed.segmap_num_groups_30365": {"class": "num_groups", "value": None}, - "mainDetailed.segmap_num_groups_31714": {"class": "num_groups", "value": None}, - "mainDetailed.segmap_num_groups_31855": {"class": "num_groups", "value": None}, - "mainDetailed.segmap_num_groups_31987": {"class": "num_groups", "value": None}, - "mainDetailed.segmap_num_groups_33549": {"class": "num_groups", "value": None}, - "mainDetailed.segred_group_size_30423": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_31774": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_31911": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_32041": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_32619": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_32644": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_32715": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_32807": {"class": "group_size", "value": None}, - "mainDetailed.segred_group_size_33988": {"class": "group_size", "value": None}, - 
"mainDetailed.segred_num_groups_30425": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_31776": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_31913": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_32043": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_32621": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_32646": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_32717": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_32809": {"class": "num_groups", "value": None}, - "mainDetailed.segred_num_groups_33990": {"class": "num_groups", "value": None}, - "mainDetailed.segscan_group_size_32350": {"class": "group_size", - "value": None}, - "mainDetailed.segscan_group_size_34039": {"class": "group_size", - "value": None}, - "mainDetailed.segscan_num_groups_32352": {"class": "num_groups", - "value": None}, - "mainDetailed.segscan_num_groups_34041": {"class": "num_groups", - "value": None}, - "mainDetailed.suff_intra_par_11": {"class": "threshold ()", "value": 32}, - "mainDetailed.suff_intra_par_13": {"class": "threshold (!mainDetailed.suff_intra_par_11)", - "value": 32}, - "mainDetailed.suff_intra_par_24": {"class": "threshold ()", "value": 32}, - "mainDetailed.suff_intra_par_29": {"class": "threshold (!mainDetailed.suff_outer_par_28)", - "value": 32}, - "mainDetailed.suff_intra_par_38": {"class": "threshold (!mainDetailed.suff_outer_par_37)", - "value": 32}, - "mainDetailed.suff_outer_par_16": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_17": {"class": "threshold (!mainDetailed.suff_outer_par_16)", - "value": None}, - "mainDetailed.suff_outer_par_18": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_19": {"class": "threshold (!mainDetailed.suff_outer_par_18)", - "value": None}, - "mainDetailed.suff_outer_par_20": {"class": "threshold ()", 
"value": None}, - "mainDetailed.suff_outer_par_21": {"class": "threshold (!mainDetailed.suff_outer_par_20)", - "value": None}, - "mainDetailed.suff_outer_par_28": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_31": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_37": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_6": {"class": "threshold ()", "value": None}, - "mainDetailed.suff_outer_par_7": {"class": "threshold (!mainDetailed.suff_outer_par_6)", - "value": None}, - "mainDetailed.suff_outer_par_8": {"class": "threshold (!mainDetailed.suff_outer_par_7 !mainDetailed.suff_outer_par_6)", - "value": None}, - "mainDetailed.tile_size_43137": {"class": "tile_size", "value": None}, - "mainMagnitude.Rx_42673": {"class": "reg_tile_size", "value": None}, - "mainMagnitude.Rx_43414": {"class": "reg_tile_size", "value": None}, - "mainMagnitude.Ry_42674": {"class": "reg_tile_size", "value": None}, - "mainMagnitude.Ry_43415": {"class": "reg_tile_size", "value": None}, - "mainMagnitude.Tk_42670": {"class": "tile_size", "value": None}, - "mainMagnitude.Tk_43411": {"class": "tile_size", "value": None}, - "mainMagnitude.Tx_42525": {"class": "tile_size", "value": None}, - "mainMagnitude.Tx_42671": {"class": "tile_size", "value": None}, - "mainMagnitude.Tx_43412": {"class": "tile_size", "value": None}, - "mainMagnitude.Ty_42526": {"class": "tile_size", "value": None}, - "mainMagnitude.Ty_42672": {"class": "tile_size", "value": None}, - "mainMagnitude.Ty_43413": {"class": "tile_size", "value": None}, - "mainMagnitude.group_size_45853": {"class": "group_size", "value": None}, - "mainMagnitude.segmap_group_size_34346": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_34524": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_34652": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_34684": {"class": "group_size", - "value": None}, - 
"mainMagnitude.segmap_group_size_34731": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_35324": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_35553": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_35623": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_35722": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_35840": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36080": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36221": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36353": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36634": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36709": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36858": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_36972": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_37127": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_37273": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_37530": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_37652": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_37709": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_38243": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_group_size_38397": {"class": "group_size", - "value": None}, - "mainMagnitude.segmap_num_groups_34686": {"class": "num_groups", - "value": None}, - "mainMagnitude.segmap_num_groups_34733": {"class": "num_groups", - "value": None}, - "mainMagnitude.segmap_num_groups_36082": {"class": "num_groups", - "value": None}, - "mainMagnitude.segmap_num_groups_36223": 
{"class": "num_groups", - "value": None}, - "mainMagnitude.segmap_num_groups_36355": {"class": "num_groups", - "value": None}, - "mainMagnitude.segmap_num_groups_38399": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_group_size_34791": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_36142": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_36279": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_36409": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_36987": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_37012": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_37083": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_37175": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_group_size_38294": {"class": "group_size", - "value": None}, - "mainMagnitude.segred_num_groups_34793": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_36144": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_36281": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_36411": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_36989": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_37014": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_37085": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_37177": {"class": "num_groups", - "value": None}, - "mainMagnitude.segred_num_groups_38296": {"class": "num_groups", - "value": None}, - "mainMagnitude.segscan_group_size_36718": {"class": "group_size", - "value": None}, - "mainMagnitude.segscan_group_size_38348": {"class": "group_size", - "value": None}, - "mainMagnitude.segscan_num_groups_36720": {"class": "num_groups", - "value": None}, 
- "mainMagnitude.segscan_num_groups_38350": {"class": "num_groups", - "value": None}, - "mainMagnitude.suff_intra_par_11": {"class": "threshold ()", "value": 32}, - "mainMagnitude.suff_intra_par_13": {"class": "threshold (!mainMagnitude.suff_intra_par_11)", - "value": 32}, - "mainMagnitude.suff_intra_par_24": {"class": "threshold ()", "value": 32}, - "mainMagnitude.suff_intra_par_29": {"class": "threshold (!mainMagnitude.suff_outer_par_28)", - "value": 32}, - "mainMagnitude.suff_intra_par_37": {"class": "threshold ()", "value": 32}, - "mainMagnitude.suff_outer_par_16": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_17": {"class": "threshold (!mainMagnitude.suff_outer_par_16)", - "value": None}, - "mainMagnitude.suff_outer_par_18": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_19": {"class": "threshold (!mainMagnitude.suff_outer_par_18)", - "value": None}, - "mainMagnitude.suff_outer_par_20": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_21": {"class": "threshold (!mainMagnitude.suff_outer_par_20)", - "value": None}, - "mainMagnitude.suff_outer_par_28": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_31": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_6": {"class": "threshold ()", "value": None}, - "mainMagnitude.suff_outer_par_7": {"class": "threshold (!mainMagnitude.suff_outer_par_6)", - "value": None}, - "mainMagnitude.suff_outer_par_8": {"class": "threshold (!mainMagnitude.suff_outer_par_7 !mainMagnitude.suff_outer_par_6)", - "value": None}, - "mainMagnitude.tile_size_43137": {"class": "tile_size", "value": None}}) - self.builtinzhreplicate_f32zireplicate_46318_var = program.builtinzhreplicate_f32zireplicate_46318 - self.builtinzhreplicate_i32zireplicate_46327_var = program.builtinzhreplicate_i32zireplicate_46327 - self.convertToFloatzisegmap_29816_var = program.convertToFloatzisegmap_29816 - self.gpu_map_transpose_f32_var = 
program.gpu_map_transpose_f32 - self.gpu_map_transpose_f32_low_height_var = program.gpu_map_transpose_f32_low_height - self.gpu_map_transpose_f32_low_width_var = program.gpu_map_transpose_f32_low_width - self.gpu_map_transpose_f32_small_var = program.gpu_map_transpose_f32_small - self.mainzicopy_45849_var = program.mainzicopy_45849 - self.mainziscan_stage1_41042_var = program.mainziscan_stage1_41042 - self.mainziscan_stage1_42114_var = program.mainziscan_stage1_42114 - self.mainziscan_stage2_41042_var = program.mainziscan_stage2_41042 - self.mainziscan_stage2_42114_var = program.mainziscan_stage2_42114 - self.mainziscan_stage3_41042_var = program.mainziscan_stage3_41042 - self.mainziscan_stage3_42114_var = program.mainziscan_stage3_42114 - self.mainzisegmap_38661_var = program.mainzisegmap_38661 - self.mainzisegmap_38839_var = program.mainzisegmap_38839 - self.mainzisegmap_38967_var = program.mainzisegmap_38967 - self.mainzisegmap_39000_var = program.mainzisegmap_39000 - self.mainzisegmap_39046_var = program.mainzisegmap_39046 - self.mainzisegmap_39638_var = program.mainzisegmap_39638 - self.mainzisegmap_39868_var = program.mainzisegmap_39868 - self.mainzisegmap_39938_var = program.mainzisegmap_39938 - self.mainzisegmap_40038_var = program.mainzisegmap_40038 - self.mainzisegmap_40155_var = program.mainzisegmap_40155 - self.mainzisegmap_40396_var = program.mainzisegmap_40396 - self.mainzisegmap_40537_var = program.mainzisegmap_40537 - self.mainzisegmap_40669_var = program.mainzisegmap_40669 - self.mainzisegmap_40949_var = program.mainzisegmap_40949 - self.mainzisegmap_41025_var = program.mainzisegmap_41025 - self.mainzisegmap_41288_var = program.mainzisegmap_41288 - self.mainzisegmap_41589_var = program.mainzisegmap_41589 - self.mainzisegmap_42001_var = program.mainzisegmap_42001 - self.mainzisegmap_42155_var = program.mainzisegmap_42155 - self.mainzisegmap_intragroup_39374_var = program.mainzisegmap_intragroup_39374 - self.mainzisegmap_intragroup_39701_var = 
program.mainzisegmap_intragroup_39701 - self.mainzisegmap_intragroup_40832_var = program.mainzisegmap_intragroup_40832 - self.mainzisegmap_intragroup_41172_var = program.mainzisegmap_intragroup_41172 - self.mainzisegmap_intragroup_41640_var = program.mainzisegmap_intragroup_41640 - self.mainzisegmap_intragroup_42541_var = program.mainzisegmap_intragroup_42541 - self.mainzisegmap_intragroup_42694_var = program.mainzisegmap_intragroup_42694 - self.mainzisegmap_intragroup_43143_var = program.mainzisegmap_intragroup_43143 - self.mainzisegmap_intragroup_43435_var = program.mainzisegmap_intragroup_43435 - self.mainzisegmap_intragroup_43869_var = program.mainzisegmap_intragroup_43869 - self.mainzisegmap_intragroup_44075_var = program.mainzisegmap_intragroup_44075 - self.mainzisegred_large_39115_var = program.mainzisegred_large_39115 - self.mainzisegred_large_40466_var = program.mainzisegred_large_40466 - self.mainzisegred_large_40603_var = program.mainzisegred_large_40603 - self.mainzisegred_large_40733_var = program.mainzisegred_large_40733 - self.mainzisegred_large_41311_var = program.mainzisegred_large_41311 - self.mainzisegred_large_41336_var = program.mainzisegred_large_41336 - self.mainzisegred_large_41499_var = program.mainzisegred_large_41499 - self.mainzisegred_large_42060_var = program.mainzisegred_large_42060 - self.mainzisegred_nonseg_41409_var = program.mainzisegred_nonseg_41409 - self.mainzisegred_small_39115_var = program.mainzisegred_small_39115 - self.mainzisegred_small_40466_var = program.mainzisegred_small_40466 - self.mainzisegred_small_40603_var = program.mainzisegred_small_40603 - self.mainzisegred_small_40733_var = program.mainzisegred_small_40733 - self.mainzisegred_small_41311_var = program.mainzisegred_small_41311 - self.mainzisegred_small_41336_var = program.mainzisegred_small_41336 - self.mainzisegred_small_41499_var = program.mainzisegred_small_41499 - self.mainzisegred_small_42060_var = program.mainzisegred_small_42060 - 
self.mainDetailedzicopy_45861_var = program.mainDetailedzicopy_45861 - self.mainDetailedziscan_stage1_32356_var = program.mainDetailedziscan_stage1_32356 - self.mainDetailedziscan_stage1_34045_var = program.mainDetailedziscan_stage1_34045 - self.mainDetailedziscan_stage2_32356_var = program.mainDetailedziscan_stage2_32356 - self.mainDetailedziscan_stage2_34045_var = program.mainDetailedziscan_stage2_34045 - self.mainDetailedziscan_stage3_32356_var = program.mainDetailedziscan_stage3_32356 - self.mainDetailedziscan_stage3_34045_var = program.mainDetailedziscan_stage3_34045 - self.mainDetailedzisegmap_29975_var = program.mainDetailedzisegmap_29975 - self.mainDetailedzisegmap_30153_var = program.mainDetailedzisegmap_30153 - self.mainDetailedzisegmap_30281_var = program.mainDetailedzisegmap_30281 - self.mainDetailedzisegmap_30314_var = program.mainDetailedzisegmap_30314 - self.mainDetailedzisegmap_30360_var = program.mainDetailedzisegmap_30360 - self.mainDetailedzisegmap_30952_var = program.mainDetailedzisegmap_30952 - self.mainDetailedzisegmap_31182_var = program.mainDetailedzisegmap_31182 - self.mainDetailedzisegmap_31252_var = program.mainDetailedzisegmap_31252 - self.mainDetailedzisegmap_31352_var = program.mainDetailedzisegmap_31352 - self.mainDetailedzisegmap_31469_var = program.mainDetailedzisegmap_31469 - self.mainDetailedzisegmap_31710_var = program.mainDetailedzisegmap_31710 - self.mainDetailedzisegmap_31851_var = program.mainDetailedzisegmap_31851 - self.mainDetailedzisegmap_31983_var = program.mainDetailedzisegmap_31983 - self.mainDetailedzisegmap_32263_var = program.mainDetailedzisegmap_32263 - self.mainDetailedzisegmap_32339_var = program.mainDetailedzisegmap_32339 - self.mainDetailedzisegmap_32602_var = program.mainDetailedzisegmap_32602 - self.mainDetailedzisegmap_32902_var = program.mainDetailedzisegmap_32902 - self.mainDetailedzisegmap_33188_var = program.mainDetailedzisegmap_33188 - self.mainDetailedzisegmap_33309_var = 
program.mainDetailedzisegmap_33309 - self.mainDetailedzisegmap_33367_var = program.mainDetailedzisegmap_33367 - self.mainDetailedzisegmap_33545_var = program.mainDetailedzisegmap_33545 - self.mainDetailedzisegmap_33893_var = program.mainDetailedzisegmap_33893 - self.mainDetailedzisegmap_33950_var = program.mainDetailedzisegmap_33950 - self.mainDetailedzisegmap_34026_var = program.mainDetailedzisegmap_34026 - self.mainDetailedzisegmap_34076_var = program.mainDetailedzisegmap_34076 - self.mainDetailedzisegmap_intragroup_30688_var = program.mainDetailedzisegmap_intragroup_30688 - self.mainDetailedzisegmap_intragroup_31015_var = program.mainDetailedzisegmap_intragroup_31015 - self.mainDetailedzisegmap_intragroup_32146_var = program.mainDetailedzisegmap_intragroup_32146 - self.mainDetailedzisegmap_intragroup_32486_var = program.mainDetailedzisegmap_intragroup_32486 - self.mainDetailedzisegmap_intragroup_33543_var = program.mainDetailedzisegmap_intragroup_33543 - self.mainDetailedzisegmap_intragroup_42541_var = program.mainDetailedzisegmap_intragroup_42541 - self.mainDetailedzisegmap_intragroup_42694_var = program.mainDetailedzisegmap_intragroup_42694 - self.mainDetailedzisegmap_intragroup_43143_var = program.mainDetailedzisegmap_intragroup_43143 - self.mainDetailedzisegmap_intragroup_43435_var = program.mainDetailedzisegmap_intragroup_43435 - self.mainDetailedzisegmap_intragroup_43869_var = program.mainDetailedzisegmap_intragroup_43869 - self.mainDetailedzisegmap_intragroup_44075_var = program.mainDetailedzisegmap_intragroup_44075 - self.mainDetailedzisegred_large_30429_var = program.mainDetailedzisegred_large_30429 - self.mainDetailedzisegred_large_31780_var = program.mainDetailedzisegred_large_31780 - self.mainDetailedzisegred_large_31917_var = program.mainDetailedzisegred_large_31917 - self.mainDetailedzisegred_large_32047_var = program.mainDetailedzisegred_large_32047 - self.mainDetailedzisegred_large_32625_var = program.mainDetailedzisegred_large_32625 - 
self.mainDetailedzisegred_large_32650_var = program.mainDetailedzisegred_large_32650 - self.mainDetailedzisegred_large_32813_var = program.mainDetailedzisegred_large_32813 - self.mainDetailedzisegred_large_33994_var = program.mainDetailedzisegred_large_33994 - self.mainDetailedzisegred_nonseg_32723_var = program.mainDetailedzisegred_nonseg_32723 - self.mainDetailedzisegred_small_30429_var = program.mainDetailedzisegred_small_30429 - self.mainDetailedzisegred_small_31780_var = program.mainDetailedzisegred_small_31780 - self.mainDetailedzisegred_small_31917_var = program.mainDetailedzisegred_small_31917 - self.mainDetailedzisegred_small_32047_var = program.mainDetailedzisegred_small_32047 - self.mainDetailedzisegred_small_32625_var = program.mainDetailedzisegred_small_32625 - self.mainDetailedzisegred_small_32650_var = program.mainDetailedzisegred_small_32650 - self.mainDetailedzisegred_small_32813_var = program.mainDetailedzisegred_small_32813 - self.mainDetailedzisegred_small_33994_var = program.mainDetailedzisegred_small_33994 - self.mainMagnitudezicopy_45850_var = program.mainMagnitudezicopy_45850 - self.mainMagnitudeziscan_stage1_36724_var = program.mainMagnitudeziscan_stage1_36724 - self.mainMagnitudeziscan_stage1_38354_var = program.mainMagnitudeziscan_stage1_38354 - self.mainMagnitudeziscan_stage2_36724_var = program.mainMagnitudeziscan_stage2_36724 - self.mainMagnitudeziscan_stage2_38354_var = program.mainMagnitudeziscan_stage2_38354 - self.mainMagnitudeziscan_stage3_36724_var = program.mainMagnitudeziscan_stage3_36724 - self.mainMagnitudeziscan_stage3_38354_var = program.mainMagnitudeziscan_stage3_38354 - self.mainMagnitudezisegmap_34343_var = program.mainMagnitudezisegmap_34343 - self.mainMagnitudezisegmap_34521_var = program.mainMagnitudezisegmap_34521 - self.mainMagnitudezisegmap_34649_var = program.mainMagnitudezisegmap_34649 - self.mainMagnitudezisegmap_34682_var = program.mainMagnitudezisegmap_34682 - self.mainMagnitudezisegmap_34728_var = 
program.mainMagnitudezisegmap_34728 - self.mainMagnitudezisegmap_35320_var = program.mainMagnitudezisegmap_35320 - self.mainMagnitudezisegmap_35550_var = program.mainMagnitudezisegmap_35550 - self.mainMagnitudezisegmap_35620_var = program.mainMagnitudezisegmap_35620 - self.mainMagnitudezisegmap_35720_var = program.mainMagnitudezisegmap_35720 - self.mainMagnitudezisegmap_35837_var = program.mainMagnitudezisegmap_35837 - self.mainMagnitudezisegmap_36078_var = program.mainMagnitudezisegmap_36078 - self.mainMagnitudezisegmap_36219_var = program.mainMagnitudezisegmap_36219 - self.mainMagnitudezisegmap_36351_var = program.mainMagnitudezisegmap_36351 - self.mainMagnitudezisegmap_36631_var = program.mainMagnitudezisegmap_36631 - self.mainMagnitudezisegmap_36707_var = program.mainMagnitudezisegmap_36707 - self.mainMagnitudezisegmap_36970_var = program.mainMagnitudezisegmap_36970 - self.mainMagnitudezisegmap_37271_var = program.mainMagnitudezisegmap_37271 - self.mainMagnitudezisegmap_37528_var = program.mainMagnitudezisegmap_37528 - self.mainMagnitudezisegmap_37649_var = program.mainMagnitudezisegmap_37649 - self.mainMagnitudezisegmap_37707_var = program.mainMagnitudezisegmap_37707 - self.mainMagnitudezisegmap_38241_var = program.mainMagnitudezisegmap_38241 - self.mainMagnitudezisegmap_38395_var = program.mainMagnitudezisegmap_38395 - self.mainMagnitudezisegmap_intragroup_35056_var = program.mainMagnitudezisegmap_intragroup_35056 - self.mainMagnitudezisegmap_intragroup_35383_var = program.mainMagnitudezisegmap_intragroup_35383 - self.mainMagnitudezisegmap_intragroup_36514_var = program.mainMagnitudezisegmap_intragroup_36514 - self.mainMagnitudezisegmap_intragroup_36854_var = program.mainMagnitudezisegmap_intragroup_36854 - self.mainMagnitudezisegmap_intragroup_37880_var = program.mainMagnitudezisegmap_intragroup_37880 - self.mainMagnitudezisegmap_intragroup_42541_var = program.mainMagnitudezisegmap_intragroup_42541 - self.mainMagnitudezisegmap_intragroup_42694_var = 
program.mainMagnitudezisegmap_intragroup_42694 - self.mainMagnitudezisegmap_intragroup_43143_var = program.mainMagnitudezisegmap_intragroup_43143 - self.mainMagnitudezisegmap_intragroup_43435_var = program.mainMagnitudezisegmap_intragroup_43435 - self.mainMagnitudezisegmap_intragroup_43869_var = program.mainMagnitudezisegmap_intragroup_43869 - self.mainMagnitudezisegmap_intragroup_44075_var = program.mainMagnitudezisegmap_intragroup_44075 - self.mainMagnitudezisegred_large_34797_var = program.mainMagnitudezisegred_large_34797 - self.mainMagnitudezisegred_large_36148_var = program.mainMagnitudezisegred_large_36148 - self.mainMagnitudezisegred_large_36285_var = program.mainMagnitudezisegred_large_36285 - self.mainMagnitudezisegred_large_36415_var = program.mainMagnitudezisegred_large_36415 - self.mainMagnitudezisegred_large_36993_var = program.mainMagnitudezisegred_large_36993 - self.mainMagnitudezisegred_large_37018_var = program.mainMagnitudezisegred_large_37018 - self.mainMagnitudezisegred_large_37181_var = program.mainMagnitudezisegred_large_37181 - self.mainMagnitudezisegred_large_38300_var = program.mainMagnitudezisegred_large_38300 - self.mainMagnitudezisegred_nonseg_37091_var = program.mainMagnitudezisegred_nonseg_37091 - self.mainMagnitudezisegred_small_34797_var = program.mainMagnitudezisegred_small_34797 - self.mainMagnitudezisegred_small_36148_var = program.mainMagnitudezisegred_small_36148 - self.mainMagnitudezisegred_small_36285_var = program.mainMagnitudezisegred_small_36285 - self.mainMagnitudezisegred_small_36415_var = program.mainMagnitudezisegred_small_36415 - self.mainMagnitudezisegred_small_36993_var = program.mainMagnitudezisegred_small_36993 - self.mainMagnitudezisegred_small_37018_var = program.mainMagnitudezisegred_small_37018 - self.mainMagnitudezisegred_small_37181_var = program.mainMagnitudezisegred_small_37181 - self.mainMagnitudezisegred_small_38300_var = program.mainMagnitudezisegred_small_38300 - self.constants = {} - 
mainzicounter_mem_45784 = np.zeros(10240, dtype=np.int32) - static_mem_46860 = opencl_alloc(self, 40960, "static_mem_46860") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46860, - normaliseArray(mainzicounter_mem_45784), - is_blocking=synchronous) - self.mainzicounter_mem_45784 = static_mem_46860 - mainzicounter_mem_45981 = np.zeros(10240, dtype=np.int32) - static_mem_46863 = opencl_alloc(self, 40960, "static_mem_46863") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46863, - normaliseArray(mainzicounter_mem_45981), - is_blocking=synchronous) - self.mainzicounter_mem_45981 = static_mem_46863 - mainzicounter_mem_46069 = np.zeros(10240, dtype=np.int32) - static_mem_46864 = opencl_alloc(self, 40960, "static_mem_46864") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46864, - normaliseArray(mainzicounter_mem_46069), - is_blocking=synchronous) - self.mainzicounter_mem_46069 = static_mem_46864 - mainzicounter_mem_46201 = np.zeros(10240, dtype=np.int32) - static_mem_46865 = opencl_alloc(self, 40960, "static_mem_46865") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46865, - normaliseArray(mainzicounter_mem_46201), - is_blocking=synchronous) - self.mainzicounter_mem_46201 = static_mem_46865 - mainzicounter_mem_46393 = np.zeros(10240, dtype=np.int32) - static_mem_46866 = opencl_alloc(self, 40960, "static_mem_46866") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46866, - normaliseArray(mainzicounter_mem_46393), - is_blocking=synchronous) - self.mainzicounter_mem_46393 = static_mem_46866 - mainzicounter_mem_46453 = np.zeros(10240, dtype=np.int32) - static_mem_46867 = opencl_alloc(self, 40960, "static_mem_46867") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46867, - normaliseArray(mainzicounter_mem_46453), - is_blocking=synchronous) - self.mainzicounter_mem_46453 = static_mem_46867 - mainzicounter_mem_46493 = np.array([np.int32(0), np.int32(0), np.int32(0), - np.int32(0), np.int32(0), np.int32(0), 
- np.int32(0), np.int32(0), np.int32(0), - np.int32(0)], dtype=np.int32) - static_mem_46868 = opencl_alloc(self, 40, "static_mem_46868") - if (40 != 0): - cl.enqueue_copy(self.queue, static_mem_46868, - normaliseArray(mainzicounter_mem_46493), - is_blocking=synchronous) - self.mainzicounter_mem_46493 = static_mem_46868 - mainzicounter_mem_46562 = np.zeros(10240, dtype=np.int32) - static_mem_46870 = opencl_alloc(self, 40960, "static_mem_46870") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46870, - normaliseArray(mainzicounter_mem_46562), - is_blocking=synchronous) - self.mainzicounter_mem_46562 = static_mem_46870 - mainzicounter_mem_46739 = np.zeros(10240, dtype=np.int32) - static_mem_46872 = opencl_alloc(self, 40960, "static_mem_46872") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46872, - normaliseArray(mainzicounter_mem_46739), - is_blocking=synchronous) - self.mainzicounter_mem_46739 = static_mem_46872 - mainDetailedzicounter_mem_45796 = np.zeros(10240, dtype=np.int32) - static_mem_46873 = opencl_alloc(self, 40960, "static_mem_46873") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46873, - normaliseArray(mainDetailedzicounter_mem_45796), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_45796 = static_mem_46873 - mainDetailedzicounter_mem_45993 = np.zeros(10240, dtype=np.int32) - static_mem_46876 = opencl_alloc(self, 40960, "static_mem_46876") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46876, - normaliseArray(mainDetailedzicounter_mem_45993), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_45993 = static_mem_46876 - mainDetailedzicounter_mem_46081 = np.zeros(10240, dtype=np.int32) - static_mem_46877 = opencl_alloc(self, 40960, "static_mem_46877") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46877, - normaliseArray(mainDetailedzicounter_mem_46081), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46081 = static_mem_46877 - mainDetailedzicounter_mem_46213 = 
np.zeros(10240, dtype=np.int32) - static_mem_46878 = opencl_alloc(self, 40960, "static_mem_46878") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46878, - normaliseArray(mainDetailedzicounter_mem_46213), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46213 = static_mem_46878 - mainDetailedzicounter_mem_46405 = np.zeros(10240, dtype=np.int32) - static_mem_46879 = opencl_alloc(self, 40960, "static_mem_46879") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46879, - normaliseArray(mainDetailedzicounter_mem_46405), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46405 = static_mem_46879 - mainDetailedzicounter_mem_46465 = np.zeros(10240, dtype=np.int32) - static_mem_46880 = opencl_alloc(self, 40960, "static_mem_46880") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46880, - normaliseArray(mainDetailedzicounter_mem_46465), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46465 = static_mem_46880 - mainDetailedzicounter_mem_46505 = np.array([np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0)], - dtype=np.int32) - static_mem_46881 = opencl_alloc(self, 40, "static_mem_46881") - if (40 != 0): - cl.enqueue_copy(self.queue, static_mem_46881, - normaliseArray(mainDetailedzicounter_mem_46505), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46505 = static_mem_46881 - mainDetailedzicounter_mem_46574 = np.zeros(10240, dtype=np.int32) - static_mem_46883 = opencl_alloc(self, 40960, "static_mem_46883") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46883, - normaliseArray(mainDetailedzicounter_mem_46574), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46574 = static_mem_46883 - mainDetailedzicounter_mem_46797 = np.zeros(10240, dtype=np.int32) - static_mem_46885 = opencl_alloc(self, 40960, "static_mem_46885") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46885, - 
normaliseArray(mainDetailedzicounter_mem_46797), - is_blocking=synchronous) - self.mainDetailedzicounter_mem_46797 = static_mem_46885 - mainMagnitudezicounter_mem_45785 = np.zeros(10240, dtype=np.int32) - static_mem_46886 = opencl_alloc(self, 40960, "static_mem_46886") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46886, - normaliseArray(mainMagnitudezicounter_mem_45785), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_45785 = static_mem_46886 - mainMagnitudezicounter_mem_45982 = np.zeros(10240, dtype=np.int32) - static_mem_46889 = opencl_alloc(self, 40960, "static_mem_46889") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46889, - normaliseArray(mainMagnitudezicounter_mem_45982), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_45982 = static_mem_46889 - mainMagnitudezicounter_mem_46070 = np.zeros(10240, dtype=np.int32) - static_mem_46890 = opencl_alloc(self, 40960, "static_mem_46890") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46890, - normaliseArray(mainMagnitudezicounter_mem_46070), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46070 = static_mem_46890 - mainMagnitudezicounter_mem_46202 = np.zeros(10240, dtype=np.int32) - static_mem_46891 = opencl_alloc(self, 40960, "static_mem_46891") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46891, - normaliseArray(mainMagnitudezicounter_mem_46202), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46202 = static_mem_46891 - mainMagnitudezicounter_mem_46394 = np.zeros(10240, dtype=np.int32) - static_mem_46892 = opencl_alloc(self, 40960, "static_mem_46892") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46892, - normaliseArray(mainMagnitudezicounter_mem_46394), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46394 = static_mem_46892 - mainMagnitudezicounter_mem_46454 = np.zeros(10240, dtype=np.int32) - static_mem_46893 = opencl_alloc(self, 40960, "static_mem_46893") - if (40960 != 0): - 
cl.enqueue_copy(self.queue, static_mem_46893, - normaliseArray(mainMagnitudezicounter_mem_46454), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46454 = static_mem_46893 - mainMagnitudezicounter_mem_46494 = np.array([np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0), - np.int32(0), np.int32(0)], - dtype=np.int32) - static_mem_46894 = opencl_alloc(self, 40, "static_mem_46894") - if (40 != 0): - cl.enqueue_copy(self.queue, static_mem_46894, - normaliseArray(mainMagnitudezicounter_mem_46494), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46494 = static_mem_46894 - mainMagnitudezicounter_mem_46563 = np.zeros(10240, dtype=np.int32) - static_mem_46896 = opencl_alloc(self, 40960, "static_mem_46896") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46896, - normaliseArray(mainMagnitudezicounter_mem_46563), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46563 = static_mem_46896 - mainMagnitudezicounter_mem_46760 = np.zeros(10240, dtype=np.int32) - static_mem_46898 = opencl_alloc(self, 40960, "static_mem_46898") - if (40960 != 0): - cl.enqueue_copy(self.queue, static_mem_46898, - normaliseArray(mainMagnitudezicounter_mem_46760), - is_blocking=synchronous) - self.mainMagnitudezicounter_mem_46760 = static_mem_46898 - def futhark_builtinzhgpu_map_transpose_f32(self, destmem_0, destoffset_1, - srcmem_2, srcoffset_3, - num_arrays_4, x_elems_5, - y_elems_6): - if ((num_arrays_4 == np.int32(0)) or ((x_elems_5 == np.int32(0)) or (y_elems_6 == np.int32(0)))): - pass - else: - muly_8 = squot32(np.int32(16), x_elems_5) - mulx_7 = squot32(np.int32(16), y_elems_6) - if ((num_arrays_4 == np.int32(1)) and ((x_elems_5 == np.int32(1)) or (y_elems_6 == np.int32(1)))): - if (sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(4))) != 0): - cl.enqueue_copy(self.queue, destmem_0, srcmem_2, - dest_offset=np.long(sext_i32_i64(destoffset_1)), - src_offset=np.long(sext_i32_i64(srcoffset_3)), 
- byte_count=np.long(sext_i32_i64(((x_elems_5 * y_elems_6) * np.int32(4))))) + groups_per_segment_127105 = sdiv_up64(num_groups_81262, + smax64(np.int64(1), + ((m_70861 * k2p2zq_70876) * k2p2zq_70876))) + elements_per_thread_127106 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_81261 * groups_per_segment_127105)) + virt_num_groups_127107 = (groups_per_segment_127105 * ((m_70861 * k2p2zq_70876) * k2p2zq_70876)) + num_threads_127108 = (num_groups_81262 * segred_group_sizze_81261) + threads_per_segment_127109 = (groups_per_segment_127105 * segred_group_sizze_81261) + group_res_arr_mem_127110 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_81261 * virt_num_groups_127107)), + "group_res_arr_mem_127110") + mainDetailedzicounter_mem_127112 = self.mainDetailedzicounter_mem_127112 + if ((1 * (np.int64(num_groups_81262) * np.int64(segred_group_sizze_81261))) != 0): + self.mainDetailedzisegred_large_79431_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_81261))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_81262), + np.int64(groups_per_segment_127105), + np.int64(elements_per_thread_127106), + np.int64(virt_num_groups_127107), + np.int64(threads_per_segment_127109), + mem_121831, + mem_121835, + mem_121840, + group_res_arr_mem_127110, + mainDetailedzicounter_mem_127112) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_79431_var, + ((np.int64(num_groups_81262) * np.int64(segred_group_sizze_81261)),), + (np.int64(segred_group_sizze_81261),)) + if synchronous: + sync(self) + mem_121831 = None + mem_121835 = None + defunc_3_map_res_r_mem_121841 = mem_121840 + defunc_3_map_res_r_mem_121847 = defunc_3_map_res_r_mem_121841 + mem_121850 = opencl_alloc(self, bytes_121990, "mem_121850") + group_sizze_127149 = self.sizes["mainDetailed.group_size_127149"] + num_groups_127150 = sdiv_up64((m_70861 * k2p2zq_70876), + group_sizze_127149) + if ((1 
* (np.int64(num_groups_127150) * np.int64(group_sizze_127149))) != 0): + self.mainDetailedzicopy_127146_var.set_args(np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + defunc_3_map_res_mem_120231, + mem_121850) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127146_var, + ((np.int64(num_groups_127150) * np.int64(group_sizze_127149)),), + (np.int64(group_sizze_127149),)) + if synchronous: + sync(self) + mem_121854 = opencl_alloc(self, bytes_121997, "mem_121854") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121854, np.int64(0), + mem_121335, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_121335 = None + mem_121858 = opencl_alloc(self, bytes_120258, "mem_121858") + group_sizze_127154 = self.sizes["mainDetailed.group_size_127154"] + num_groups_127155 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_127154) + if ((1 * (np.int64(num_groups_127155) * np.int64(group_sizze_127154))) != 0): + self.mainDetailedzicopy_127151_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121854, mem_121858) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127151_var, + ((np.int64(num_groups_127155) * np.int64(group_sizze_127154)),), + (np.int64(group_sizze_127154),)) + if synchronous: + sync(self) + mem_121854 = None + mem_121895 = opencl_alloc(self, bytes_121990, "mem_121895") + mem_125243 = opencl_alloc(self, total_sizze_125701, "mem_125243") + double_buffer_mem_125565 = opencl_alloc(self, total_sizze_125702, + "double_buffer_mem_125565") + if ((1 * (np.int64(num_groups_81278) * np.int64(segmap_group_sizze_81277))) != 0): + self.mainDetailedzisegmap_79309_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_70948), + np.int64(num_groups_81278), + np.int64(num_threads_125700), + mem_121338, mem_121343, + mem_121850, mem_121858, + mem_121895, mem_125243, + 
double_buffer_mem_125565) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79309_var, + ((np.int64(num_groups_81278) * np.int64(segmap_group_sizze_81277)),), + (np.int64(segmap_group_sizze_81277),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121338 = None + mem_121850 = None + mem_121858 = None + mem_125243 = None + double_buffer_mem_125565 = None + mem_121898 = opencl_alloc(self, bytes_121990, "mem_121898") + self.futhark_builtinzhreplicate_f64(mem_121898, + (m_70861 * k2p2zq_70876), + np.float64(0.0)) + segmap_usable_groups_81348 = sdiv_up64(binop_x_120244, + segmap_group_sizze_81347) + mem_121901 = opencl_alloc(self, bytes_121990, "mem_121901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121901, np.int64(0), + mem_121895, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_121895 = None + if ((1 * (np.int64(segmap_usable_groups_81348) * np.int64(segmap_group_sizze_81347))) != 0): + self.mainDetailedzisegmap_79266_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(binop_x_120251), + mem_121341, + defunc_3_map_res_r_mem_121609, + mem_121898, mem_121901) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79266_var, + ((np.int64(segmap_usable_groups_81348) * np.int64(segmap_group_sizze_81347)),), + (np.int64(segmap_group_sizze_81347),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_121609 = None + mem_121901 = None + mem_121906 = opencl_alloc(self, bytes_121997, "mem_121906") + self.futhark_builtinzhreplicate_f64(mem_121906, + ((m_70861 * k2p2zq_70876) * k2p2zq_70876), + np.float64(0.0)) + segmap_usable_groups_81419 = sdiv_up64(nest_sizze_81002, + segmap_group_sizze_81418) + mem_121909 = opencl_alloc(self, bytes_121990, "mem_121909") + self.futhark_builtinzhgpu_map_transpose_i64(mem_121909, np.int64(0), + mem_121341, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_121341 = None + if ((1 * 
(np.int64(segmap_usable_groups_81419) * np.int64(segmap_group_sizze_81418))) != 0): + self.mainDetailedzisegmap_79109_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121847, + mem_121906, mem_121909) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79109_var, + ((np.int64(segmap_usable_groups_81419) * np.int64(segmap_group_sizze_81418)),), + (np.int64(segmap_group_sizze_81418),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_121847 = None + mem_121909 = None + segmap_usable_groups_81438 = sdiv_up64(nest_sizze_81002, + segmap_group_sizze_81437) + mem_121915 = opencl_alloc(self, bytes_121997, "mem_121915") + if ((1 * (np.int64(segmap_usable_groups_81438) * np.int64(segmap_group_sizze_81437))) != 0): + self.mainDetailedzisegmap_79061_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121906, mem_121915) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79061_var, + ((np.int64(segmap_usable_groups_81438) * np.int64(segmap_group_sizze_81437)),), + (np.int64(segmap_group_sizze_81437),)) + if synchronous: + sync(self) + mem_121906 = None + segmap_usable_groups_81447 = sdiv_up64(binop_x_120244, + segmap_group_sizze_81446) + mem_121919 = opencl_alloc(self, bytes_121990, "mem_121919") + if ((1 * (np.int64(segmap_usable_groups_81447) * np.int64(segmap_group_sizze_81446))) != 0): + self.mainDetailedzisegmap_79039_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_121898, mem_121919) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_79039_var, + ((np.int64(segmap_usable_groups_81447) * np.int64(segmap_group_sizze_81446)),), + (np.int64(segmap_group_sizze_81446),)) + if synchronous: + sync(self) + mem_121898 = None + defunc_5_map_res_mem_121929 = mem_121915 + defunc_5_map_res_mem_121930 = mem_121919 + defunc_5_map_res_mem_121931 = mem_121343 + mem_120252 = 
None + mem_120254 = None + num_recresids_padded_71534 = (defunc_2_reduce_res_70985 - k2p2zq_70876) + replicate_arg_71535 = (m_70861 * num_recresids_padded_71534) + bounds_invalid_upwards_71536 = slt64(replicate_arg_71535, np.int64(0)) + valid_71537 = not(bounds_invalid_upwards_71536) + range_valid_c_71538 = True + assert valid_71537, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 recresid.fut:28:14-49\n #3 recresid.fut:100:7-30\n #4 mroc.fut:27:25-38\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:185:3-72\n #8 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_71535, + " is invalid.")) + bytes_121932 = (np.int64(8) * replicate_arg_71535) + mem_121934 = opencl_alloc(self, bytes_121932, "mem_121934") + self.futhark_builtinzhreplicate_f64(mem_121934, + (num_recresids_padded_71534 * m_70861), + np.float64(0.0)) + loop_cond_t_res_71540 = slt64(k2p2zq_70876, m_71015) + loop_not_taken_71541 = not(loop_cond_t_res_71540) + protect_assert_disj_71542 = (valid_71064 or loop_not_taken_71541) + range_valid_c_71543 = True + assert protect_assert_disj_71542, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:62:33-75\n #5 /prelude/soacs.fut:91:28-38\n #6 /prelude/soacs.fut:91:3-61\n #7 recresid.fut:51:11-73:44\n #8 recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:185:3-72\n #13 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_71062, + " is invalid.")) + segmap_group_sizze_81492 = self.sizes["mainDetailed.segmap_group_size_81470"] + segmap_usable_groups_81493 = sdiv_up_safe64(binop_x_120251, + segmap_group_sizze_81492) + mem_121938 = 
opencl_alloc(self, bytes_120250, "mem_121938") + if ((1 * (np.int64(segmap_usable_groups_81493) * np.int64(segmap_group_sizze_81492))) != 0): + self.mainDetailedzisegmap_81467_var.set_args(self.global_failure, + np.int64(k2p2zq_70876), + mem_121938) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_81467_var, + ((np.int64(segmap_usable_groups_81493) * np.int64(segmap_group_sizze_81492)),), + (np.int64(segmap_group_sizze_81492),)) + if synchronous: + sync(self) + suff_outer_par_81604 = (self.sizes["mainDetailed.suff_outer_par_9"] <= m_70861) + segmap_group_sizze_82141 = self.sizes["mainDetailed.segmap_group_size_81608"] + max_num_groups_127199 = self.sizes["mainDetailed.segmap_num_groups_81610"] + num_groups_82142 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_82141), + sext_i32_i64(max_num_groups_127199)))) + segred_group_sizze_84432 = self.sizes["mainDetailed.segred_group_size_84408"] + max_num_groups_127200 = self.sizes["mainDetailed.segred_num_groups_84410"] + num_groups_84433 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_84432), + sext_i32_i64(max_num_groups_127200)))) + segmap_group_sizze_84453 = self.sizes["mainDetailed.segmap_group_size_84398"] + segred_group_sizze_84463 = self.sizes["mainDetailed.segred_group_size_84379"] + max_num_groups_127201 = self.sizes["mainDetailed.segred_num_groups_84381"] + num_groups_84464 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_84463), + sext_i32_i64(max_num_groups_127201)))) + segmap_group_sizze_84476 = self.sizes["mainDetailed.segmap_group_size_84367"] + suff_outer_par_84530 = (self.sizes["mainDetailed.suff_outer_par_15"] <= m_70861) + segmap_group_sizze_84534 = self.sizes["mainDetailed.segmap_group_size_84149"] + max_num_groups_127202 = self.sizes["mainDetailed.segmap_num_groups_84151"] + num_groups_84535 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + 
segmap_group_sizze_84534), + sext_i32_i64(max_num_groups_127202)))) + segred_group_sizze_84556 = self.sizes["mainDetailed.segred_group_size_84226"] + segmap_group_sizze_84567 = self.sizes["mainDetailed.segmap_group_size_84218"] + segmap_group_sizze_84574 = self.sizes["mainDetailed.segmap_group_size_84210"] + segmap_group_sizze_84592 = self.sizes["mainDetailed.segmap_group_size_83859"] + max_num_groups_127203 = self.sizes["mainDetailed.segmap_num_groups_83861"] + num_groups_84593 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_84592), + sext_i32_i64(max_num_groups_127203)))) + segmap_group_sizze_84888 = self.sizes["mainDetailed.segmap_group_size_83802"] + segmap_group_sizze_84903 = self.sizes["mainDetailed.segmap_group_size_83768"] + suff_outer_par_84927 = (self.sizes["mainDetailed.suff_outer_par_12"] <= binop_x_120244) + suff_outer_par_85022 = (self.sizes["mainDetailed.suff_outer_par_13"] <= m_70861) + segmap_group_sizze_85025 = self.sizes["mainDetailed.segmap_group_size_83339"] + max_num_groups_127204 = self.sizes["mainDetailed.segmap_num_groups_83341"] + num_groups_85026 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_85025), + sext_i32_i64(max_num_groups_127204)))) + suff_outer_par_85056 = (self.sizes["mainDetailed.suff_outer_par_14"] <= binop_x_120244) + segred_group_sizze_85079 = self.sizes["mainDetailed.segred_group_size_83453"] + segmap_group_sizze_85096 = self.sizes["mainDetailed.segmap_group_size_83440"] + segmap_group_sizze_85107 = self.sizes["mainDetailed.segmap_group_size_83429"] + segmap_group_sizze_85118 = self.sizes["mainDetailed.segmap_group_size_83124"] + max_num_groups_127205 = self.sizes["mainDetailed.segmap_num_groups_83126"] + num_groups_85119 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_85118), + sext_i32_i64(max_num_groups_127205)))) + suff_outer_par_85123 = (self.sizes["mainDetailed.suff_outer_par_10"] <= binop_x_120244) + 
suff_outer_par_85145 = (self.sizes["mainDetailed.suff_outer_par_11"] <= nest_sizze_81002) + segred_group_sizze_85161 = self.sizes["mainDetailed.segred_group_size_83175"] + max_num_groups_127206 = self.sizes["mainDetailed.segred_num_groups_83177"] + num_groups_85162 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_81260, + segred_group_sizze_85161), + sext_i32_i64(max_num_groups_127206)))) + segmap_group_sizze_85177 = self.sizes["mainDetailed.segmap_group_size_83059"] + max_num_groups_127207 = self.sizes["mainDetailed.segmap_num_groups_83061"] + num_groups_85178 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_85177), + sext_i32_i64(max_num_groups_127207)))) + segmap_group_sizze_85256 = self.sizes["mainDetailed.segmap_group_size_83005"] + segmap_group_sizze_85327 = self.sizes["mainDetailed.segmap_group_size_82848"] + segmap_group_sizze_85346 = self.sizes["mainDetailed.segmap_group_size_82801"] + segmap_group_sizze_85355 = self.sizes["mainDetailed.segmap_group_size_82778"] + segmap_group_sizze_85364 = self.sizes["mainDetailed.segmap_group_size_82705"] + segred_group_sizze_85427 = self.sizes["mainDetailed.segred_group_size_85426"] + max_num_groups_127208 = self.sizes["mainDetailed.segred_num_groups_85428"] + num_groups_85429 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segred_group_sizze_85427), + sext_i32_i64(max_num_groups_127208)))) + segmap_usable_groups_84454 = sdiv_up_safe64(m_70861, + segmap_group_sizze_84453) + segmap_usable_groups_84477 = sdiv_up_safe64(m_70861, + segmap_group_sizze_84476) + segmap_usable_groups_84568 = sdiv_up_safe64(m_70861, + segmap_group_sizze_84567) + segmap_usable_groups_84575 = sdiv_up_safe64(m_70861, + segmap_group_sizze_84574) + segmap_usable_groups_84889 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_84888) + segmap_usable_groups_84904 = sdiv_up_safe64(nest_sizze_81002, + segmap_group_sizze_84903) + segmap_usable_groups_85097 = 
sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_85096) + segmap_usable_groups_85108 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_85107) + segmap_usable_groups_85257 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_85256) + segmap_usable_groups_85328 = sdiv_up_safe64(nest_sizze_81002, + segmap_group_sizze_85327) + segmap_usable_groups_85347 = sdiv_up_safe64(nest_sizze_81002, + segmap_group_sizze_85346) + segmap_usable_groups_85356 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_85355) + segmap_usable_groups_85365 = sdiv_up_safe64(m_70861, + segmap_group_sizze_85364) + mem_121941 = opencl_alloc(self, bytes_120175, "mem_121941") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121941, np.int64(0), + defunc_3_map_res_mem_120231, + np.int64(0), np.int64(1), + n_70864, m_70861) + mem_121944 = opencl_alloc(self, bytes_120250, "mem_121944") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121944, np.int64(0), + mem_121938, np.int64(0), + np.int64(1), k2p2zq_70876, + k2p2zq_70876) + tile_sizze_116779 = self.sizes["mainDetailed.tile_size_116778"] + group_sizze_116780 = (tile_sizze_116779 * tile_sizze_116779) + mem_121946 = opencl_alloc(self, bytes_120247, "mem_121946") + self.futhark_builtinzhreplicate_f64(mem_121946, k2p2zq_70876, + np.float64(0.0)) + tile_sizze_117141 = self.sizes["mainDetailed.tile_size_117140"] + group_sizze_117142 = (tile_sizze_117141 * tile_sizze_117141) + Ty_117445 = self.sizes["mainDetailed.Ty_117442"] + Ry_117446 = self.sizes["mainDetailed.Ry_117444"] + Tx_117447 = self.sizes["mainDetailed.Tx_117441"] + Rx_117448 = self.sizes["mainDetailed.Rx_117443"] + Tk_117449 = self.sizes["mainDetailed.Tk_117440"] + TxRx_117452 = (Tx_117447 * Rx_117448) + TyRy_117453 = (Ty_117445 * Ry_117446) + a_loc_szz_117455 = (Tk_117449 * TyRy_117453) + binop_x_117456 = (Tx_117447 * Tk_117449) + b_loc_szz_117457 = (Rx_117448 * binop_x_117456) + group_sizze_117462 = (Ty_117445 * Tx_117447) + num_groups_x_116781 = sdiv_up_safe64(m_70861, 
tile_sizze_116779) + num_groups_y_116782 = sdiv_up_safe64(k2p2zq_70876, tile_sizze_116779) + num_groups_top_116783 = (num_groups_x_116781 * num_groups_y_116782) + num_groups_x_117143 = sdiv_up_safe64(m_70861, tile_sizze_117141) + num_groups_y_117144 = sdiv_up_safe64(k2p2zq_70876, tile_sizze_117141) + num_groups_top_117145 = (num_groups_x_117143 * num_groups_y_117144) + tk_div_tx_117450 = sdiv_up_safe64(Tk_117449, Tx_117447) + tk_div_ty_117451 = sdiv_up_safe64(Tk_117449, Ty_117445) + gridDim_x_117458 = sdiv_up_safe64(k2p2zq_70876, TxRx_117452) + gridDim_y_117459 = sdiv_up_safe64(k2p2zq_70876, TyRy_117453) + binop_y_117460 = (gridDim_x_117458 * gridDim_y_117459) + grid_sizze_117461 = (m_70861 * binop_y_117460) + full_tiles_117490 = squot_safe64(k2p2zq_70876, Tk_117449) + kk_117693 = (Tk_117449 * full_tiles_117490) + padded_sizze_115508 = (m_70861 + y_115507) + mem_121948 = opencl_alloc(self, bytes_121947, "mem_121948") + per_chunk_115510 = squot_safe64(padded_sizze_115508, num_threads_115503) + mem_121992 = opencl_alloc(self, bytes_121990, "mem_121992") + mem_121996 = opencl_alloc(self, bytes_121993, "mem_121996") + mem_122000 = opencl_alloc(self, bytes_121997, "mem_122000") + mem_122003 = opencl_alloc(self, bytes_121990, "mem_122003") + mem_122007 = opencl_alloc(self, bytes_121997, "mem_122007") + bytes_122739 = (np.int64(8) * padded_sizze_115508) + binop_x_122742 = (num_threads_115503 * per_chunk_115510) + bytes_122741 = (np.int64(8) * binop_x_122742) + binop_x_123163 = (k2p2zq_70876 * group_sizze_116780) + bytes_123161 = (np.int64(8) * binop_x_123163) + ctx_val_123177 = (k2p2zq_70876 * tile_sizze_116779) + bytes_123180 = (np.int64(8) * group_sizze_116780) + binop_x_125283 = (np.int64(8) * tile_sizze_116779) + sizze_125284 = (tile_sizze_116779 * binop_x_125283) + bytes_123298 = (np.int64(8) * group_sizze_117142) + binop_x_125308 = (np.int64(8) * tile_sizze_117141) + sizze_125309 = (tile_sizze_117141 * binop_x_125308) + binop_x_123329 = (k2p2zq_70876 * 
group_sizze_117142) + bytes_123327 = (np.int64(8) * binop_x_123329) + binop_x_123431 = (Ry_117446 * group_sizze_117462) + binop_x_123432 = (Rx_117448 * binop_x_123431) + bytes_123429 = (np.int64(8) * binop_x_123432) + binop_x_123423 = (Ry_117446 * Rx_117448) + bytes_123422 = (np.int64(8) * binop_x_123423) + bytes_123434 = (np.int64(8) * a_loc_szz_117455) + bytes_123436 = (np.int64(8) * b_loc_szz_117457) + bytes_123505 = (np.int64(8) * binop_x_123431) + binop_x_123511 = (Rx_117448 * group_sizze_117462) + bytes_123509 = (np.int64(8) * binop_x_123511) + bytes_123497 = (np.int64(8) * Ry_117446) + bytes_123499 = (np.int64(8) * Rx_117448) + binop_x_125329 = (np.int64(8) * Ty_117445) + binop_x_125330 = (Tx_117447 * binop_x_125329) + binop_x_125331 = (Ry_117446 * binop_x_125330) + sizze_125332 = (Rx_117448 * binop_x_125331) + mem_123728 = opencl_alloc(self, np.int64(1), "mem_123728") + binop_x_125575 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125577 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125578 = (np.int64(16) * k2p2zq_70876) + binop_x_125592 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125594 = (np.int64(8) * k2p2zq_70876) + double_buffer_sizze_125595 = (np.int64(16) * k2p2zq_70876) + num_threads_125708 = (segmap_group_sizze_82141 * num_groups_82142) + total_sizze_125709 = (bytes_120247 * num_threads_125708) + total_sizze_125710 = (bytes_120247 * num_threads_125708) + total_sizze_125711 = (bytes_120269 * num_threads_125708) + total_sizze_125712 = (bytes_120247 * num_threads_125708) + total_sizze_125713 = (bytes_120250 * num_threads_125708) + total_sizze_125714 = (bytes_120250 * num_threads_125708) + total_sizze_125715 = (bytes_120247 * num_threads_125708) + total_sizze_125716 = (bytes_120250 * num_threads_125708) + total_sizze_125717 = (bytes_120247 * num_threads_125708) + total_sizze_125718 = (bytes_120250 * num_threads_125708) + total_sizze_125719 = (bytes_120247 * num_threads_125708) + total_sizze_125720 = (bytes_120250 * 
num_threads_125708) + total_sizze_125721 = (bytes_120247 * num_threads_125708) + total_sizze_125722 = (bytes_120250 * num_threads_125708) + total_sizze_125723 = (sizze_125149 * num_threads_125708) + total_sizze_125724 = (bytes_120247 * num_threads_125708) + total_sizze_125727 = (sizze_125149 * num_threads_125708) + total_sizze_125728 = (bytes_120247 * num_threads_125708) + total_sizze_125731 = (double_buffer_sizze_125577 * num_threads_125708) + total_sizze_125732 = (double_buffer_sizze_125578 * num_threads_125708) + num_threads_125743 = (segmap_group_sizze_84592 * num_groups_84593) + total_sizze_125744 = (bytes_120247 * num_threads_125743) + total_sizze_125745 = (sizze_125149 * num_threads_125743) + total_sizze_125746 = (bytes_120247 * num_threads_125743) + total_sizze_125748 = (sizze_125149 * num_threads_125743) + total_sizze_125749 = (bytes_120247 * num_threads_125743) + total_sizze_125752 = (double_buffer_sizze_125594 * num_threads_125743) + total_sizze_125753 = (double_buffer_sizze_125595 * num_threads_125743) + num_threads_125756 = (group_sizze_116780 * num_groups_top_116783) + total_sizze_125757 = (bytes_120247 * num_threads_125756) + num_threads_125758 = (segmap_group_sizze_85025 * num_groups_85026) + total_sizze_125759 = (bytes_120250 * num_threads_125758) + total_sizze_125760 = (bytes_120247 * num_threads_125758) + num_threads_125761 = (group_sizze_117142 * num_groups_top_117145) + total_sizze_125762 = (bytes_120247 * num_threads_125761) + num_threads_125766 = (segmap_group_sizze_85118 * num_groups_85119) + total_sizze_125767 = (bytes_120247 * num_threads_125766) + num_threads_125770 = (segmap_group_sizze_85177 * num_groups_85178) + mem_param_121959 = defunc_5_map_res_mem_121929 + mem_param_121967 = defunc_5_map_res_mem_121930 + mem_param_121972 = defunc_5_map_res_mem_121931 + loop_while_71550 = loop_cond_t_res_71540 + r_71551 = k2p2zq_70876 + while loop_while_71550: + x_71556 = sle64(np.int64(0), r_71551) + y_71557 = slt64(r_71551, 
defunc_2_reduce_res_70985) + bounds_check_71558 = (x_71556 and y_71557) + index_ok_71559 = (ok_or_empty_70954 and bounds_check_71558) + index_certs_71560 = True + assert index_ok_71559, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:185:3-72\n #10 bfastfinal.fut:181:1-185:72\n" % ("Index [", + r_71551, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "][", + k2p2zq_70876, + "].")) + index_certs_71561 = True + assert bounds_check_71558, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:185:3-72\n #10 bfastfinal.fut:181:1-185:72\n" % ("Index [", + r_71551, + "] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "].")) + rp1_71562 = (np.int64(1) + r_71551) + empty_slice_71563 = (rp1_71562 == np.int64(0)) + i_lte_j_71564 = sle64(np.int64(0), rp1_71562) + y_71565 = (bounds_check_71558 and i_lte_j_71564) + ok_or_empty_71566 = (empty_slice_71563 or y_71565) + index_certs_71567 = True + assert ok_or_empty_71566, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:66-75\n #1 /prelude/soacs.fut:91:28-38\n #2 /prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + rp1_71562, + "] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "].")) + index_ok_71568 = (ok_or_empty_70954 and 
ok_or_empty_71566) + index_certs_71569 = True + assert index_ok_71568, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:51-63\n #1 /prelude/soacs.fut:91:28-38\n #2 /prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + rp1_71562, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "][", + k2p2zq_70876, + "].")) + i_p_m_t_s_leq_w_71570 = slt64(r_71551, rp1_71562) + y_71571 = (x_71556 and i_p_m_t_s_leq_w_71570) + y_71572 = (i_lte_j_71564 and y_71571) + ok_or_empty_71573 = (empty_slice_71563 or y_71572) + min_res_71574 = smin64(k2p2zq_70876, rp1_71562) + i_p_m_t_s_leq_w_71575 = slt64(m_70948, rp1_71562) + y_71576 = (zzero_leq_i_p_m_t_s_70949 and i_p_m_t_s_leq_w_71575) + y_71577 = (i_lte_j_70951 and y_71576) + ok_or_empty_71578 = (empty_slice_70947 or y_71577) + index_ok_71579 = (ok_or_empty_70954 and ok_or_empty_71578) + index_certs_71580 = True + assert index_ok_71579, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:80:50-58\n #1 recresid.fut:62:33-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:185:3-72\n #10 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + k2p2zq_70876, + ", :", + k2p2zq_70876, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + rp1_71562, + "].")) + index_certs_71587 = True + assert ok_or_empty_71578, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:92:15-21\n #1 recresid.fut:62:33-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 
bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:185:3-72\n #10 bfastfinal.fut:181:1-185:72\n" % ("Index [:", + k2p2zq_70876, + "] out of bounds for array of shape [", + rp1_71562, + "].")) + nest_sizze_84555 = (m_70861 * rp1_71562) + max_num_groups_127218 = self.sizes["mainDetailed.segred_num_groups_84228"] + num_groups_84557 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_84555, + segred_group_sizze_84556), + sext_i32_i64(max_num_groups_127218)))) + self.futhark_builtinzhreplicate_f64(mem_121992, + (m_70861 * k2p2zq_70876), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_121996, + ((m_70861 * np.int64(2)) * k2p2zq_70876), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122000, + ((m_70861 * k2p2zq_70876) * k2p2zq_70876), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122003, + (m_70861 * k2p2zq_70876), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122007, + ((m_70861 * k2p2zq_70876) * k2p2zq_70876), + np.float64(0.0)) + bytes_122015 = (np.int64(8) * nest_sizze_84555) + binop_x_122019 = (k2p2zq_70876 * rp1_71562) + binop_x_122020 = (m_70861 * binop_x_122019) + bytes_122018 = (np.int64(8) * binop_x_122020) + binop_x_122024 = (k2p2zq_70876 * nest_sizze_84555) + bytes_122022 = (np.int64(8) * binop_x_122024) + bytes_122511 = (np.int64(8) * rp1_71562) + binop_x_123636 = (rp1_71562 * binop_x_120244) + bytes_123634 = (np.int64(8) * binop_x_123636) + sizze_125454 = (rp1_71562 * bytes_120247) + double_buffer_sizze_125576 = (rp1_71562 * binop_x_125575) + double_buffer_sizze_125584 = (np.int64(8) * rp1_71562) + double_buffer_sizze_125593 = (rp1_71562 * binop_x_125592) + double_buffer_sizze_125601 = (np.int64(8) * rp1_71562) + total_sizze_125725 = (bytes_122511 * num_threads_125708) + total_sizze_125726 = (sizze_125454 * num_threads_125708) + total_sizze_125729 = (sizze_125454 * num_threads_125708) + total_sizze_125730 = (double_buffer_sizze_125576 * num_threads_125708) + total_sizze_125733 = 
(double_buffer_sizze_125584 * num_threads_125708) + total_sizze_125747 = (sizze_125454 * num_threads_125743) + total_sizze_125750 = (sizze_125454 * num_threads_125743) + total_sizze_125751 = (double_buffer_sizze_125593 * num_threads_125743) + total_sizze_125771 = (bytes_122511 * num_threads_125770) + total_sizze_125772 = (double_buffer_sizze_125601 * num_threads_125770) + local_memory_capacity_127985 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127985)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127985))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127985))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127985))) and suff_outer_par_81604): + mem_122011 = opencl_alloc(self, bytes_120258, "mem_122011") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122011, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_122014 = opencl_alloc(self, bytes_121990, "mem_122014") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122014, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_70876, m_70861) + mem_122017 = opencl_alloc(self, bytes_122015, "mem_122017") + group_sizze_127222 = self.sizes["mainDetailed.group_size_127222"] + num_groups_127223 = sdiv_up64((m_70861 * rp1_71562), + group_sizze_127222) + if ((1 * (np.int64(num_groups_127223) * np.int64(group_sizze_127222))) != 0): + self.mainDetailedzicopy_127219_var.set_args(np.int64(m_70861), + np.int64(n_70864), + np.int64(rp1_71562), + defunc_3_map_res_mem_120231, + mem_122017) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127219_var, + ((np.int64(num_groups_127223) * np.int64(group_sizze_127222)),), + (np.int64(group_sizze_127222),)) + if synchronous: + sync(self) + mem_122021 = opencl_alloc(self, bytes_122018, "mem_122021") + group_sizze_127227 = self.sizes["mainDetailed.group_size_127227"] + num_groups_127228 = sdiv_up64(((m_70861 * 
k2p2zq_70876) * rp1_71562), + group_sizze_127227) + if ((1 * (np.int64(num_groups_127228) * np.int64(group_sizze_127227))) != 0): + self.mainDetailedzicopy_127224_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + mem_120246, mem_122021) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127224_var, + ((np.int64(num_groups_127228) * np.int64(group_sizze_127227)),), + (np.int64(group_sizze_127227),)) + if synchronous: + sync(self) + mem_122025 = opencl_alloc(self, bytes_122022, "mem_122025") + group_sizze_127232 = self.sizes["mainDetailed.group_size_127232"] + num_groups_127233 = sdiv_up64(((m_70861 * k2p2zq_70876) * rp1_71562), + group_sizze_127232) + if ((1 * (np.int64(num_groups_127233) * np.int64(group_sizze_127232))) != 0): + self.mainDetailedzicopy_127229_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + mem_120246, mem_122025) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127229_var, + ((np.int64(num_groups_127233) * np.int64(group_sizze_127232)),), + (np.int64(group_sizze_127232),)) + if synchronous: + sync(self) + mem_122650 = opencl_alloc(self, m_70861, "mem_122650") + mem_122654 = opencl_alloc(self, bytes_120258, "mem_122654") + mem_122657 = opencl_alloc(self, bytes_121990, "mem_122657") + mem_122659 = opencl_alloc(self, bytes_120173, "mem_122659") + mem_122661 = opencl_alloc(self, bytes_120173, "mem_122661") + mem_122028 = opencl_alloc(self, total_sizze_125709, "mem_122028") + mem_122042 = opencl_alloc(self, total_sizze_125710, "mem_122042") + mem_122045 = opencl_alloc(self, total_sizze_125711, "mem_122045") + mem_122047 = opencl_alloc(self, total_sizze_125712, "mem_122047") + mem_122382 = opencl_alloc(self, total_sizze_125713, "mem_122382") + mem_122423 = opencl_alloc(self, total_sizze_125714, "mem_122423") + mem_122435 = opencl_alloc(self, total_sizze_125715, "mem_122435") + 
mem_122464 = opencl_alloc(self, total_sizze_125716, "mem_122464") + mem_122537 = opencl_alloc(self, total_sizze_125717, "mem_122537") + mem_122552 = opencl_alloc(self, total_sizze_125718, "mem_122552") + mem_122564 = opencl_alloc(self, total_sizze_125719, "mem_122564") + mem_122575 = opencl_alloc(self, total_sizze_125720, "mem_122575") + mem_122595 = opencl_alloc(self, total_sizze_125721, "mem_122595") + mem_122598 = opencl_alloc(self, total_sizze_125722, "mem_122598") + mem_125248 = opencl_alloc(self, total_sizze_125723, "mem_125248") + mem_125250 = opencl_alloc(self, total_sizze_125724, "mem_125250") + mem_125258 = opencl_alloc(self, total_sizze_125725, "mem_125258") + mem_125455 = opencl_alloc(self, total_sizze_125726, "mem_125455") + mem_125463 = opencl_alloc(self, total_sizze_125727, "mem_125463") + mem_125465 = opencl_alloc(self, total_sizze_125728, "mem_125465") + mem_125505 = opencl_alloc(self, total_sizze_125729, "mem_125505") + double_buffer_mem_125569 = opencl_alloc(self, total_sizze_125730, + "double_buffer_mem_125569") + double_buffer_mem_125570 = opencl_alloc(self, total_sizze_125731, + "double_buffer_mem_125570") + double_buffer_mem_125571 = opencl_alloc(self, total_sizze_125732, + "double_buffer_mem_125571") + double_buffer_mem_125582 = opencl_alloc(self, total_sizze_125733, + "double_buffer_mem_125582") + if ((1 * (np.int64(num_groups_82142) * np.int64(segmap_group_sizze_82141))) != 0): + self.mainDetailedzisegmap_81606_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(m_70948), + np.byte(y_70952), + np.int64(defunc_2_reduce_res_70985), + np.float64(tol_71054), + np.int64(k_71067), + np.int64(r_71551), + np.int64(rp1_71562), + np.byte(ok_or_empty_71573), + np.int64(min_res_71574), + np.int64(num_groups_82142), + np.int64(binop_x_120251), + np.int64(num_threads_125708), + defunc_3_map_res_mem_120231, + mem_120246, mem_121938, + 
mem_121941, + mem_param_121972, + mem_122011, mem_122014, + mem_122017, mem_122021, + mem_122025, mem_122028, + mem_122042, mem_122045, + mem_122047, mem_122382, + mem_122423, mem_122435, + mem_122464, mem_122537, + mem_122552, mem_122564, + mem_122575, mem_122595, + mem_122598, mem_122650, + mem_122654, mem_122657, + mem_122659, mem_122661, + mem_125248, mem_125250, + mem_125258, mem_125455, + mem_125463, mem_125465, + mem_125505, + double_buffer_mem_125569, + double_buffer_mem_125570, + double_buffer_mem_125571, + double_buffer_mem_125582) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_81606_var, + ((np.int64(num_groups_82142) * np.int64(segmap_group_sizze_82141)),), + (np.int64(segmap_group_sizze_82141),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_122011 = None + mem_122014 = None + mem_122017 = None + mem_122021 = None + mem_122025 = None + mem_122028 = None + mem_122042 = None + mem_122045 = None + mem_122047 = None + mem_122382 = None + mem_122423 = None + mem_122435 = None + mem_122464 = None + mem_122537 = None + mem_122552 = None + mem_122564 = None + mem_122575 = None + mem_122595 = None + mem_122598 = None + mem_125248 = None + mem_125250 = None + mem_125258 = None + mem_125455 = None + mem_125463 = None + mem_125465 = None + mem_125505 = None + double_buffer_mem_125569 = None + double_buffer_mem_125570 = None + double_buffer_mem_125571 = None + double_buffer_mem_125582 = None + mem_123715 = opencl_alloc(self, bytes_121997, "mem_123715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123715, np.int64(0), + mem_122654, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_122654 = None + mem_123719 = opencl_alloc(self, bytes_121990, "mem_123719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123719, np.int64(0), + mem_122657, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_122657 = None + defunc_7_map_res_mem_123721 = mem_122650 + defunc_7_map_res_mem_123722 
= mem_123715 + defunc_7_map_res_mem_123723 = mem_123719 + defunc_7_map_res_mem_123724 = mem_122659 + defunc_7_map_res_mem_123725 = mem_122661 + else: + mem_122665 = opencl_alloc(self, bytes_121997, "mem_122665") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122665, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_122668 = opencl_alloc(self, bytes_120173, "mem_122668") + mem_122671 = opencl_alloc(self, bytes_121990, "mem_122671") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_84432): + segment_sizze_nonzzero_127346 = smax64(np.int64(1), k2p2zq_70876) + num_threads_127347 = (num_groups_84433 * segred_group_sizze_84432) + if ((1 * (np.int64(num_groups_84433) * np.int64(segred_group_sizze_84432))) != 0): + self.mainDetailedzisegred_small_84414_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_84432))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(r_71551), + np.int64(num_groups_84433), + np.int64(segment_sizze_nonzzero_127346), + mem_120246, + mem_122665, + mem_122668, + mem_122671) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_84414_var, + ((np.int64(num_groups_84433) * np.int64(segred_group_sizze_84432)),), + (np.int64(segred_group_sizze_84432),)) + if synchronous: + sync(self) + else: + groups_per_segment_127368 = sdiv_up64(num_groups_84433, + smax64(np.int64(1), m_70861)) + elements_per_thread_127369 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_84432 * groups_per_segment_127368)) + virt_num_groups_127370 = (groups_per_segment_127368 * m_70861) + num_threads_127371 = (num_groups_84433 * segred_group_sizze_84432) + threads_per_segment_127372 = (groups_per_segment_127368 * segred_group_sizze_84432) + group_res_arr_mem_127373 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_84432 * virt_num_groups_127370)), + "group_res_arr_mem_127373") + 
mainDetailedzicounter_mem_127375 = self.mainDetailedzicounter_mem_127375 + if ((1 * (np.int64(num_groups_84433) * np.int64(segred_group_sizze_84432))) != 0): + self.mainDetailedzisegred_large_84414_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_84432))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(r_71551), + np.int64(num_groups_84433), + np.int64(groups_per_segment_127368), + np.int64(elements_per_thread_127369), + np.int64(virt_num_groups_127370), + np.int64(threads_per_segment_127372), + mem_120246, + mem_122665, + mem_122668, + mem_122671, + group_res_arr_mem_127373, + mainDetailedzicounter_mem_127375) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_84414_var, + ((np.int64(num_groups_84433) * np.int64(segred_group_sizze_84432)),), + (np.int64(segred_group_sizze_84432),)) + if synchronous: + sync(self) + mem_122665 = None + mem_122674 = opencl_alloc(self, bytes_120173, "mem_122674") + if ((1 * (np.int64(segmap_usable_groups_84454) * np.int64(segmap_group_sizze_84453))) != 0): + self.mainDetailedzisegmap_84396_var.set_args(self.global_failure, + np.int64(m_70861), + mem_122668, mem_122674) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84396_var, + ((np.int64(segmap_usable_groups_84454) * np.int64(segmap_group_sizze_84453)),), + (np.int64(segmap_group_sizze_84453),)) + if synchronous: + sync(self) + mem_122668 = None + mem_122677 = opencl_alloc(self, bytes_120173, "mem_122677") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_84463): + segment_sizze_nonzzero_127413 = smax64(np.int64(1), k2p2zq_70876) + num_threads_127414 = (num_groups_84464 * segred_group_sizze_84463) + if ((1 * (np.int64(num_groups_84464) * np.int64(segred_group_sizze_84463))) != 0): + self.mainDetailedzisegred_small_84385_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * 
segred_group_sizze_84463))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(r_71551), + np.int64(num_groups_84464), + np.int64(segment_sizze_nonzzero_127413), + mem_120246, + mem_param_121967, + mem_122677) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_84385_var, + ((np.int64(num_groups_84464) * np.int64(segred_group_sizze_84463)),), + (np.int64(segred_group_sizze_84463),)) + if synchronous: + sync(self) + else: + groups_per_segment_127434 = sdiv_up64(num_groups_84464, + smax64(np.int64(1), m_70861)) + elements_per_thread_127435 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_84463 * groups_per_segment_127434)) + virt_num_groups_127436 = (groups_per_segment_127434 * m_70861) + num_threads_127437 = (num_groups_84464 * segred_group_sizze_84463) + threads_per_segment_127438 = (groups_per_segment_127434 * segred_group_sizze_84463) + group_res_arr_mem_127439 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_84463 * virt_num_groups_127436)), + "group_res_arr_mem_127439") + mainDetailedzicounter_mem_127441 = self.mainDetailedzicounter_mem_127441 + if ((1 * (np.int64(num_groups_84464) * np.int64(segred_group_sizze_84463))) != 0): + self.mainDetailedzisegred_large_84385_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_84463))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(r_71551), + np.int64(num_groups_84464), + np.int64(groups_per_segment_127434), + np.int64(elements_per_thread_127435), + np.int64(virt_num_groups_127436), + np.int64(threads_per_segment_127438), + mem_120246, + mem_param_121967, + mem_122677, + group_res_arr_mem_127439, + mainDetailedzicounter_mem_127441) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_84385_var, + ((np.int64(num_groups_84464) * np.int64(segred_group_sizze_84463)),), + 
(np.int64(segred_group_sizze_84463),)) + if synchronous: + sync(self) + mem_122680 = opencl_alloc(self, bytes_120173, "mem_122680") + mem_122682 = opencl_alloc(self, bytes_120173, "mem_122682") + if ((1 * (np.int64(segmap_usable_groups_84477) * np.int64(segmap_group_sizze_84476))) != 0): + self.mainDetailedzisegmap_84365_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(n_70864), + np.int64(r_71551), + defunc_3_map_res_mem_120231, + mem_122674, mem_122677, + mem_122680, mem_122682) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84365_var, + ((np.int64(segmap_usable_groups_84477) * np.int64(segmap_group_sizze_84476)),), + (np.int64(segmap_group_sizze_84476),)) + if synchronous: + sync(self) + mem_122677 = None + mem_122686 = opencl_alloc(self, bytes_122022, "mem_122686") + group_sizze_127481 = self.sizes["mainDetailed.group_size_127481"] + num_groups_127482 = sdiv_up64(((m_70861 * k2p2zq_70876) * rp1_71562), + group_sizze_127481) + if ((1 * (np.int64(num_groups_127482) * np.int64(group_sizze_127481))) != 0): + self.mainDetailedzicopy_127478_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + mem_120246, mem_122686) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127478_var, + ((np.int64(num_groups_127482) * np.int64(group_sizze_127481)),), + (np.int64(group_sizze_127481),)) + if synchronous: + sync(self) + mem_param_122694 = mem_121992 + mem_param_122705 = mem_121996 + j_84524 = np.int64(0) + one_129896 = np.int64(1) + for counter_129895 in range(k2p2zq_70876): + index_certs_84527 = True + assert ok_or_empty_71573, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 
mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:185:3-72\n #11 bfastfinal.fut:181:1-185:72\n" % ("Index [", + j_84524, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_70876, + "][", + rp1_71562, + "].")) + local_memory_capacity_127581 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127581)) and suff_outer_par_84530): + mem_122715 = opencl_alloc(self, bytes_121990, "mem_122715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122715, + np.int64(0), + mem_param_122694, + np.int64(0), + np.int64(1), + k2p2zq_70876, m_70861) + mem_122719 = opencl_alloc(self, bytes_120924, "mem_122719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122719, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + mem_122723 = opencl_alloc(self, bytes_121990, "mem_122723") + mem_122727 = opencl_alloc(self, bytes_120924, "mem_122727") + if ((1 * (np.int64(num_groups_84535) * np.int64(segmap_group_sizze_84534))) != 0): + self.mainDetailedzisegmap_84147_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(rp1_71562), + np.int64(j_84524), + np.int64(num_groups_84535), + mem_122686, + mem_122715, + mem_122719, + mem_122723, + mem_122727) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84147_var, + ((np.int64(num_groups_84535) * np.int64(segmap_group_sizze_84534)),), + (np.int64(segmap_group_sizze_84534),)) + if synchronous: + sync(self) + mem_122715 = None + mem_122719 = None + mem_122751 = opencl_alloc(self, bytes_121990, "mem_122751") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122751, + np.int64(0), + mem_122723, + np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_122723 = None + dqrdc2_res_mem_122757 = mem_122751 + dqrdc2_res_mem_122758 = mem_122727 + else: + mem_122730 = opencl_alloc(self, bytes_120173, "mem_122730") + if slt64((rp1_71562 * np.int64(2)), 
segred_group_sizze_84556): + segment_sizze_nonzzero_127499 = smax64(np.int64(1), rp1_71562) + num_threads_127500 = (num_groups_84557 * segred_group_sizze_84556) + if ((1 * (np.int64(num_groups_84557) * np.int64(segred_group_sizze_84556))) != 0): + self.mainDetailedzisegred_small_84232_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_84556))), + np.int64(m_70861), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + np.int64(j_84524), + np.int64(num_groups_84557), + np.int64(segment_sizze_nonzzero_127499), + mem_120246, + mem_122730) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_84232_var, + ((np.int64(num_groups_84557) * np.int64(segred_group_sizze_84556)),), + (np.int64(segred_group_sizze_84556),)) + if synchronous: + sync(self) + else: + groups_per_segment_127520 = sdiv_up64(num_groups_84557, + smax64(np.int64(1), + m_70861)) + elements_per_thread_127521 = sdiv_up64(rp1_71562, + (segred_group_sizze_84556 * groups_per_segment_127520)) + virt_num_groups_127522 = (groups_per_segment_127520 * m_70861) + num_threads_127523 = (num_groups_84557 * segred_group_sizze_84556) + threads_per_segment_127524 = (groups_per_segment_127520 * segred_group_sizze_84556) + group_res_arr_mem_127525 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_84556 * virt_num_groups_127522)), + "group_res_arr_mem_127525") + mainDetailedzicounter_mem_127527 = self.mainDetailedzicounter_mem_127527 + if ((1 * (np.int64(num_groups_84557) * np.int64(segred_group_sizze_84556))) != 0): + self.mainDetailedzisegred_large_84232_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_84556))), + np.int64(m_70861), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + np.int64(j_84524), + np.int64(num_groups_84557), + np.int64(groups_per_segment_127520), + np.int64(elements_per_thread_127521), + np.int64(virt_num_groups_127522), + 
np.int64(threads_per_segment_127524), + mem_120246, + mem_122730, + group_res_arr_mem_127525, + mainDetailedzicounter_mem_127527) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_84232_var, + ((np.int64(num_groups_84557) * np.int64(segred_group_sizze_84556)),), + (np.int64(segred_group_sizze_84556),)) + if synchronous: + sync(self) + mem_122733 = opencl_alloc(self, bytes_120173, "mem_122733") + if ((1 * (np.int64(segmap_usable_groups_84568) * np.int64(segmap_group_sizze_84567))) != 0): + self.mainDetailedzisegmap_84216_var.set_args(self.global_failure, + np.int64(m_70861), + mem_122730, + mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84216_var, + ((np.int64(segmap_usable_groups_84568) * np.int64(segmap_group_sizze_84567)),), + (np.int64(segmap_group_sizze_84567),)) + if synchronous: + sync(self) + mem_122730 = None + if ((1 * (np.int64(segmap_usable_groups_84575) * np.int64(segmap_group_sizze_84574))) != 0): + self.mainDetailedzisegmap_84207_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(j_84524), + mem_param_122694, + mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84207_var, + ((np.int64(segmap_usable_groups_84575) * np.int64(segmap_group_sizze_84574)),), + (np.int64(segmap_group_sizze_84574),)) + if synchronous: + sync(self) + mem_122738 = opencl_alloc(self, bytes_120924, "mem_122738") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122738, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + mem_122740 = opencl_alloc(self, bytes_122739, "mem_122740") + tmp_offs_127569 = np.int64(0) + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_122733, + dest_offset=np.int64((tmp_offs_127569 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127569 = 
(tmp_offs_127569 + m_70861) + if ((y_115507 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_121948, + dest_offset=np.int64((tmp_offs_127569 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115507 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127569 = (tmp_offs_127569 + y_115507) + mem_122743 = opencl_alloc(self, bytes_122741, "mem_122743") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122743, + np.int64(0), + mem_122740, + np.int64(0), + np.int64(1), + per_chunk_115510, + num_threads_115503) + mem_122740 = None + mem_122748 = opencl_alloc(self, bytes_120924, "mem_122748") + if ((1 * (np.int64(num_groups_84580) * np.int64(segmap_group_sizze_84579))) != 0): + self.mainDetailedzisegmap_84192_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(j_84524), + np.int64(num_groups_84580), + np.int64(num_threads_115503), + np.int64(per_chunk_115510), + mem_122733, + mem_122738, + mem_122743, + mem_122748) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_84192_var, + ((np.int64(num_groups_84580) * np.int64(segmap_group_sizze_84579)),), + (np.int64(segmap_group_sizze_84579),)) + if synchronous: + sync(self) + mem_122733 = None + mem_122738 = None + mem_122743 = None + mem_122755 = opencl_alloc(self, bytes_121990, "mem_122755") + if (((m_70861 * k2p2zq_70876) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122755, mem_param_122694, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_70861 * k2p2zq_70876) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_122757 = mem_122755 + dqrdc2_res_mem_122758 = mem_122748 + mem_122764 = opencl_alloc(self, bytes_121993, "mem_122764") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122764, np.int64(0), + dqrdc2_res_mem_122758, + np.int64(0), + np.int64(1), m_70861, + (np.int64(2) * k2p2zq_70876)) + dqrdc2_res_mem_122758 = None + mem_param_tmp_127483 
= dqrdc2_res_mem_122757 + mem_param_tmp_127484 = mem_122764 + mem_param_122694 = mem_param_tmp_127483 + mem_param_122705 = mem_param_tmp_127484 + j_84524 += one_129896 + dqrdc2_res_r_mem_122778 = mem_param_122694 + dqrdc2_res_r_mem_122789 = mem_param_122705 + mem_122686 = None + mem_122793 = opencl_alloc(self, bytes_122018, "mem_122793") + group_sizze_127585 = self.sizes["mainDetailed.group_size_127585"] + num_groups_127586 = sdiv_up64(((m_70861 * k2p2zq_70876) * rp1_71562), + group_sizze_127585) + if ((1 * (np.int64(num_groups_127586) * np.int64(group_sizze_127585))) != 0): + self.mainDetailedzicopy_127582_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(rp1_71562), + mem_120246, mem_122793) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127582_var, + ((np.int64(num_groups_127586) * np.int64(group_sizze_127585)),), + (np.int64(group_sizze_127585),)) + if synchronous: + sync(self) + mem_122796 = opencl_alloc(self, bytes_121990, "mem_122796") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122796, np.int64(0), + dqrdc2_res_r_mem_122778, + np.int64(0), np.int64(1), + k2p2zq_70876, m_70861) + dqrdc2_res_r_mem_122778 = None + mem_122800 = opencl_alloc(self, bytes_120924, "mem_122800") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122800, np.int64(0), + dqrdc2_res_r_mem_122789, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_70876), + m_70861) + dqrdc2_res_r_mem_122789 = None + mem_123127 = opencl_alloc(self, bytes_122018, "mem_123127") + mem_123130 = opencl_alloc(self, bytes_121990, "mem_123130") + mem_123133 = opencl_alloc(self, bytes_121990, "mem_123133") + mem_123135 = opencl_alloc(self, bytes_120173, "mem_123135") + mem_122803 = opencl_alloc(self, total_sizze_125744, "mem_122803") + mem_125265 = opencl_alloc(self, total_sizze_125745, "mem_125265") + mem_125267 = opencl_alloc(self, total_sizze_125746, "mem_125267") + mem_125472 = opencl_alloc(self, total_sizze_125747, 
"mem_125472") + mem_125480 = opencl_alloc(self, total_sizze_125748, "mem_125480") + mem_125482 = opencl_alloc(self, total_sizze_125749, "mem_125482") + mem_125512 = opencl_alloc(self, total_sizze_125750, "mem_125512") + double_buffer_mem_125586 = opencl_alloc(self, total_sizze_125751, + "double_buffer_mem_125586") + double_buffer_mem_125587 = opencl_alloc(self, total_sizze_125752, + "double_buffer_mem_125587") + double_buffer_mem_125588 = opencl_alloc(self, total_sizze_125753, + "double_buffer_mem_125588") + if ((1 * (np.int64(num_groups_84593) * np.int64(segmap_group_sizze_84592))) != 0): + self.mainDetailedzisegmap_83857_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_70948), + np.byte(y_70952), + np.int64(k_71067), + np.int64(rp1_71562), + np.int64(min_res_71574), + np.int64(num_groups_84593), + np.int64(num_threads_125743), + mem_120248, mem_122793, + mem_122796, mem_122800, + mem_122803, mem_123127, + mem_123130, mem_123133, + mem_123135, mem_125265, + mem_125267, mem_125472, + mem_125480, mem_125482, + mem_125512, + double_buffer_mem_125586, + double_buffer_mem_125587, + double_buffer_mem_125588) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83857_var, + ((np.int64(num_groups_84593) * np.int64(segmap_group_sizze_84592)),), + (np.int64(segmap_group_sizze_84592),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_122793 = None + mem_122796 = None + mem_122800 = None + mem_122803 = None + mem_125265 = None + mem_125267 = None + mem_125472 = None + mem_125480 = None + mem_125482 = None + mem_125512 = None + double_buffer_mem_125586 = None + double_buffer_mem_125587 = None + double_buffer_mem_125588 = None + mem_123138 = opencl_alloc(self, binop_x_120244, "mem_123138") + if ((1 * (np.int64(segmap_usable_groups_84889) * np.int64(segmap_group_sizze_84888))) != 0): + 
self.mainDetailedzisegmap_83799_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_123135, mem_123138) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83799_var, + ((np.int64(segmap_usable_groups_84889) * np.int64(segmap_group_sizze_84888)),), + (np.int64(segmap_group_sizze_84888),)) + if synchronous: + sync(self) + mem_123143 = opencl_alloc(self, bytes_121997, "mem_123143") + if ((1 * (np.int64(segmap_usable_groups_84904) * np.int64(segmap_group_sizze_84903))) != 0): + self.mainDetailedzisegmap_83764_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(rp1_71562), + mem_123127, mem_123135, + mem_123138, mem_123143) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83764_var, + ((np.int64(segmap_usable_groups_84904) * np.int64(segmap_group_sizze_84903)),), + (np.int64(segmap_group_sizze_84903),)) + if synchronous: + sync(self) + mem_123138 = None + local_memory_capacity_127796 = self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127796)) and sle64((((((bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127796))) and suff_outer_par_84927): + mem_123147 = opencl_alloc(self, bytes_121997, "mem_123147") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123147, np.int64(0), + mem_123143, np.int64(0), + m_70861, k2p2zq_70876, + k2p2zq_70876) + mem_123151 = opencl_alloc(self, bytes_121997, "mem_123151") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123151, np.int64(0), + 
mem_123147, np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_123147 = None + mem_123155 = opencl_alloc(self, bytes_121997, "mem_123155") + group_sizze_127669 = self.sizes["mainDetailed.group_size_127669"] + num_groups_127670 = sdiv_up64(((m_70861 * k2p2zq_70876) * k2p2zq_70876), + group_sizze_127669) + if ((1 * (np.int64(num_groups_127670) * np.int64(group_sizze_127669))) != 0): + self.mainDetailedzicopy_127666_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_123143, + mem_123155) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127666_var, + ((np.int64(num_groups_127670) * np.int64(group_sizze_127669)),), + (np.int64(group_sizze_127669),)) + if synchronous: + sync(self) + mem_123233 = opencl_alloc(self, bytes_121997, "mem_123233") + mem_125275 = opencl_alloc(self, total_sizze_125757, "mem_125275") + if ((1 * (np.int64(num_groups_top_116783) * np.int64(group_sizze_116780))) != 0): + self.mainDetailedzisegmap_intragroup_116784_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64(bytes_123161)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123161)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_y_116782), + np.int64(ctx_val_123177), + np.int64(num_threads_125756), + mem_121944, + mem_121946, + mem_123151, + mem_123155, + mem_123233, + mem_125275) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_116784_var, + ((np.int64(num_groups_top_116783) * np.int64(group_sizze_116780)),), + (np.int64(group_sizze_116780),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_123151 = None + mem_123155 = None + mem_125275 = None + defunc_3_map_res_r_mem_123392 = mem_123233 + else: + mem_123237 = opencl_alloc(self, bytes_121997, "mem_123237") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_123237, np.int64(0), + mem_123143, np.int64(0), + m_70861, k2p2zq_70876, + k2p2zq_70876) + mem_123241 = opencl_alloc(self, bytes_121997, "mem_123241") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123241, np.int64(0), + mem_123237, np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_123237 = None + mem_param_123252 = mem_122000 + i_84991 = np.int64(0) + one_129899 = np.int64(1) + for counter_129898 in range(k2p2zq_70876): + x_84993 = (k2p2zq_70876 - i_84991) + i_84994 = (x_84993 - np.int64(1)) + x_84995 = sle64(np.int64(0), i_84994) + y_84996 = slt64(i_84994, k2p2zq_70876) + bounds_check_84997 = (x_84995 and y_84996) + j_m_i_84998 = (k2p2zq_70876 - x_84993) + empty_slice_84999 = (j_m_i_84998 == np.int64(0)) + m_85000 = (j_m_i_84998 - np.int64(1)) + i_p_m_t_s_85001 = (x_84993 + m_85000) + zzero_leq_i_p_m_t_s_85002 = sle64(np.int64(0), i_p_m_t_s_85001) + i_p_m_t_s_leq_w_85003 = slt64(i_p_m_t_s_85001, k2p2zq_70876) + zzero_lte_i_85004 = sle64(np.int64(0), x_84993) + i_lte_j_85005 = sle64(x_84993, k2p2zq_70876) + y_85006 = (i_p_m_t_s_leq_w_85003 and zzero_lte_i_85004) + y_85007 = (zzero_leq_i_p_m_t_s_85002 and y_85006) + y_85008 = (i_lte_j_85005 and y_85007) + forwards_ok_85009 = (zzero_lte_i_85004 and y_85008) + ok_or_empty_85010 = (empty_slice_84999 or forwards_ok_85009) + index_ok_85011 = (bounds_check_84997 and ok_or_empty_85010) + index_certs_85012 = True + assert index_ok_85011, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_84994, + ", ", + x_84993, + 
":", + k2p2zq_70876, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + k2p2zq_70876, + "].")) + index_certs_85013 = True + assert ok_or_empty_85010, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + x_84993, + ":", + k2p2zq_70876, + "] out of bounds for array of shape [", + k2p2zq_70876, + "].")) + index_ok_85014 = (bounds_check_84997 and bounds_check_84997) + index_certs_85015 = True + assert index_ok_85014, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_84994, + ", ", + i_84994, + "] out of bounds for array of shape [", + k2p2zq_70876, + "][", + k2p2zq_70876, + "].")) + index_certs_85016 = True + assert bounds_check_84997, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:185:3-72\n #12 bfastfinal.fut:181:1-185:72\n" % ("Index [", + 
i_84994, + "] out of bounds for array of shape [", + k2p2zq_70876, + "].")) + nest_sizze_85078 = (j_m_i_84998 * binop_x_120244) + max_num_groups_127693 = self.sizes["mainDetailed.segred_num_groups_83455"] + num_groups_85080 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_85078, + segred_group_sizze_85079), + sext_i32_i64(max_num_groups_127693)))) + local_memory_capacity_127795 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127795)) and suff_outer_par_85022): + mem_123259 = opencl_alloc(self, bytes_120258, "mem_123259") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123259, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_123287 = opencl_alloc(self, bytes_120258, "mem_123287") + mem_123263 = opencl_alloc(self, total_sizze_125759, + "mem_123263") + mem_123275 = opencl_alloc(self, total_sizze_125760, + "mem_123275") + if ((1 * (np.int64(num_groups_85026) * np.int64(segmap_group_sizze_85025))) != 0): + self.mainDetailedzisegmap_83337_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_84993), + np.int64(i_84994), + np.int64(j_m_i_84998), + np.int64(num_groups_85026), + np.int64(num_threads_125758), + mem_121938, + mem_123143, + mem_123241, + mem_123259, + mem_123263, + mem_123275, + mem_123287) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83337_var, + ((np.int64(num_groups_85026) * np.int64(segmap_group_sizze_85025)),), + (np.int64(segmap_group_sizze_85025),)) + if synchronous: + sync(self) + mem_123259 = None + mem_123263 = None + mem_123275 = None + mem_123359 = opencl_alloc(self, bytes_121997, "mem_123359") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123359, + np.int64(0), + mem_123287, + np.int64(0), + np.int64(1), + m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_123287 = None + defunc_3_map_res_mem_123366 = mem_123359 + else: + local_memory_capacity_127794 = 
self.max_local_memory + if (sle64((((bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8))) + (bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8)))) + (bytes_123327 + srem64((np.int64(8) - srem64(bytes_123327, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127794)) and suff_outer_par_85056): + mem_123291 = opencl_alloc(self, bytes_121997, "mem_123291") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123291, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_123295 = opencl_alloc(self, bytes_121997, "mem_123295") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123295, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + num_whole_tiles_117162 = squot64(j_m_i_84998, + tile_sizze_117141) + residual_input_117295 = srem64(j_m_i_84998, tile_sizze_117141) + cond_117296 = (residual_input_117295 == np.int64(0)) + mem_123334 = opencl_alloc(self, bytes_121997, "mem_123334") + mem_125317 = opencl_alloc(self, total_sizze_125762, + "mem_125317") + if ((1 * (np.int64(num_groups_top_117145) * np.int64(group_sizze_117142))) != 0): + self.mainDetailedzisegmap_intragroup_117146_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123327)), + cl.LocalMemory(np.int64(bytes_123298)), + cl.LocalMemory(np.int64(bytes_123298)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_84993), + np.int64(i_84994), + np.int64(j_m_i_84998), + np.int64(num_groups_y_117144), + np.int64(num_whole_tiles_117162), + np.int64(residual_input_117295), + np.byte(cond_117296), + np.int64(num_threads_125761), + mem_121938, + mem_123143, + mem_123241, + mem_123291, + mem_123295, + mem_123334, + mem_125317) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_117146_var, + ((np.int64(num_groups_top_117145) * np.int64(group_sizze_117142)),), + 
(np.int64(group_sizze_117142),)) + if synchronous: + sync(self) + mem_123291 = None + mem_123295 = None + mem_125317 = None + defunc_3_map_res_mem_123355 = mem_123334 + else: + mem_123338 = opencl_alloc(self, bytes_121990, "mem_123338") + if slt64((j_m_i_84998 * np.int64(2)), + segred_group_sizze_85079): + segment_sizze_nonzzero_127724 = smax64(np.int64(1), + j_m_i_84998) + num_threads_127725 = (num_groups_85080 * segred_group_sizze_85079) + if ((1 * (np.int64(num_groups_85080) * np.int64(segred_group_sizze_85079))) != 0): + self.mainDetailedzisegred_small_83459_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85079))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(x_84993), + np.int64(i_84994), + np.int64(j_m_i_84998), + np.int64(num_groups_85080), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_127724), + mem_123143, + mem_param_123252, + mem_123338) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_83459_var, + ((np.int64(num_groups_85080) * np.int64(segred_group_sizze_85079)),), + (np.int64(segred_group_sizze_85079),)) + if synchronous: + sync(self) + else: + groups_per_segment_127745 = sdiv_up64(num_groups_85080, + smax64(np.int64(1), + (m_70861 * k2p2zq_70876))) + elements_per_thread_127746 = sdiv_up64(j_m_i_84998, + (segred_group_sizze_85079 * groups_per_segment_127745)) + virt_num_groups_127747 = (groups_per_segment_127745 * (m_70861 * k2p2zq_70876)) + num_threads_127748 = (num_groups_85080 * segred_group_sizze_85079) + threads_per_segment_127749 = (groups_per_segment_127745 * segred_group_sizze_85079) + group_res_arr_mem_127750 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_85079 * virt_num_groups_127747)), + "group_res_arr_mem_127750") + mainDetailedzicounter_mem_127752 = self.mainDetailedzicounter_mem_127752 + if ((1 * (np.int64(num_groups_85080) * np.int64(segred_group_sizze_85079))) != 0): + 
self.mainDetailedzisegred_large_83459_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85079))), + np.int64(k2p2zq_70876), + np.int64(x_84993), + np.int64(i_84994), + np.int64(j_m_i_84998), + np.int64(num_groups_85080), + np.int64(binop_x_120251), + np.int64(groups_per_segment_127745), + np.int64(elements_per_thread_127746), + np.int64(virt_num_groups_127747), + np.int64(threads_per_segment_127749), + mem_123143, + mem_param_123252, + mem_123338, + group_res_arr_mem_127750, + mainDetailedzicounter_mem_127752) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_83459_var, + ((np.int64(num_groups_85080) * np.int64(segred_group_sizze_85079)),), + (np.int64(segred_group_sizze_85079),)) + if synchronous: + sync(self) + mem_123342 = opencl_alloc(self, bytes_121990, "mem_123342") + if ((1 * (np.int64(segmap_usable_groups_85097) * np.int64(segmap_group_sizze_85096))) != 0): + self.mainDetailedzisegmap_83437_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(i_84994), + mem_121938, + mem_123143, + mem_123338, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83437_var, + ((np.int64(segmap_usable_groups_85097) * np.int64(segmap_group_sizze_85096)),), + (np.int64(segmap_group_sizze_85096),)) + if synchronous: + sync(self) + mem_123338 = None + if ((1 * (np.int64(segmap_usable_groups_85108) * np.int64(segmap_group_sizze_85107))) != 0): + self.mainDetailedzisegmap_83425_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(i_84994), + np.int64(binop_x_120251), + mem_param_123252, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83425_var, + ((np.int64(segmap_usable_groups_85108) * np.int64(segmap_group_sizze_85107)),), + (np.int64(segmap_group_sizze_85107),)) + if synchronous: + sync(self) + mem_123342 = None + 
defunc_3_map_res_mem_123355 = mem_param_123252 + mem_123364 = opencl_alloc(self, bytes_121997, "mem_123364") + if ((((m_70861 * k2p2zq_70876) * k2p2zq_70876) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_123364, + defunc_3_map_res_mem_123355, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_70861 * k2p2zq_70876) * k2p2zq_70876) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_123355 = None + defunc_3_map_res_mem_123366 = mem_123364 + mem_param_tmp_127691 = defunc_3_map_res_mem_123366 + mem_param_123252 = mem_param_tmp_127691 + i_84991 += one_129899 + defunc_3_map_res_r_mem_123380 = mem_param_123252 + mem_123241 = None + defunc_3_map_res_r_mem_123392 = defunc_3_map_res_r_mem_123380 + mem_123143 = None + local_memory_capacity_127929 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127929)) and suff_outer_par_85123): + mem_123396 = opencl_alloc(self, bytes_121997, "mem_123396") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123396, np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + mem_123415 = opencl_alloc(self, bytes_121997, "mem_123415") + mem_123399 = opencl_alloc(self, total_sizze_125767, "mem_123399") + if ((1 * (np.int64(num_groups_85119) * np.int64(segmap_group_sizze_85118))) != 0): + self.mainDetailedzisegmap_83121_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_85119), + np.int64(binop_x_120251), + np.int64(num_threads_125766), + defunc_3_map_res_r_mem_123392, + mem_123396, + mem_123399, + mem_123415) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83121_var, + ((np.int64(num_groups_85119) * np.int64(segmap_group_sizze_85118)),), + (np.int64(segmap_group_sizze_85118),)) + if synchronous: + sync(self) + mem_123396 = None + mem_123399 = None + mem_123628 = opencl_alloc(self, bytes_121997, "mem_123628") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_123628, np.int64(0), + mem_123415, np.int64(0), + np.int64(1), + (m_70861 * k2p2zq_70876), + k2p2zq_70876) + mem_123415 = None + defunc_3_map_res_r_mem_123630 = mem_123628 + else: + local_memory_capacity_127928 = self.max_local_memory + if (sle64(((bytes_123434 + srem64((np.int64(8) - srem64(bytes_123434, + np.int64(8))), + np.int64(8))) + (bytes_123436 + srem64((np.int64(8) - srem64(bytes_123436, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127928)) and suff_outer_par_85145): + mem_123419 = opencl_alloc(self, bytes_121997, "mem_123419") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123419, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + mem_123610 = opencl_alloc(self, bytes_121997, "mem_123610") + if ((1 * (np.int64(grid_sizze_117461) * np.int64(group_sizze_117462))) != 0): + self.mainDetailedzisegmap_intragroup_117465_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123436)), + cl.LocalMemory(np.int64(bytes_123434)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(gridDim_x_117458), + np.int64(gridDim_y_117459), + np.int64(full_tiles_117490), + np.int64(kk_117693), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_123392, + mem_123419, + mem_123610) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_117465_var, + ((np.int64(grid_sizze_117461) * np.int64(group_sizze_117462)),), + (np.int64(group_sizze_117462),)) + if synchronous: + sync(self) + mem_123419 = None + defunc_3_map_res_r_mem_123624 = mem_123610 + else: + mem_123614 = opencl_alloc(self, bytes_121997, "mem_123614") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123614, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_123618 = opencl_alloc(self, bytes_121997, "mem_123618") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123618, + 
np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_70861, + k2p2zq_70876, + k2p2zq_70876) + mem_123623 = opencl_alloc(self, bytes_121997, "mem_123623") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_85161): + segment_sizze_nonzzero_127868 = smax64(np.int64(1), + k2p2zq_70876) + num_threads_127869 = (num_groups_85162 * segred_group_sizze_85161) + if ((1 * (np.int64(num_groups_85162) * np.int64(segred_group_sizze_85161))) != 0): + self.mainDetailedzisegred_small_83181_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85161))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_85162), + np.int64(segment_sizze_nonzzero_127868), + mem_123614, + mem_123618, + mem_123623) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_83181_var, + ((np.int64(num_groups_85162) * np.int64(segred_group_sizze_85161)),), + (np.int64(segred_group_sizze_85161),)) + if synchronous: + sync(self) + else: + groups_per_segment_127889 = sdiv_up64(num_groups_85162, + smax64(np.int64(1), + ((m_70861 * k2p2zq_70876) * k2p2zq_70876))) + elements_per_thread_127890 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_85161 * groups_per_segment_127889)) + virt_num_groups_127891 = (groups_per_segment_127889 * ((m_70861 * k2p2zq_70876) * k2p2zq_70876)) + num_threads_127892 = (num_groups_85162 * segred_group_sizze_85161) + threads_per_segment_127893 = (groups_per_segment_127889 * segred_group_sizze_85161) + group_res_arr_mem_127894 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_85161 * virt_num_groups_127891)), + "group_res_arr_mem_127894") + mainDetailedzicounter_mem_127896 = self.mainDetailedzicounter_mem_127896 + if ((1 * (np.int64(num_groups_85162) * np.int64(segred_group_sizze_85161))) != 0): + self.mainDetailedzisegred_large_83181_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85161))), + 
np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_85162), + np.int64(groups_per_segment_127889), + np.int64(elements_per_thread_127890), + np.int64(virt_num_groups_127891), + np.int64(threads_per_segment_127893), + mem_123614, + mem_123618, + mem_123623, + group_res_arr_mem_127894, + mainDetailedzicounter_mem_127896) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_83181_var, + ((np.int64(num_groups_85162) * np.int64(segred_group_sizze_85161)),), + (np.int64(segred_group_sizze_85161),)) + if synchronous: + sync(self) + mem_123614 = None + mem_123618 = None + defunc_3_map_res_r_mem_123624 = mem_123623 + defunc_3_map_res_r_mem_123630 = defunc_3_map_res_r_mem_123624 + mem_123633 = opencl_alloc(self, bytes_122015, "mem_123633") + group_sizze_127933 = self.sizes["mainDetailed.group_size_127933"] + num_groups_127934 = sdiv_up64((m_70861 * rp1_71562), + group_sizze_127933) + if ((1 * (np.int64(num_groups_127934) * np.int64(group_sizze_127933))) != 0): + self.mainDetailedzicopy_127930_var.set_args(np.int64(m_70861), + np.int64(n_70864), + np.int64(rp1_71562), + defunc_3_map_res_mem_120231, + mem_123633) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127930_var, + ((np.int64(num_groups_127934) * np.int64(group_sizze_127933)),), + (np.int64(group_sizze_127933),)) + if synchronous: + sync(self) + mem_123637 = opencl_alloc(self, bytes_123634, "mem_123637") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123637, np.int64(0), + mem_123127, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * rp1_71562)) + mem_123127 = None + mem_123641 = opencl_alloc(self, bytes_122018, "mem_123641") + group_sizze_127938 = self.sizes["mainDetailed.group_size_127938"] + num_groups_127939 = sdiv_up64(((m_70861 * k2p2zq_70876) * rp1_71562), + group_sizze_127938) + if ((1 * (np.int64(num_groups_127939) * np.int64(group_sizze_127938))) != 0): + self.mainDetailedzicopy_127935_var.set_args(np.int64(m_70861), + np.int64(k2p2zq_70876), + 
np.int64(rp1_71562), + mem_123637, mem_123641) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_127935_var, + ((np.int64(num_groups_127939) * np.int64(group_sizze_127938)),), + (np.int64(group_sizze_127938),)) + if synchronous: + sync(self) + mem_123637 = None + mem_123678 = opencl_alloc(self, bytes_122015, "mem_123678") + mem_125341 = opencl_alloc(self, total_sizze_125771, "mem_125341") + double_buffer_mem_125599 = opencl_alloc(self, total_sizze_125772, + "double_buffer_mem_125599") + if ((1 * (np.int64(num_groups_85178) * np.int64(segmap_group_sizze_85177))) != 0): + self.mainDetailedzisegmap_83057_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(r_71551), + np.int64(rp1_71562), + np.int64(num_groups_85178), + np.int64(num_threads_125770), + mem_123130, mem_123135, + mem_123633, mem_123641, + mem_123678, mem_125341, + double_buffer_mem_125599) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83057_var, + ((np.int64(num_groups_85178) * np.int64(segmap_group_sizze_85177)),), + (np.int64(segmap_group_sizze_85177),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_123130 = None + mem_123633 = None + mem_123641 = None + mem_125341 = None + double_buffer_mem_125599 = None + mem_123681 = opencl_alloc(self, bytes_122015, "mem_123681") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123681, np.int64(0), + mem_123678, np.int64(0), + np.int64(1), m_70861, + rp1_71562) + mem_123678 = None + if ((1 * (np.int64(segmap_usable_groups_85257) * np.int64(segmap_group_sizze_85256))) != 0): + self.mainDetailedzisegmap_83002_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(rp1_71562), + np.int64(binop_x_120251), + mem_122003, mem_123133, + defunc_3_map_res_r_mem_123392, + mem_123681) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_83002_var, + 
((np.int64(segmap_usable_groups_85257) * np.int64(segmap_group_sizze_85256)),), + (np.int64(segmap_group_sizze_85256),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_123392 = None + mem_123681 = None + mem_123685 = opencl_alloc(self, bytes_121990, "mem_123685") + self.futhark_builtinzhgpu_map_transpose_i64(mem_123685, np.int64(0), + mem_123133, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_123133 = None + if ((1 * (np.int64(segmap_usable_groups_85328) * np.int64(segmap_group_sizze_85327))) != 0): + self.mainDetailedzisegmap_82845_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(binop_x_120251), + mem_122007, + defunc_3_map_res_r_mem_123630, + mem_123685) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_82845_var, + ((np.int64(segmap_usable_groups_85328) * np.int64(segmap_group_sizze_85327)),), + (np.int64(segmap_group_sizze_85327),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_123630 = None + mem_123685 = None + mem_123691 = opencl_alloc(self, bytes_121997, "mem_123691") + if ((1 * (np.int64(segmap_usable_groups_85347) * np.int64(segmap_group_sizze_85346))) != 0): + self.mainDetailedzisegmap_82797_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_122007, mem_123691) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_82797_var, + ((np.int64(segmap_usable_groups_85347) * np.int64(segmap_group_sizze_85346)),), + (np.int64(segmap_group_sizze_85346),)) + if synchronous: + sync(self) + mem_123695 = opencl_alloc(self, bytes_121990, "mem_123695") + if ((1 * (np.int64(segmap_usable_groups_85356) * np.int64(segmap_group_sizze_85355))) != 0): + self.mainDetailedzisegmap_82775_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_122003, mem_123695) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_82775_var, + ((np.int64(segmap_usable_groups_85356) * 
np.int64(segmap_group_sizze_85355)),), + (np.int64(segmap_group_sizze_85355),)) + if synchronous: + sync(self) + mem_123699 = opencl_alloc(self, bytes_120258, "mem_123699") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123699, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_123702 = opencl_alloc(self, bytes_121990, "mem_123702") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123702, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_70876, m_70861) + mem_123705 = opencl_alloc(self, bytes_121990, "mem_123705") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123705, np.int64(0), + mem_122671, np.int64(0), + np.int64(1), k2p2zq_70876, + m_70861) + mem_122671 = None + mem_123708 = opencl_alloc(self, bytes_121990, "mem_123708") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123708, np.int64(0), + mem_123695, np.int64(0), + np.int64(1), k2p2zq_70876, + m_70861) + mem_123711 = opencl_alloc(self, m_70861, "mem_123711") + if ((1 * (np.int64(segmap_usable_groups_85365) * np.int64(segmap_group_sizze_85364))) != 0): + self.mainDetailedzisegmap_82703_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.float64(tol_71054), + np.int64(r_71551), + mem_120246, mem_121941, + mem_param_121972, + mem_122674, mem_122680, + mem_122682, mem_123135, + mem_123699, mem_123702, + mem_123705, mem_123708, + mem_123711) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_82703_var, + ((np.int64(segmap_usable_groups_85365) * np.int64(segmap_group_sizze_85364)),), + (np.int64(segmap_group_sizze_85364),)) + if synchronous: + sync(self) + mem_122674 = None + mem_122680 = None + mem_123699 = None + mem_123702 = None + mem_123705 = None + mem_123708 = None + defunc_7_map_res_mem_123721 = mem_123711 + defunc_7_map_res_mem_123722 = mem_123691 + defunc_7_map_res_mem_123723 = mem_123695 + defunc_7_map_res_mem_123724 = 
mem_123135 + defunc_7_map_res_mem_123725 = mem_122682 + i_72137 = (r_71551 - k2p2zq_70876) + x_72138 = sle64(np.int64(0), i_72137) + y_72139 = slt64(i_72137, num_recresids_padded_71534) + bounds_check_72140 = (x_72138 and y_72139) + index_ok_72141 = (ok_or_empty_70975 and bounds_check_72140) + index_certs_72142 = True + assert index_ok_72141, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:74:9-75:74\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_72137, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_71534, + "][", + m_70861, + "].")) + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123725, + dest_offset=np.int64(((i_72137 * m_70861) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * np.int32(8)))) if synchronous: sync(self) - else: - if (sle32(x_elems_5, np.int32(8)) and slt32(np.int32(16), y_elems_6)): - if ((((1 * (np.long(sdiv_up32(x_elems_5, - np.int32(16))) * np.long(np.int32(16)))) * (np.long(sdiv_up32(sdiv_up32(y_elems_6, - muly_8), - np.int32(16))) * np.long(np.int32(16)))) * (np.long(num_arrays_4) * np.long(np.int32(1)))) != 0): - self.gpu_map_transpose_f32_low_width_var.set_args(cl.LocalMemory(np.long(np.int64(1088))), - np.int32(destoffset_1), - np.int32(srcoffset_3), - np.int32(num_arrays_4), - np.int32(x_elems_5), - np.int32(y_elems_6), - np.int32(mulx_7), - np.int32(muly_8), - destmem_0, - srcmem_2) + defunc_7_map_res_mem_123725 = None + mainDetailedzicounter_mem_127986 = self.mainDetailedzicounter_mem_127986 + group_res_arr_mem_127988 = opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_85427 * num_groups_85429)), + "group_res_arr_mem_127988") + num_threads_127990 = (num_groups_85429 * segred_group_sizze_85427) + if ((1 * (np.int64(num_groups_85429) * 
np.int64(segred_group_sizze_85427))) != 0): + self.mainDetailedzisegred_nonseg_85434_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_85427))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_70861), + np.int64(num_groups_85429), + np.int64(num_threads_127990), + defunc_7_map_res_mem_123721, + mem_123728, + mainDetailedzicounter_mem_127986, + group_res_arr_mem_127988) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_nonseg_85434_var, + ((np.int64(num_groups_85429) * np.int64(segred_group_sizze_85427)),), + (np.int64(segred_group_sizze_85427),)) + if synchronous: + sync(self) + defunc_7_map_res_mem_123721 = None + read_res_129902 = np.empty(1, dtype=ct.c_bool) + cl.enqueue_copy(self.queue, read_res_129902, mem_123728, + device_offset=(np.int64(np.int64(0)) * 1), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_comm_res_72144 = read_res_129902[0] + loop_cond_t_res_72149 = slt64(rp1_71562, m_71015) + x_72150 = (defunc_2_reduce_comm_res_72144 and loop_cond_t_res_72149) + mem_param_tmp_127209 = defunc_7_map_res_mem_123722 + mem_param_tmp_127210 = defunc_7_map_res_mem_123723 + mem_param_tmp_127211 = defunc_7_map_res_mem_123724 + loop_while_tmp_127212 = x_72150 + r_tmp_127213 = rp1_71562 + mem_param_121959 = mem_param_tmp_127209 + mem_param_121967 = mem_param_tmp_127210 + mem_param_121972 = mem_param_tmp_127211 + loop_while_71550 = loop_while_tmp_127212 + r_71551 = r_tmp_127213 + mrecresid_nn_res_mem_123746 = mem_param_121959 + mrecresid_nn_res_mem_123754 = mem_param_121967 + mrecresid_nn_res_mem_123759 = mem_param_121972 + mrecresid_nn_res_71544 = loop_while_71550 + mrecresid_nn_res_71545 = r_71551 + mem_120248 = None + defunc_5_map_res_mem_121929 = None + defunc_5_map_res_mem_121930 = None + defunc_5_map_res_mem_121931 = None + mem_121938 = None + mem_121941 = None + mem_121944 = None + mem_121946 = None + mem_121948 = None + mem_121992 = None + mem_121996 = None + mem_122000 = None + 
mem_122003 = None + mem_122007 = None + mem_123728 = None + bounds_invalid_upwards_72151 = slt64(defunc_2_reduce_res_70985, + mrecresid_nn_res_71545) + distance_72152 = (defunc_2_reduce_res_70985 - mrecresid_nn_res_71545) + valid_72153 = not(bounds_invalid_upwards_72151) + range_valid_c_72154 = True + assert valid_72153, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:78:63-68\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n" % ("Range ", + mrecresid_nn_res_71545, + "..<", + defunc_2_reduce_res_70985, + " is invalid.")) + suff_outer_par_85472 = (self.sizes["mainDetailed.suff_outer_par_16"] <= m_70861) + intra_avail_par_85466 = smin64(k2p2zq_70876, binop_x_120251) + computed_group_sizze_85437 = smax64(k2p2zq_70876, binop_x_120251) + fits_85589 = sle64(computed_group_sizze_85437, max_group_sizze_77729) + suff_intra_par_85587 = (self.sizes["mainDetailed.suff_intra_par_17"] <= intra_avail_par_85466) + intra_suff_and_fits_85590 = (suff_intra_par_85587 and fits_85589) + segmap_group_sizze_85531 = self.sizes["mainDetailed.segmap_group_size_85476"] + max_num_groups_128016 = self.sizes["mainDetailed.segmap_num_groups_85478"] + num_groups_85532 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_85531), + sext_i32_i64(max_num_groups_128016)))) + segred_group_sizze_85831 = self.sizes["mainDetailed.segred_group_size_85807"] + max_num_groups_128017 = self.sizes["mainDetailed.segred_num_groups_85809"] + num_groups_85832 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_85831), + sext_i32_i64(max_num_groups_128017)))) + segmap_group_sizze_85852 = self.sizes["mainDetailed.segmap_group_size_85799"] + segred_group_sizze_85859 = self.sizes["mainDetailed.segred_group_size_85780"] + max_num_groups_128018 = self.sizes["mainDetailed.segred_num_groups_85782"] + num_groups_85860 = 
sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_85859), + sext_i32_i64(max_num_groups_128018)))) + segmap_group_sizze_85872 = self.sizes["mainDetailed.segmap_group_size_85768"] + segmap_group_sizze_85884 = self.sizes["mainDetailed.segmap_group_size_85654"] + max_num_groups_128019 = self.sizes["mainDetailed.segmap_num_groups_85656"] + num_groups_85885 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_85884), + sext_i32_i64(max_num_groups_128019)))) + suff_outer_par_85889 = (self.sizes["mainDetailed.suff_outer_par_18"] <= binop_x_120244) + segred_group_sizze_85918 = self.sizes["mainDetailed.segred_group_size_85705"] + max_num_groups_128020 = self.sizes["mainDetailed.segred_num_groups_85707"] + num_groups_85919 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_85918), + sext_i32_i64(max_num_groups_128020)))) + segmap_group_sizze_85941 = self.sizes["mainDetailed.segmap_group_size_85692"] + segmap_usable_groups_85853 = sdiv_up_safe64(m_70861, + segmap_group_sizze_85852) + segmap_usable_groups_85873 = sdiv_up_safe64(m_70861, + segmap_group_sizze_85872) + segmap_usable_groups_85942 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_85941) + num_threads_125779 = (segmap_group_sizze_85531 * num_groups_85532) + total_sizze_125780 = (bytes_120247 * num_threads_125779) + total_sizze_125781 = (bytes_120247 * num_threads_125779) + total_sizze_125782 = (bytes_120250 * num_threads_125779) + total_sizze_125783 = (bytes_120247 * num_threads_125779) + num_threads_125789 = (segmap_group_sizze_85884 * num_groups_85885) + total_sizze_125790 = (bytes_120247 * num_threads_125789) + mem_param_123778 = mrecresid_nn_res_mem_123746 + mem_param_123786 = mrecresid_nn_res_mem_123754 + i_72158 = np.int64(0) + one_129907 = np.int64(1) + for counter_129906 in range(distance_72152): + index_primexp_72162 = (mrecresid_nn_res_71545 + i_72158) + x_72163 = sle64(np.int64(0), 
index_primexp_72162) + y_72164 = slt64(index_primexp_72162, defunc_2_reduce_res_70985) + bounds_check_72165 = (x_72163 and y_72164) + index_ok_72166 = (ok_or_empty_70954 and bounds_check_72165) + index_certs_72167 = True + assert index_ok_72166, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [", + index_primexp_72162, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "][", + k2p2zq_70876, + "].")) + index_certs_72168 = True + assert bounds_check_72165, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:185:3-72\n #9 bfastfinal.fut:181:1-185:72\n" % ("Index [", + index_primexp_72162, + "] out of bounds for array of shape [", + defunc_2_reduce_res_70985, + "].")) + local_memory_capacity_128289 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128289)) and suff_outer_par_85472): + mem_123798 = opencl_alloc(self, bytes_120258, "mem_123798") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123798, np.int64(0), + mem_param_123778, + np.int64(0), np.int64(1), + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_123801 = opencl_alloc(self, bytes_121990, "mem_123801") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123801, np.int64(0), + mem_param_123786, + np.int64(0), np.int64(1), + k2p2zq_70876, m_70861) + mem_123869 = opencl_alloc(self, bytes_120258, "mem_123869") + mem_123872 = opencl_alloc(self, bytes_121990, "mem_123872") + mem_123874 = opencl_alloc(self, bytes_120173, 
"mem_123874") + mem_123804 = opencl_alloc(self, total_sizze_125780, "mem_123804") + mem_123818 = opencl_alloc(self, total_sizze_125781, "mem_123818") + mem_123821 = opencl_alloc(self, total_sizze_125782, "mem_123821") + mem_123840 = opencl_alloc(self, total_sizze_125783, "mem_123840") + if ((1 * (np.int64(num_groups_85532) * np.int64(segmap_group_sizze_85531))) != 0): + self.mainDetailedzisegmap_85474_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85532), + np.int64(num_threads_125779), + defunc_3_map_res_mem_120231, + mem_120246, mem_123798, + mem_123801, mem_123804, + mem_123818, mem_123821, + mem_123840, mem_123869, + mem_123872, mem_123874) cl.enqueue_nd_range_kernel(self.queue, - self.gpu_map_transpose_f32_low_width_var, - ((np.long(sdiv_up32(x_elems_5, - np.int32(16))) * np.long(np.int32(16))), - (np.long(sdiv_up32(sdiv_up32(y_elems_6, - muly_8), - np.int32(16))) * np.long(np.int32(16))), - (np.long(num_arrays_4) * np.long(np.int32(1)))), - (np.long(np.int32(16)), - np.long(np.int32(16)), - np.long(np.int32(1)))) + self.mainDetailedzisegmap_85474_var, + ((np.int64(num_groups_85532) * np.int64(segmap_group_sizze_85531)),), + (np.int64(segmap_group_sizze_85531),)) if synchronous: sync(self) + mem_123798 = None + mem_123801 = None + mem_123804 = None + mem_123818 = None + mem_123821 = None + mem_123840 = None + mem_123966 = opencl_alloc(self, bytes_121997, "mem_123966") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123966, np.int64(0), + mem_123869, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_123869 = None + mem_123970 = opencl_alloc(self, bytes_121990, "mem_123970") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123970, np.int64(0), + mem_123872, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_123872 = None + defunc_7_map_res_mem_123972 = mem_123966 + 
defunc_7_map_res_mem_123973 = mem_123970 + defunc_7_map_res_mem_123974 = mem_123874 else: - if (sle32(y_elems_6, np.int32(8)) and slt32(np.int32(16), x_elems_5)): - if ((((1 * (np.long(sdiv_up32(sdiv_up32(x_elems_5, mulx_7), - np.int32(16))) * np.long(np.int32(16)))) * (np.long(sdiv_up32(y_elems_6, - np.int32(16))) * np.long(np.int32(16)))) * (np.long(num_arrays_4) * np.long(np.int32(1)))) != 0): - self.gpu_map_transpose_f32_low_height_var.set_args(cl.LocalMemory(np.long(np.int64(1088))), - np.int32(destoffset_1), - np.int32(srcoffset_3), - np.int32(num_arrays_4), - np.int32(x_elems_5), - np.int32(y_elems_6), - np.int32(mulx_7), - np.int32(muly_8), - destmem_0, - srcmem_2) + local_memory_capacity_128288 = self.max_local_memory + if (sle64((((((((bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8))) + ((np.int32(8) * k2p2zq_70876) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_70876), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * k2p2zq_70876) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_70876), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))) + (bytes_120250 + srem64((np.int64(8) - srem64(bytes_120250, + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * (k2p2zq_70876 * k2p2zq_70876)) + srem64((np.int64(8) - srem64((np.int32(8) * (k2p2zq_70876 * k2p2zq_70876)), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128288)) and intra_suff_and_fits_85590): + mem_123892 = opencl_alloc(self, bytes_121997, "mem_123892") + mem_123895 = opencl_alloc(self, bytes_121990, "mem_123895") + mem_123897 = opencl_alloc(self, bytes_120173, "mem_123897") + if ((1 * (np.int64(m_70861) * np.int64(computed_group_sizze_85437))) != 0): + self.mainDetailedzisegmap_intragroup_85470_var.set_args(self.global_failure, + 
cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * (k2p2zq_70876 * k2p2zq_70876)))), + cl.LocalMemory(np.int64(bytes_120250)), + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_70876))), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_70876))), + cl.LocalMemory(np.int64(bytes_120247)), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(computed_group_sizze_85437), + np.int64(binop_x_120251), + defunc_3_map_res_mem_120231, + mem_120246, + mem_param_123778, + mem_param_123786, + mem_123892, + mem_123895, + mem_123897) cl.enqueue_nd_range_kernel(self.queue, - self.gpu_map_transpose_f32_low_height_var, - ((np.long(sdiv_up32(sdiv_up32(x_elems_5, - mulx_7), - np.int32(16))) * np.long(np.int32(16))), - (np.long(sdiv_up32(y_elems_6, - np.int32(16))) * np.long(np.int32(16))), - (np.long(num_arrays_4) * np.long(np.int32(1)))), - (np.long(np.int32(16)), - np.long(np.int32(16)), - np.long(np.int32(1)))) + self.mainDetailedzisegmap_intragroup_85470_var, + ((np.int64(m_70861) * np.int64(computed_group_sizze_85437)),), + (np.int64(computed_group_sizze_85437),)) if synchronous: sync(self) + defunc_7_map_res_mem_123960 = mem_123892 + defunc_7_map_res_mem_123961 = mem_123895 + defunc_7_map_res_mem_123962 = mem_123897 else: - if (sle32(x_elems_5, np.int32(8)) and sle32(y_elems_6, - np.int32(8))): - if ((1 * (np.long(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), - np.int32(256))) * np.long(np.int32(256)))) != 0): - self.gpu_map_transpose_f32_small_var.set_args(cl.LocalMemory(np.long(np.int64(1))), - np.int32(destoffset_1), - np.int32(srcoffset_3), - np.int32(num_arrays_4), - np.int32(x_elems_5), - np.int32(y_elems_6), - np.int32(mulx_7), - np.int32(muly_8), - destmem_0, - srcmem_2) + mem_123901 = opencl_alloc(self, bytes_121997, "mem_123901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123901, np.int64(0), 
+ mem_param_123778, + np.int64(0), + np.int64(1), + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + mem_123904 = opencl_alloc(self, bytes_120173, "mem_123904") + mem_123907 = opencl_alloc(self, bytes_121990, "mem_123907") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_85831): + segment_sizze_nonzzero_128079 = smax64(np.int64(1), k2p2zq_70876) + num_threads_128080 = (num_groups_85832 * segred_group_sizze_85831) + if ((1 * (np.int64(num_groups_85832) * np.int64(segred_group_sizze_85831))) != 0): + self.mainDetailedzisegred_small_85813_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85831))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85832), + np.int64(segment_sizze_nonzzero_128079), + mem_120246, + mem_123901, + mem_123904, + mem_123907) cl.enqueue_nd_range_kernel(self.queue, - self.gpu_map_transpose_f32_small_var, - ((np.long(sdiv_up32(((num_arrays_4 * x_elems_5) * y_elems_6), - np.int32(256))) * np.long(np.int32(256))),), - (np.long(np.int32(256)),)) + self.mainDetailedzisegred_small_85813_var, + ((np.int64(num_groups_85832) * np.int64(segred_group_sizze_85831)),), + (np.int64(segred_group_sizze_85831),)) if synchronous: sync(self) else: - if ((((1 * (np.long(sdiv_up32(x_elems_5, - np.int32(32))) * np.long(np.int32(32)))) * (np.long(sdiv_up32(y_elems_6, - np.int32(32))) * np.long(np.int32(8)))) * (np.long(num_arrays_4) * np.long(np.int32(1)))) != 0): - self.gpu_map_transpose_f32_var.set_args(cl.LocalMemory(np.long(np.int64(4224))), - np.int32(destoffset_1), - np.int32(srcoffset_3), - np.int32(num_arrays_4), - np.int32(x_elems_5), - np.int32(y_elems_6), - np.int32(mulx_7), - np.int32(muly_8), - destmem_0, srcmem_2) + groups_per_segment_128101 = sdiv_up64(num_groups_85832, + smax64(np.int64(1), + m_70861)) + elements_per_thread_128102 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_85831 * 
groups_per_segment_128101)) + virt_num_groups_128103 = (groups_per_segment_128101 * m_70861) + num_threads_128104 = (num_groups_85832 * segred_group_sizze_85831) + threads_per_segment_128105 = (groups_per_segment_128101 * segred_group_sizze_85831) + group_res_arr_mem_128106 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_85831 * virt_num_groups_128103)), + "group_res_arr_mem_128106") + mainDetailedzicounter_mem_128108 = self.mainDetailedzicounter_mem_128108 + if ((1 * (np.int64(num_groups_85832) * np.int64(segred_group_sizze_85831))) != 0): + self.mainDetailedzisegred_large_85813_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85831))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85832), + np.int64(groups_per_segment_128101), + np.int64(elements_per_thread_128102), + np.int64(virt_num_groups_128103), + np.int64(threads_per_segment_128105), + mem_120246, + mem_123901, + mem_123904, + mem_123907, + group_res_arr_mem_128106, + mainDetailedzicounter_mem_128108) cl.enqueue_nd_range_kernel(self.queue, - self.gpu_map_transpose_f32_var, - ((np.long(sdiv_up32(x_elems_5, - np.int32(32))) * np.long(np.int32(32))), - (np.long(sdiv_up32(y_elems_6, - np.int32(32))) * np.long(np.int32(8))), - (np.long(num_arrays_4) * np.long(np.int32(1)))), - (np.long(np.int32(32)), - np.long(np.int32(8)), - np.long(np.int32(1)))) + self.mainDetailedzisegred_large_85813_var, + ((np.int64(num_groups_85832) * np.int64(segred_group_sizze_85831)),), + (np.int64(segred_group_sizze_85831),)) if synchronous: sync(self) - return () - def futhark_builtinzhreplicate_f32(self, mem_46314, num_elems_46315, - val_46316): - group_sizze_46321 = self.sizes["builtin#replicate_f32.group_size_46321"] - num_groups_46322 = sdiv_up64(num_elems_46315, group_sizze_46321) - if ((1 * (np.long(num_groups_46322) * 
np.long(group_sizze_46321))) != 0): - self.builtinzhreplicate_f32zireplicate_46318_var.set_args(np.int32(num_elems_46315), - np.float32(val_46316), - mem_46314) - cl.enqueue_nd_range_kernel(self.queue, - self.builtinzhreplicate_f32zireplicate_46318_var, - ((np.long(num_groups_46322) * np.long(group_sizze_46321)),), - (np.long(group_sizze_46321),)) - if synchronous: - sync(self) - return () - def futhark_builtinzhreplicate_i32(self, mem_46323, num_elems_46324, - val_46325): - group_sizze_46330 = self.sizes["builtin#replicate_i32.group_size_46330"] - num_groups_46331 = sdiv_up64(num_elems_46324, group_sizze_46330) - if ((1 * (np.long(num_groups_46331) * np.long(group_sizze_46330))) != 0): - self.builtinzhreplicate_i32zireplicate_46327_var.set_args(np.int32(num_elems_46324), - np.int32(val_46325), - mem_46323) - cl.enqueue_nd_range_kernel(self.queue, - self.builtinzhreplicate_i32zireplicate_46327_var, - ((np.long(num_groups_46331) * np.long(group_sizze_46330)),), - (np.long(group_sizze_46330),)) - if synchronous: - sync(self) - return () - def futhark_convertToFloat(self, images_mem_44380, m_27757, n_27758, p_27759, - nan_value_27760): - y_29875 = (n_27758 * p_27759) - nest_sizze_29876 = (m_27757 * y_29875) - segmap_group_sizze_29877 = self.sizes["convertToFloat.segmap_group_size_29820"] - segmap_usable_groups_29878 = sdiv_up64(nest_sizze_29876, - segmap_group_sizze_29877) - binop_x_44383 = (m_27757 * n_27758) - binop_x_44384 = (p_27759 * binop_x_44383) - bytes_44382 = (np.int64(4) * binop_x_44384) - mem_44385 = opencl_alloc(self, bytes_44382, "mem_44385") - if ((1 * (np.long(segmap_usable_groups_29878) * np.long(segmap_group_sizze_29877))) != 0): - self.convertToFloatzisegmap_29816_var.set_args(self.global_failure, - np.int64(m_27757), - np.int64(n_27758), - np.int64(p_27759), - np.int16(nan_value_27760), - images_mem_44380, - mem_44385) - cl.enqueue_nd_range_kernel(self.queue, - self.convertToFloatzisegmap_29816_var, - ((np.long(segmap_usable_groups_29878) * 
np.long(segmap_group_sizze_29877)),), - (np.long(segmap_group_sizze_29877),)) - if synchronous: - sync(self) - out_mem_45676 = mem_44385 - return out_mem_45676 - def futhark_main(self, mappingindices_mem_44380, images_mem_44381, N_29165, - m_29166, trend_29167, k_29168, n_29169, freq_29170, - hfrac_29171, lam_29172): - i32_res_29175 = sext_i32_i64(n_29169) - x_29176 = (np.int32(2) * k_29168) - k2p2_29177 = (np.int32(2) + x_29176) - cond_29178 = slt32(np.int32(0), trend_29167) - if cond_29178: - k2p2zq_29179 = k2p2_29177 - else: - k2p2zq_f_res_29180 = (k2p2_29177 - np.int32(1)) - k2p2zq_29179 = k2p2zq_f_res_29180 - i32_res_29181 = sext_i32_i64(k2p2zq_29179) - binop_x_44384 = (N_29165 * i32_res_29181) - bytes_44383 = (np.int64(4) * binop_x_44384) - if cond_29178: - bounds_invalid_upwards_29183 = slt64(i32_res_29181, np.int64(0)) - valid_29184 = not(bounds_invalid_upwards_29183) - range_valid_c_29185 = True - assert valid_29184, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:37:10-20\n #3 bfastfinal.fut:29:17-66\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_29181, - " is invalid.")) - segmap_group_sizze_38734 = self.sizes["main.segmap_group_size_38664"] - segmap_usable_groups_38735 = sdiv_up64(binop_x_44384, - segmap_group_sizze_38734) - mem_44385 = opencl_alloc(self, bytes_44383, "mem_44385") - if ((1 * (np.long(segmap_usable_groups_38735) * np.long(segmap_group_sizze_38734))) != 0): - self.mainzisegmap_38661_var.set_args(self.global_failure, - np.int64(N_29165), - np.float32(freq_29170), - np.int64(i32_res_29181), - mappingindices_mem_44380, - mem_44385) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_38661_var, - ((np.long(segmap_usable_groups_38735) * np.long(segmap_group_sizze_38734)),), - (np.long(segmap_group_sizze_38734),)) + mem_123910 = opencl_alloc(self, bytes_120173, "mem_123910") + if ((1 * 
(np.int64(segmap_usable_groups_85853) * np.int64(segmap_group_sizze_85852))) != 0): + self.mainDetailedzisegmap_85797_var.set_args(self.global_failure, + np.int64(m_70861), + mem_123904, + mem_123910) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_85797_var, + ((np.int64(segmap_usable_groups_85853) * np.int64(segmap_group_sizze_85852)),), + (np.int64(segmap_group_sizze_85852),)) + if synchronous: + sync(self) + mem_123904 = None + mem_123913 = opencl_alloc(self, bytes_120173, "mem_123913") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_85859): + segment_sizze_nonzzero_128146 = smax64(np.int64(1), k2p2zq_70876) + num_threads_128147 = (num_groups_85860 * segred_group_sizze_85859) + if ((1 * (np.int64(num_groups_85860) * np.int64(segred_group_sizze_85859))) != 0): + self.mainDetailedzisegred_small_85786_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85859))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85860), + np.int64(segment_sizze_nonzzero_128146), + mem_120246, + mem_param_123786, + mem_123913) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_85786_var, + ((np.int64(num_groups_85860) * np.int64(segred_group_sizze_85859)),), + (np.int64(segred_group_sizze_85859),)) + if synchronous: + sync(self) + else: + groups_per_segment_128167 = sdiv_up64(num_groups_85860, + smax64(np.int64(1), + m_70861)) + elements_per_thread_128168 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_85859 * groups_per_segment_128167)) + virt_num_groups_128169 = (groups_per_segment_128167 * m_70861) + num_threads_128170 = (num_groups_85860 * segred_group_sizze_85859) + threads_per_segment_128171 = (groups_per_segment_128167 * segred_group_sizze_85859) + group_res_arr_mem_128172 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_85859 * virt_num_groups_128169)), + 
"group_res_arr_mem_128172") + mainDetailedzicounter_mem_128174 = self.mainDetailedzicounter_mem_128174 + if ((1 * (np.int64(num_groups_85860) * np.int64(segred_group_sizze_85859))) != 0): + self.mainDetailedzisegred_large_85786_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85859))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85860), + np.int64(groups_per_segment_128167), + np.int64(elements_per_thread_128168), + np.int64(virt_num_groups_128169), + np.int64(threads_per_segment_128171), + mem_120246, + mem_param_123786, + mem_123913, + group_res_arr_mem_128172, + mainDetailedzicounter_mem_128174) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_85786_var, + ((np.int64(num_groups_85860) * np.int64(segred_group_sizze_85859)),), + (np.int64(segred_group_sizze_85859),)) + if synchronous: + sync(self) + mem_123916 = opencl_alloc(self, bytes_120173, "mem_123916") + mem_123918 = opencl_alloc(self, bytes_120173, "mem_123918") + if ((1 * (np.int64(segmap_usable_groups_85873) * np.int64(segmap_group_sizze_85872))) != 0): + self.mainDetailedzisegmap_85766_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(n_70864), + np.int64(index_primexp_72162), + defunc_3_map_res_mem_120231, + mem_123910, + mem_123913, + mem_123916, + mem_123918) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_85766_var, + ((np.int64(segmap_usable_groups_85873) * np.int64(segmap_group_sizze_85872)),), + (np.int64(segmap_group_sizze_85872),)) + if synchronous: + sync(self) + mem_123913 = None + local_memory_capacity_128287 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128287)) and suff_outer_par_85889): + mem_123937 = opencl_alloc(self, bytes_121997, "mem_123937") + mem_123940 = opencl_alloc(self, bytes_121990, "mem_123940") 
+ mem_123921 = opencl_alloc(self, total_sizze_125790, "mem_123921") + if ((1 * (np.int64(num_groups_85885) * np.int64(segmap_group_sizze_85884))) != 0): + self.mainDetailedzisegmap_85651_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85885), + np.int64(num_threads_125789), + mem_120246, + mem_param_123786, + mem_123901, + mem_123907, + mem_123910, + mem_123916, + mem_123921, + mem_123937, + mem_123940) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_85651_var, + ((np.int64(num_groups_85885) * np.int64(segmap_group_sizze_85884)),), + (np.int64(segmap_group_sizze_85884),)) + if synchronous: + sync(self) + mem_123921 = None + mem_123956 = opencl_alloc(self, bytes_121997, "mem_123956") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123956, + np.int64(0), + mem_123937, + np.int64(0), + np.int64(1), + (m_70861 * k2p2zq_70876), + k2p2zq_70876) + mem_123937 = None + defunc_7_map_res_mem_123958 = mem_123956 + defunc_7_map_res_mem_123959 = mem_123940 + else: + mem_123944 = opencl_alloc(self, bytes_121990, "mem_123944") + mem_123948 = opencl_alloc(self, bytes_121997, "mem_123948") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_85918): + segment_sizze_nonzzero_128222 = smax64(np.int64(1), + k2p2zq_70876) + num_threads_128223 = (num_groups_85919 * segred_group_sizze_85918) + if ((1 * (np.int64(num_groups_85919) * np.int64(segred_group_sizze_85918))) != 0): + self.mainDetailedzisegred_small_85711_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85918))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85919), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_128222), + mem_120246, + mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948) + 
cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_85711_var, + ((np.int64(num_groups_85919) * np.int64(segred_group_sizze_85918)),), + (np.int64(segred_group_sizze_85918),)) + if synchronous: + sync(self) + else: + groups_per_segment_128243 = sdiv_up64(num_groups_85919, + smax64(np.int64(1), + (m_70861 * k2p2zq_70876))) + elements_per_thread_128244 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_85918 * groups_per_segment_128243)) + virt_num_groups_128245 = (groups_per_segment_128243 * (m_70861 * k2p2zq_70876)) + num_threads_128246 = (num_groups_85919 * segred_group_sizze_85918) + threads_per_segment_128247 = (groups_per_segment_128243 * segred_group_sizze_85918) + group_res_arr_mem_128248 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_85918 * virt_num_groups_128245)), + "group_res_arr_mem_128248") + mainDetailedzicounter_mem_128250 = self.mainDetailedzicounter_mem_128250 + if ((1 * (np.int64(num_groups_85919) * np.int64(segred_group_sizze_85918))) != 0): + self.mainDetailedzisegred_large_85711_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_85918))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(defunc_2_reduce_res_70985), + np.int64(index_primexp_72162), + np.int64(num_groups_85919), + np.int64(binop_x_120251), + np.int64(groups_per_segment_128243), + np.int64(elements_per_thread_128244), + np.int64(virt_num_groups_128245), + np.int64(threads_per_segment_128247), + mem_120246, + mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948, + group_res_arr_mem_128248, + mainDetailedzicounter_mem_128250) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_85711_var, + ((np.int64(num_groups_85919) * np.int64(segred_group_sizze_85918)),), + (np.int64(segred_group_sizze_85918),)) + if synchronous: + sync(self) + mem_123952 = opencl_alloc(self, bytes_121990, "mem_123952") + if ((1 * 
(np.int64(segmap_usable_groups_85942) * np.int64(segmap_group_sizze_85941))) != 0): + self.mainDetailedzisegmap_85689_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + mem_param_123786, + mem_123916, + mem_123944, + mem_123952) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_85689_var, + ((np.int64(segmap_usable_groups_85942) * np.int64(segmap_group_sizze_85941)),), + (np.int64(segmap_group_sizze_85941),)) + if synchronous: + sync(self) + mem_123944 = None + defunc_7_map_res_mem_123958 = mem_123948 + defunc_7_map_res_mem_123959 = mem_123952 + mem_123901 = None + mem_123907 = None + mem_123910 = None + mem_123916 = None + defunc_7_map_res_mem_123960 = defunc_7_map_res_mem_123958 + defunc_7_map_res_mem_123961 = defunc_7_map_res_mem_123959 + defunc_7_map_res_mem_123962 = mem_123918 + defunc_7_map_res_mem_123972 = defunc_7_map_res_mem_123960 + defunc_7_map_res_mem_123973 = defunc_7_map_res_mem_123961 + defunc_7_map_res_mem_123974 = defunc_7_map_res_mem_123962 + i_72253 = (index_primexp_72162 - k2p2zq_70876) + x_72254 = sle64(np.int64(0), i_72253) + y_72255 = slt64(i_72253, num_recresids_padded_71534) + bounds_check_72256 = (x_72254 and y_72255) + index_ok_72257 = (ok_or_empty_70975 and bounds_check_72256) + index_certs_72258 = True + assert index_ok_72257, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:81:7-82:29\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_72253, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_71534, + "][", + m_70861, + "].")) + if ((m_70861 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123974, + dest_offset=np.int64(((i_72253 * m_70861) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_70861 * np.int32(8)))) if synchronous: sync(self) - 
binop_p_mem_44390 = mem_44385 - else: - bounds_invalid_upwards_29209 = slt64(i32_res_29181, np.int64(0)) - valid_29210 = not(bounds_invalid_upwards_29209) - range_valid_c_29211 = True - assert valid_29210, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:50:10-22\n #3 bfastfinal.fut:30:17-64\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_29181, - " is invalid.")) - segmap_group_sizze_38908 = self.sizes["main.segmap_group_size_38842"] - segmap_usable_groups_38909 = sdiv_up64(binop_x_44384, - segmap_group_sizze_38908) - mem_44389 = opencl_alloc(self, bytes_44383, "mem_44389") - if ((1 * (np.long(segmap_usable_groups_38909) * np.long(segmap_group_sizze_38908))) != 0): - self.mainzisegmap_38839_var.set_args(self.global_failure, - np.int64(N_29165), - np.float32(freq_29170), - np.int64(i32_res_29181), - mappingindices_mem_44380, - mem_44389) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_38839_var, - ((np.long(segmap_usable_groups_38909) * np.long(segmap_group_sizze_38908)),), - (np.long(segmap_group_sizze_38908),)) + defunc_7_map_res_mem_123974 = None + mem_param_tmp_128021 = defunc_7_map_res_mem_123972 + mem_param_tmp_128022 = defunc_7_map_res_mem_123973 + mem_param_123778 = mem_param_tmp_128021 + mem_param_123786 = mem_param_tmp_128022 + i_72158 += one_129907 + retsT_mem_123990 = mem_param_123778 + retsT_mem_123998 = mem_param_123786 + defunc_3_map_res_mem_120231 = None + mem_120246 = None + mrecresid_nn_res_mem_123746 = None + mrecresid_nn_res_mem_123754 = None + Nmk_72261 = (np.int64(1) + num_recresids_padded_71534) + bounds_invalid_upwards_72262 = slt64(Nmk_72261, np.int64(0)) + valid_72263 = not(bounds_invalid_upwards_72262) + range_valid_c_72264 = True + assert valid_72263, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:36:70-77\n #2 /prelude/soacs.fut:67:19-23\n 
#3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:185:3-72\n #8 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_72261, + " is invalid.")) + suff_outer_par_85970 = (self.sizes["mainDetailed.suff_outer_par_19"] <= m_70861) + intra_avail_par_85966 = smin64(num_recresids_padded_71534, Nmk_72261) + computed_group_sizze_85959 = smax64(num_recresids_padded_71534, Nmk_72261) + fits_86072 = sle64(computed_group_sizze_85959, max_group_sizze_77729) + suff_intra_par_86070 = (self.sizes["mainDetailed.suff_intra_par_20"] <= intra_avail_par_85966) + intra_suff_and_fits_86073 = (suff_intra_par_86070 and fits_86072) + segmap_group_sizze_86022 = self.sizes["mainDetailed.segmap_group_size_85974"] + max_num_groups_128290 = self.sizes["mainDetailed.segmap_num_groups_85976"] + num_groups_86023 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_86022), + sext_i32_i64(max_num_groups_128290)))) + segmap_group_sizze_86220 = self.sizes["mainDetailed.segmap_group_size_86213"] + segred_group_sizze_86229 = self.sizes["mainDetailed.segred_group_size_86194"] + max_num_groups_128291 = self.sizes["mainDetailed.segred_num_groups_86196"] + num_groups_86230 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_71535, + segred_group_sizze_86229), + sext_i32_i64(max_num_groups_128291)))) + segmap_group_sizze_86243 = self.sizes["mainDetailed.segmap_group_size_86185"] + segred_group_sizze_86251 = self.sizes["mainDetailed.segred_group_size_86164"] + max_num_groups_128292 = self.sizes["mainDetailed.segred_num_groups_86166"] + num_groups_86252 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_71535, + segred_group_sizze_86251), + sext_i32_i64(max_num_groups_128292)))) + segmap_group_sizze_86266 = self.sizes["mainDetailed.segmap_group_size_86150"] + nest_sizze_86277 = (m_70861 * Nmk_72261) + segscan_group_sizze_86278 = 
self.sizes["mainDetailed.segscan_group_size_86123"] + max_num_groups_128293 = self.sizes["mainDetailed.segscan_num_groups_86125"] + num_groups_86279 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_86277, + segscan_group_sizze_86278), + sext_i32_i64(max_num_groups_128293)))) + bytes_124022 = (np.int64(8) * nest_sizze_86277) + bytes_124008 = (np.int64(8) * Nmk_72261) + num_threads_125793 = (segmap_group_sizze_86022 * num_groups_86023) + total_sizze_125794 = (bytes_124008 * num_threads_125793) + local_memory_capacity_128536 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128536)) and suff_outer_par_85970): + mem_124024 = opencl_alloc(self, bytes_124022, "mem_124024") + mem_124026 = opencl_alloc(self, bytes_120173, "mem_124026") + mem_124009 = opencl_alloc(self, total_sizze_125794, "mem_124009") + if ((1 * (np.int64(num_groups_86023) * np.int64(segmap_group_sizze_86022))) != 0): + self.mainDetailedzisegmap_85972_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int64(num_groups_86023), + np.int64(num_threads_125793), + defunc_3_map_res_mem_120230, + mem_121934, mem_124009, + mem_124024, mem_124026) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_85972_var, + ((np.int64(num_groups_86023) * np.int64(segmap_group_sizze_86022)),), + (np.int64(segmap_group_sizze_86022),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124009 = None + mem_124066 = opencl_alloc(self, bytes_124022, "mem_124066") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124066, np.int64(0), + mem_124024, np.int64(0), + np.int64(1), m_70861, + Nmk_72261) + mem_124024 = None + defunc_3_map_res_mem_124068 = mem_124066 + defunc_3_map_res_mem_124069 = mem_124026 + else: + local_memory_capacity_128535 = self.max_local_memory + if 
(sle64(((((np.int32(8) * num_recresids_padded_71534) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_71534), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_71534) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_71534), + np.int64(8))), + np.int64(8)))) + (bytes_124008 + srem64((np.int64(8) - srem64(bytes_124008, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128535)) and intra_suff_and_fits_86073): + mem_124035 = opencl_alloc(self, bytes_124022, "mem_124035") + mem_124037 = opencl_alloc(self, bytes_120173, "mem_124037") + if ((1 * (np.int64(m_70861) * np.int64(computed_group_sizze_85959))) != 0): + self.mainDetailedzisegmap_intragroup_85968_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124008)), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_71534))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_71534))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int64(computed_group_sizze_85959), + defunc_3_map_res_mem_120230, + mem_121934, + mem_124035, + mem_124037) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_85968_var, + ((np.int64(m_70861) * np.int64(computed_group_sizze_85959)),), + (np.int64(computed_group_sizze_85959),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_124062 = mem_124035 + defunc_3_map_res_mem_124063 = mem_124037 + else: + segmap_usable_groups_86221 = sdiv_up64(m_70861, + segmap_group_sizze_86220) + mem_124040 = opencl_alloc(self, bytes_120173, "mem_124040") + mem_124042 = opencl_alloc(self, bytes_120173, "mem_124042") + if ((1 * (np.int64(segmap_usable_groups_86221) * np.int64(segmap_group_sizze_86220))) != 0): + self.mainDetailedzisegmap_86211_var.set_args(self.global_failure, + np.int64(m_70861), + 
np.int64(k2p2zq_70876), + defunc_3_map_res_mem_120230, + mem_124040, mem_124042) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86211_var, + ((np.int64(segmap_usable_groups_86221) * np.int64(segmap_group_sizze_86220)),), + (np.int64(segmap_group_sizze_86220),)) + if synchronous: + sync(self) + mem_124045 = opencl_alloc(self, bytes_121932, "mem_124045") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124045, np.int64(0), + mem_121934, np.int64(0), + np.int64(1), m_70861, + num_recresids_padded_71534) + mem_124048 = opencl_alloc(self, bytes_120173, "mem_124048") + if slt64((num_recresids_padded_71534 * np.int64(2)), + segred_group_sizze_86229): + segment_sizze_nonzzero_128339 = smax64(np.int64(1), + num_recresids_padded_71534) + num_threads_128340 = (num_groups_86230 * segred_group_sizze_86229) + if ((1 * (np.int64(num_groups_86230) * np.int64(segred_group_sizze_86229))) != 0): + self.mainDetailedzisegred_small_86200_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86229))), + np.int64(m_70861), + np.int64(num_recresids_padded_71534), + np.int64(num_groups_86230), + np.int64(segment_sizze_nonzzero_128339), + mem_124045, + mem_124048) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_86200_var, + ((np.int64(num_groups_86230) * np.int64(segred_group_sizze_86229)),), + (np.int64(segred_group_sizze_86229),)) + if synchronous: + sync(self) + else: + groups_per_segment_128364 = sdiv_up64(num_groups_86230, + smax64(np.int64(1), m_70861)) + elements_per_thread_128365 = sdiv_up64(num_recresids_padded_71534, + (segred_group_sizze_86229 * groups_per_segment_128364)) + virt_num_groups_128366 = (groups_per_segment_128364 * m_70861) + num_threads_128367 = (num_groups_86230 * segred_group_sizze_86229) + threads_per_segment_128368 = (groups_per_segment_128364 * segred_group_sizze_86229) + group_res_arr_mem_128369 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_86229 * 
virt_num_groups_128366)), + "group_res_arr_mem_128369") + mainDetailedzicounter_mem_128371 = self.mainDetailedzicounter_mem_128371 + if ((1 * (np.int64(num_groups_86230) * np.int64(segred_group_sizze_86229))) != 0): + self.mainDetailedzisegred_large_86200_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86229))), + np.int64(num_recresids_padded_71534), + np.int64(num_groups_86230), + np.int64(groups_per_segment_128364), + np.int64(elements_per_thread_128365), + np.int64(virt_num_groups_128366), + mem_124045, + mem_124048, + group_res_arr_mem_128369, + mainDetailedzicounter_mem_128371) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_86200_var, + ((np.int64(num_groups_86230) * np.int64(segred_group_sizze_86229)),), + (np.int64(segred_group_sizze_86229),)) + if synchronous: + sync(self) + segmap_usable_groups_86244 = sdiv_up64(m_70861, + segmap_group_sizze_86243) + mem_124051 = opencl_alloc(self, bytes_120173, "mem_124051") + if ((1 * (np.int64(segmap_usable_groups_86244) * np.int64(segmap_group_sizze_86243))) != 0): + self.mainDetailedzisegmap_86183_var.set_args(self.global_failure, + np.int64(m_70861), + mem_124040, mem_124048, + mem_124051) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86183_var, + ((np.int64(segmap_usable_groups_86244) * np.int64(segmap_group_sizze_86243)),), + (np.int64(segmap_group_sizze_86243),)) + if synchronous: + sync(self) + mem_124048 = None + mem_124054 = opencl_alloc(self, bytes_120173, "mem_124054") + if slt64((num_recresids_padded_71534 * np.int64(2)), + segred_group_sizze_86251): + segment_sizze_nonzzero_128414 = smax64(np.int64(1), + num_recresids_padded_71534) + num_threads_128415 = (num_groups_86252 * segred_group_sizze_86251) + if ((1 * (np.int64(num_groups_86252) * np.int64(segred_group_sizze_86251))) != 0): + self.mainDetailedzisegred_small_86170_var.set_args(self.global_failure, + 
cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86251))), + np.int64(m_70861), + np.int64(num_recresids_padded_71534), + np.int64(num_groups_86252), + np.int64(segment_sizze_nonzzero_128414), + mem_124045, + mem_124051, + mem_124054) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_86170_var, + ((np.int64(num_groups_86252) * np.int64(segred_group_sizze_86251)),), + (np.int64(segred_group_sizze_86251),)) + if synchronous: + sync(self) + else: + groups_per_segment_128435 = sdiv_up64(num_groups_86252, + smax64(np.int64(1), m_70861)) + elements_per_thread_128436 = sdiv_up64(num_recresids_padded_71534, + (segred_group_sizze_86251 * groups_per_segment_128435)) + virt_num_groups_128437 = (groups_per_segment_128435 * m_70861) + num_threads_128438 = (num_groups_86252 * segred_group_sizze_86251) + threads_per_segment_128439 = (groups_per_segment_128435 * segred_group_sizze_86251) + group_res_arr_mem_128440 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_86251 * virt_num_groups_128437)), + "group_res_arr_mem_128440") + mainDetailedzicounter_mem_128442 = self.mainDetailedzicounter_mem_128442 + if ((1 * (np.int64(num_groups_86252) * np.int64(segred_group_sizze_86251))) != 0): + self.mainDetailedzisegred_large_86170_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86251))), + np.int64(num_recresids_padded_71534), + np.int64(num_groups_86252), + np.int64(groups_per_segment_128435), + np.int64(elements_per_thread_128436), + np.int64(virt_num_groups_128437), + np.int64(threads_per_segment_128439), + mem_124045, + mem_124051, + mem_124054, + group_res_arr_mem_128440, + mainDetailedzicounter_mem_128442) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_86170_var, + ((np.int64(num_groups_86252) * np.int64(segred_group_sizze_86251)),), + (np.int64(segred_group_sizze_86251),)) + if synchronous: + sync(self) + mem_124051 = None + 
segmap_usable_groups_86267 = sdiv_up64(m_70861, + segmap_group_sizze_86266) + mem_124057 = opencl_alloc(self, bytes_120173, "mem_124057") + if ((1 * (np.int64(segmap_usable_groups_86267) * np.int64(segmap_group_sizze_86266))) != 0): + self.mainDetailedzisegmap_86148_var.set_args(self.global_failure, + np.int64(m_70861), + mem_124040, mem_124054, + mem_124057) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86148_var, + ((np.int64(segmap_usable_groups_86267) * np.int64(segmap_group_sizze_86266)),), + (np.int64(segmap_group_sizze_86266),)) + if synchronous: + sync(self) + mem_124040 = None + mem_124054 = None + mem_124061 = opencl_alloc(self, bytes_124022, "mem_124061") + if slt64(np.int64(0), (m_70861 * Nmk_72261)): + stage1_max_num_groups_128479 = self.max_group_size + stage1_num_groups_128480 = smin64(stage1_max_num_groups_128479, + num_groups_86279) + num_threads_128481 = sext_i64_i32((stage1_num_groups_128480 * segscan_group_sizze_86278)) + if ((1 * (np.int64(stage1_num_groups_128480) * np.int64(segscan_group_sizze_86278))) != 0): + self.mainDetailedziscan_stage1_86129_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_86278)))), + np.int64(m_70861), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int32(num_threads_128481), + mem_124045, + mem_124057, + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage1_86129_var, + ((np.int64(stage1_num_groups_128480) * np.int64(segscan_group_sizze_86278)),), + (np.int64(segscan_group_sizze_86278),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_128480))) != 0): + self.mainDetailedziscan_stage2_86129_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_128480)))), + 
np.int64(m_70861), + np.int64(Nmk_72261), + np.int64(stage1_num_groups_128480), + np.int32(num_threads_128481), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage2_86129_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_128480)),), + (np.int64(stage1_num_groups_128480),)) + if synchronous: + sync(self) + required_groups_128523 = sext_i64_i32(sdiv_up64((m_70861 * Nmk_72261), + segscan_group_sizze_86278)) + if ((1 * (np.int64(num_groups_86279) * np.int64(segscan_group_sizze_86278))) != 0): + self.mainDetailedziscan_stage3_86129_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(Nmk_72261), + np.int64(num_groups_86279), + np.int32(num_threads_128481), + np.int32(required_groups_128523), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage3_86129_var, + ((np.int64(num_groups_86279) * np.int64(segscan_group_sizze_86278)),), + (np.int64(segscan_group_sizze_86278),)) + if synchronous: + sync(self) + mem_124045 = None + mem_124057 = None + defunc_3_map_res_mem_124062 = mem_124061 + defunc_3_map_res_mem_124063 = mem_124042 + defunc_3_map_res_mem_124068 = defunc_3_map_res_mem_124062 + defunc_3_map_res_mem_124069 = defunc_3_map_res_mem_124063 + defunc_3_map_res_mem_120230 = None + mem_121934 = None + empty_slice_72310 = (num_recresids_padded_71534 == np.int64(0)) + zzero_leq_i_p_m_t_s_72311 = sle64(np.int64(0), num_recresids_padded_71534) + i_p_m_t_s_leq_w_72312 = slt64(num_recresids_padded_71534, Nmk_72261) + i_lte_j_72313 = sle64(np.int64(1), Nmk_72261) + y_72314 = (zzero_leq_i_p_m_t_s_72311 and i_p_m_t_s_leq_w_72312) + y_72315 = (i_lte_j_72313 and y_72314) + ok_or_empty_72316 = (empty_slice_72310 or y_72315) + index_certs_72317 = True + assert ok_or_empty_72316, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 mroc.fut:57:12-22\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 mroc.fut:78:15-34\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:185:3-72\n #6 
bfastfinal.fut:181:1-185:72\n" % ("Index [", + np.int64(1), + ":] out of bounds for array of shape [", + Nmk_72261, + "].")) + range_valid_c_72320 = True + assert valid_72263, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:72:13-18\n #2 mroc.fut:80:16-46\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_72261, + " is invalid.")) + segmap_group_sizze_86355 = self.sizes["mainDetailed.segmap_group_size_86347"] + segmap_usable_groups_86356 = sdiv_up64(m_70861, segmap_group_sizze_86355) + mem_124072 = opencl_alloc(self, bytes_120173, "mem_124072") + mem_124074 = opencl_alloc(self, bytes_120173, "mem_124074") + if ((1 * (np.int64(segmap_usable_groups_86356) * np.int64(segmap_group_sizze_86355))) != 0): + self.mainDetailedzisegmap_86345_var.set_args(self.global_failure, + np.int64(m_70861), + defunc_3_map_res_mem_124069, + mem_124072, mem_124074) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86345_var, + ((np.int64(segmap_usable_groups_86356) * np.int64(segmap_group_sizze_86355)),), + (np.int64(segmap_group_sizze_86355),)) if synchronous: sync(self) - binop_p_mem_44390 = mem_44389 - x_29234 = (N_29165 * N_29165) - y_29235 = (np.int64(2) * N_29165) - x_29236 = (x_29234 + y_29235) - x_29237 = (np.int64(1) + x_29236) - y_29238 = (np.int64(1) + N_29165) - zzero_29239 = (y_29238 == np.int64(0)) - nonzzero_29240 = not(zzero_29239) - nonzzero_cert_29241 = True - assert nonzzero_29240, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:32-60\n #1 bfastfinal.fut:189:5-58\n #2 bfastfinal.fut:184:1-190:25\n" % ("division by zero",)) - x_29242 = sdiv64(x_29237, y_29238) - x_29243 = (x_29242 - N_29165) - binop_p_29244 = (x_29243 - np.int64(1)) - defunc_0_f_res_29245 = sext_i64_i32(binop_p_29244) - i32_res_29246 = sitofp_i32_f32(defunc_0_f_res_29245) - segmap_group_sizze_38991 = 
self.sizes["main.segmap_group_size_38970"] - segmap_usable_groups_38992 = sdiv_up64(binop_x_44384, - segmap_group_sizze_38991) - mem_44393 = opencl_alloc(self, bytes_44383, "mem_44393") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44393, np.int64(0), - binop_p_mem_44390, np.int64(0), - np.int64(1), N_29165, - i32_res_29181) - mem_44397 = opencl_alloc(self, bytes_44383, "mem_44397") - if ((1 * (np.long(segmap_usable_groups_38992) * np.long(segmap_group_sizze_38991))) != 0): - self.mainzisegmap_38967_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(i32_res_29181), - np.float32(i32_res_29246), mem_44393, - mem_44397) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_38967_var, - ((np.long(segmap_usable_groups_38992) * np.long(segmap_group_sizze_38991)),), - (np.long(segmap_group_sizze_38991),)) + segmap_group_sizze_86369 = self.sizes["mainDetailed.segmap_group_size_86316"] + segmap_usable_groups_86370 = sdiv_up64(nest_sizze_86277, + segmap_group_sizze_86369) + mem_124078 = opencl_alloc(self, bytes_124022, "mem_124078") + if ((1 * (np.int64(segmap_usable_groups_86370) * np.int64(segmap_group_sizze_86369))) != 0): + self.mainDetailedzisegmap_86313_var.set_args(self.global_failure, + np.int64(m_70861), + np.float64(conf_70870), + np.int64(Nmk_72261), + mem_124072, mem_124074, + mem_124078) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86313_var, + ((np.int64(segmap_usable_groups_86370) * np.int64(segmap_group_sizze_86369)),), + (np.int64(segmap_group_sizze_86369),)) + if synchronous: + sync(self) + mem_124072 = None + mem_124074 = None + suff_outer_par_86394 = (self.sizes["mainDetailed.suff_outer_par_21"] <= m_70861) + fits_86670 = sle64(num_recresids_padded_71534, max_group_sizze_77729) + suff_intra_par_86668 = (self.sizes["mainDetailed.suff_intra_par_22"] <= num_recresids_padded_71534) + intra_suff_and_fits_86671 = (suff_intra_par_86668 and fits_86670) + segmap_group_sizze_86533 = 
self.sizes["mainDetailed.segmap_group_size_86398"] + segmap_group_sizze_86984 = self.sizes["mainDetailed.segmap_group_size_86978"] + segred_group_sizze_86991 = self.sizes["mainDetailed.segred_group_size_86954"] + max_num_groups_128547 = self.sizes["mainDetailed.segred_num_groups_86956"] + num_groups_86992 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_71535, + segred_group_sizze_86991), + sext_i32_i64(max_num_groups_128547)))) + segmap_group_sizze_87010 = self.sizes["mainDetailed.segmap_group_size_86851"] + segred_group_sizze_87111 = self.sizes["mainDetailed.segred_group_size_86829"] + max_num_groups_128548 = self.sizes["mainDetailed.segred_num_groups_86831"] + num_groups_87112 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_71535, + segred_group_sizze_87111), + sext_i32_i64(max_num_groups_128548)))) + segmap_group_sizze_87127 = self.sizes["mainDetailed.segmap_group_size_86811"] + bytes_124087 = (np.int64(8) * segmap_group_sizze_86533) + local_memory_capacity_128710 = self.max_local_memory + if (sle64(((bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8))) + (bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128710)) and suff_outer_par_86394): + segmap_usable_groups_86534 = sdiv_up64(m_70861, + segmap_group_sizze_86533) + mem_124081 = opencl_alloc(self, bytes_124022, "mem_124081") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124081, np.int64(0), + defunc_3_map_res_mem_124068, + np.int64(0), np.int64(1), + Nmk_72261, m_70861) + mem_124084 = opencl_alloc(self, bytes_124022, "mem_124084") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124084, np.int64(0), + mem_124078, np.int64(0), + np.int64(1), Nmk_72261, + m_70861) + num_whole_tiles_117920 = squot64(num_recresids_padded_71534, + segmap_group_sizze_86533) + residual_input_118032 = srem64(num_recresids_padded_71534, + segmap_group_sizze_86533) + 
cond_118033 = (residual_input_118032 == np.int64(0)) + mem_124113 = opencl_alloc(self, bytes_120173, "mem_124113") + if ((1 * (np.int64(segmap_usable_groups_86534) * np.int64(segmap_group_sizze_86533))) != 0): + self.mainDetailedzisegmap_intragroup_117900_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124087)), + cl.LocalMemory(np.int64(bytes_124087)), + np.int64(m_70861), + np.float64(level_70867), + np.int64(num_recresids_padded_71534), + np.int64(num_whole_tiles_117920), + np.int64(residual_input_118032), + np.byte(cond_118033), + defunc_3_map_res_mem_124069, + mem_124081, + mem_124084, + mem_124113) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_117900_var, + ((np.int64(segmap_usable_groups_86534) * np.int64(segmap_group_sizze_86533)),), + (np.int64(segmap_group_sizze_86533),)) + if synchronous: + sync(self) + mem_124081 = None + mem_124084 = None + defunc_1_map_res_mem_124135 = mem_124113 + else: + local_memory_capacity_128709 = self.max_local_memory + if (sle64((((np.int32(8) * num_recresids_padded_71534) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_71534), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_71534) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_71534), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128709)) and intra_suff_and_fits_86671): + mem_124118 = opencl_alloc(self, bytes_120173, "mem_124118") + if ((1 * (np.int64(m_70861) * np.int64(num_recresids_padded_71534))) != 0): + self.mainDetailedzisegmap_intragroup_86392_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_71534))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_71534))), + np.float64(level_70867), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + defunc_3_map_res_mem_124068, + defunc_3_map_res_mem_124069, + mem_124078, + mem_124118) + 
cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_intragroup_86392_var, + ((np.int64(m_70861) * np.int64(num_recresids_padded_71534)),), + (np.int64(num_recresids_padded_71534),)) + if synchronous: + sync(self) + defunc_1_map_res_mem_124134 = mem_124118 + else: + segmap_usable_groups_86985 = sdiv_up64(m_70861, + segmap_group_sizze_86984) + mem_124121 = opencl_alloc(self, bytes_120173, "mem_124121") + if ((1 * (np.int64(segmap_usable_groups_86985) * np.int64(segmap_group_sizze_86984))) != 0): + self.mainDetailedzisegmap_86976_var.set_args(self.global_failure, + np.int64(m_70861), + defunc_3_map_res_mem_124069, + mem_124121) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86976_var, + ((np.int64(segmap_usable_groups_86985) * np.int64(segmap_group_sizze_86984)),), + (np.int64(segmap_group_sizze_86984),)) + if synchronous: + sync(self) + mem_124124 = opencl_alloc(self, bytes_120173, "mem_124124") + if slt64((num_recresids_padded_71534 * np.int64(2)), + segred_group_sizze_86991): + segment_sizze_nonzzero_128579 = smax64(np.int64(1), + num_recresids_padded_71534) + num_threads_128580 = (num_groups_86992 * segred_group_sizze_86991) + if ((1 * (np.int64(num_groups_86992) * np.int64(segred_group_sizze_86991))) != 0): + self.mainDetailedzisegred_small_86960_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86991))), + np.int64(m_70861), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int64(num_groups_86992), + np.int64(segment_sizze_nonzzero_128579), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_86960_var, + ((np.int64(num_groups_86992) * np.int64(segred_group_sizze_86991)),), + (np.int64(segred_group_sizze_86991),)) + if synchronous: + sync(self) + else: + groups_per_segment_128600 = sdiv_up64(num_groups_86992, + smax64(np.int64(1), m_70861)) + elements_per_thread_128601 = 
sdiv_up64(num_recresids_padded_71534, + (segred_group_sizze_86991 * groups_per_segment_128600)) + virt_num_groups_128602 = (groups_per_segment_128600 * m_70861) + num_threads_128603 = (num_groups_86992 * segred_group_sizze_86991) + threads_per_segment_128604 = (groups_per_segment_128600 * segred_group_sizze_86991) + group_res_arr_mem_128605 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_86991 * virt_num_groups_128602)), + "group_res_arr_mem_128605") + mainDetailedzicounter_mem_128607 = self.mainDetailedzicounter_mem_128607 + if ((1 * (np.int64(num_groups_86992) * np.int64(segred_group_sizze_86991))) != 0): + self.mainDetailedzisegred_large_86960_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_86991))), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int64(num_groups_86992), + np.int64(groups_per_segment_128600), + np.int64(elements_per_thread_128601), + np.int64(virt_num_groups_128602), + np.int64(threads_per_segment_128604), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124, + group_res_arr_mem_128605, + mainDetailedzicounter_mem_128607) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_86960_var, + ((np.int64(num_groups_86992) * np.int64(segred_group_sizze_86991)),), + (np.int64(segred_group_sizze_86991),)) + if synchronous: + sync(self) + mem_124121 = None + segmap_usable_groups_87011 = sdiv_up64(m_70861, + segmap_group_sizze_87010) + mem_124127 = opencl_alloc(self, bytes_120173, "mem_124127") + if ((1 * (np.int64(segmap_usable_groups_87011) * np.int64(segmap_group_sizze_87010))) != 0): + self.mainDetailedzisegmap_86849_var.set_args(self.global_failure, + np.int64(m_70861), + mem_124124, mem_124127) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86849_var, + ((np.int64(segmap_usable_groups_87011) * np.int64(segmap_group_sizze_87010)),), + (np.int64(segmap_group_sizze_87010),)) + if 
synchronous: + sync(self) + mem_124124 = None + mem_124130 = opencl_alloc(self, bytes_120173, "mem_124130") + if slt64((num_recresids_padded_71534 * np.int64(2)), + segred_group_sizze_87111): + segment_sizze_nonzzero_128644 = smax64(np.int64(1), + num_recresids_padded_71534) + num_threads_128645 = (num_groups_87112 * segred_group_sizze_87111) + if ((1 * (np.int64(num_groups_87112) * np.int64(segred_group_sizze_87111))) != 0): + self.mainDetailedzisegred_small_86835_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_87111))), + np.int64(m_70861), + np.int64(num_recresids_padded_71534), + np.int64(Nmk_72261), + np.int64(num_groups_87112), + np.int64(segment_sizze_nonzzero_128644), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_86835_var, + ((np.int64(num_groups_87112) * np.int64(segred_group_sizze_87111)),), + (np.int64(segred_group_sizze_87111),)) + if synchronous: + sync(self) + else: + groups_per_segment_128665 = sdiv_up64(num_groups_87112, + smax64(np.int64(1), m_70861)) + elements_per_thread_128666 = sdiv_up64(num_recresids_padded_71534, + (segred_group_sizze_87111 * groups_per_segment_128665)) + virt_num_groups_128667 = (groups_per_segment_128665 * m_70861) + num_threads_128668 = (num_groups_87112 * segred_group_sizze_87111) + threads_per_segment_128669 = (groups_per_segment_128665 * segred_group_sizze_87111) + group_res_arr_mem_128670 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_87111 * virt_num_groups_128667)), + "group_res_arr_mem_128670") + mainDetailedzicounter_mem_128672 = self.mainDetailedzicounter_mem_128672 + if ((1 * (np.int64(num_groups_87112) * np.int64(segred_group_sizze_87111))) != 0): + self.mainDetailedzisegred_large_86835_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_87111))), + np.int64(num_recresids_padded_71534), 
+ np.int64(Nmk_72261), + np.int64(num_groups_87112), + np.int64(groups_per_segment_128665), + np.int64(elements_per_thread_128666), + np.int64(virt_num_groups_128667), + np.int64(threads_per_segment_128669), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130, + group_res_arr_mem_128670, + mainDetailedzicounter_mem_128672) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_86835_var, + ((np.int64(num_groups_87112) * np.int64(segred_group_sizze_87111)),), + (np.int64(segred_group_sizze_87111),)) + if synchronous: + sync(self) + segmap_usable_groups_87128 = sdiv_up64(m_70861, + segmap_group_sizze_87127) + mem_124133 = opencl_alloc(self, bytes_120173, "mem_124133") + if ((1 * (np.int64(segmap_usable_groups_87128) * np.int64(segmap_group_sizze_87127))) != 0): + self.mainDetailedzisegmap_86809_var.set_args(self.global_failure, + np.int64(m_70861), + np.float64(level_70867), + defunc_3_map_res_mem_124069, + mem_124127, mem_124130, + mem_124133) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_86809_var, + ((np.int64(segmap_usable_groups_87128) * np.int64(segmap_group_sizze_87127)),), + (np.int64(segmap_group_sizze_87127),)) + if synchronous: + sync(self) + mem_124127 = None + mem_124130 = None + defunc_1_map_res_mem_124134 = mem_124133 + defunc_1_map_res_mem_124135 = defunc_1_map_res_mem_124134 + defunc_3_map_res_mem_124068 = None + defunc_3_map_res_mem_124069 = None + mem_124078 = None + hist_inds_mem_124138 = defunc_1_map_res_mem_124135 + else: + mem_124137 = opencl_alloc(self, bytes_120173, "mem_124137") + self.futhark_builtinzhreplicate_i64(mem_124137, m_70861, hist_70869) + hist_inds_mem_124138 = mem_124137 + segmap_group_sizze_87181 = self.sizes["mainDetailed.segmap_group_size_87157"] + segmap_usable_groups_87182 = sdiv_up64(binop_x_120126, + segmap_group_sizze_87181) + mem_124142 = opencl_alloc(self, bytes_120125, "mem_124142") + if ((1 * (np.int64(segmap_usable_groups_87182) * np.int64(segmap_group_sizze_87181))) 
!= 0): + self.mainDetailedzisegmap_87154_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + images_mem_120108, + hist_inds_mem_124138, + mem_124142) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87154_var, + ((np.int64(segmap_usable_groups_87182) * np.int64(segmap_group_sizze_87181)),), + (np.int64(segmap_group_sizze_87181),)) if synchronous: sync(self) - eq_x_y_29254 = (np.int64(0) == i32_res_29181) - p_and_eq_x_y_29255 = (cond_29178 and eq_x_y_29254) - not_p_29256 = not(cond_29178) - p_and_eq_x_y_29257 = (eq_x_y_29254 and not_p_29256) - empty_slice_29258 = (p_and_eq_x_y_29255 or p_and_eq_x_y_29257) - m_29259 = (i32_res_29181 - np.int64(1)) - zzero_leq_i_p_m_t_s_29260 = sle64(np.int64(0), m_29259) - i_p_m_t_s_leq_w_29261 = slt64(m_29259, i32_res_29181) - i_lte_j_29262 = sle64(np.int64(0), i32_res_29181) - y_29263 = (zzero_leq_i_p_m_t_s_29260 and i_p_m_t_s_leq_w_29261) - y_29264 = (i_lte_j_29262 and y_29263) - ok_or_empty_29265 = (empty_slice_29258 or y_29264) - empty_slice_29266 = (i32_res_29175 == np.int64(0)) - m_29267 = (i32_res_29175 - np.int64(1)) - zzero_leq_i_p_m_t_s_29268 = sle64(np.int64(0), m_29267) - i_p_m_t_s_leq_w_29269 = slt64(m_29267, N_29165) - i_lte_j_29270 = sle64(np.int64(0), i32_res_29175) - y_29271 = (zzero_leq_i_p_m_t_s_29268 and i_p_m_t_s_leq_w_29269) - y_29272 = (i_lte_j_29270 and y_29271) - ok_or_empty_29273 = (empty_slice_29266 or y_29272) - index_ok_29274 = (ok_or_empty_29265 and ok_or_empty_29273) - index_certs_29275 = True - assert index_ok_29274, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-21\n #1 bfastfinal.fut:189:5-58\n #2 bfastfinal.fut:184:1-190:25\n" % ("Index [", - np.int64(0), - ":, :", - i32_res_29175, - "] out of bounds for array of shape [", - i32_res_29181, - "][", - N_29165, - "].")) - empty_slice_29277 = (i32_res_29181 == np.int64(0)) - ok_or_empty_29278 = (y_29264 or empty_slice_29277) - index_ok_29279 = (ok_or_empty_29273 and ok_or_empty_29278) 
- index_certs_29280 = True - assert index_ok_29279, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-22\n #1 bfastfinal.fut:189:5-58\n #2 bfastfinal.fut:184:1-190:25\n" % ("Index [:", - i32_res_29175, - ", ", - np.int64(0), - ":] out of bounds for array of shape [", - N_29165, - "][", - i32_res_29181, - "].")) - empty_slice_29282 = (m_29166 == np.int64(0)) - m_29283 = (m_29166 - np.int64(1)) - zzero_leq_i_p_m_t_s_29284 = sle64(np.int64(0), m_29283) - i_p_m_t_s_leq_w_29285 = slt64(m_29283, m_29166) - i_lte_j_29286 = sle64(np.int64(0), m_29166) - y_29287 = (zzero_leq_i_p_m_t_s_29284 and i_p_m_t_s_leq_w_29285) - y_29288 = (i_lte_j_29286 and y_29287) - ok_or_empty_29289 = (empty_slice_29282 or y_29288) - index_ok_29290 = (ok_or_empty_29273 and ok_or_empty_29289) - index_certs_29291 = True - assert index_ok_29290, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-26\n #1 bfastfinal.fut:189:5-58\n #2 bfastfinal.fut:184:1-190:25\n" % ("Index [", - np.int64(0), - ":, :", - i32_res_29175, - "] out of bounds for array of shape [", - m_29166, - "][", - N_29165, - "].")) - suff_outer_par_38998 = (self.sizes["main.suff_outer_par_6"] <= m_29166) - segmap_group_sizze_39024 = self.sizes["main.segmap_group_size_39002"] - max_num_groups_45694 = self.sizes["main.segmap_num_groups_39004"] - num_groups_39025 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segmap_group_sizze_39024), - sext_i32_i64(max_num_groups_45694)))) - nest_sizze_39200 = (m_29166 * i32_res_29181) - segmap_group_sizze_39201 = self.sizes["main.segmap_group_size_39049"] - max_num_groups_45695 = self.sizes["main.segmap_num_groups_39051"] - num_groups_39202 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_39200, - segmap_group_sizze_39201), - sext_i32_i64(max_num_groups_45695)))) - suff_outer_par_39206 = (self.sizes["main.suff_outer_par_7"] <= nest_sizze_39200) - y_39230 = (i32_res_29181 * i32_res_29181) - comparatee_39231 = (m_29166 * y_39230) - 
suff_outer_par_39232 = (self.sizes["main.suff_outer_par_8"] <= comparatee_39231) - nest_sizze_39252 = (i32_res_29175 * comparatee_39231) - segred_group_sizze_39253 = self.sizes["main.segred_group_size_39109"] - max_num_groups_45696 = self.sizes["main.segred_num_groups_39111"] - num_groups_39254 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_39252, - segred_group_sizze_39253), - sext_i32_i64(max_num_groups_45696)))) - Tx_42527 = self.sizes["main.Tx_42525"] - Ty_42528 = self.sizes["main.Ty_42526"] - Ty_42529 = smin64(i32_res_29181, Ty_42528) - Tx_42530 = smin64(i32_res_29181, Tx_42527) - gridDim_zz_42533 = sdiv_up64(m_29166, np.int64(30)) - group_sizze_tile3d_42536 = (Ty_42529 * Tx_42530) - binop_x_44399 = (N_29165 * m_29166) - bytes_44398 = (np.int64(4) * binop_x_44399) - bytes_44443 = (np.int64(4) * comparatee_39231) - bytes_44402 = (np.int64(4) * y_39230) - binop_x_44546 = (i32_res_29181 * nest_sizze_39200) - bytes_44544 = (np.int64(4) * binop_x_44546) - bytes_44448 = (np.int64(4) * i32_res_29181) - binop_x_44477 = (np.int64(30) * group_sizze_tile3d_42536) - bytes_44475 = (np.int64(4) * binop_x_44477) - binop_x_45447 = (np.int64(4) * Ty_42529) - binop_x_45448 = (Tx_42530 * binop_x_45447) - sizze_45449 = (np.int64(30) * binop_x_45448) - num_threads_45633 = (segmap_group_sizze_39024 * num_groups_39025) - total_sizze_45634 = (bytes_44402 * num_threads_45633) - num_threads_45635 = (segmap_group_sizze_39201 * num_groups_39202) - total_sizze_45636 = (bytes_44448 * num_threads_45635) - local_memory_capacity_45818 = self.max_local_memory + suff_outer_par_87191 = (self.sizes["mainDetailed.suff_outer_par_23"] <= m_70861) + segmap_group_sizze_87217 = self.sizes["mainDetailed.segmap_group_size_87195"] + max_num_groups_128716 = self.sizes["mainDetailed.segmap_num_groups_87197"] + num_groups_87218 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_87217), + sext_i32_i64(max_num_groups_128716)))) + segmap_group_sizze_87394 = 
self.sizes["mainDetailed.segmap_group_size_87242"] + max_num_groups_128717 = self.sizes["mainDetailed.segmap_num_groups_87244"] + num_groups_87395 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_87394), + sext_i32_i64(max_num_groups_128717)))) + suff_outer_par_87399 = (self.sizes["mainDetailed.suff_outer_par_24"] <= binop_x_120244) + comparatee_87424 = (m_70861 * binop_x_120251) + suff_outer_par_87425 = (self.sizes["mainDetailed.suff_outer_par_25"] <= comparatee_87424) + nest_sizze_87445 = (n_70864 * comparatee_87424) + segred_group_sizze_87446 = self.sizes["mainDetailed.segred_group_size_87302"] + max_num_groups_128718 = self.sizes["mainDetailed.segred_num_groups_87304"] + num_groups_87447 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_87445, + segred_group_sizze_87446), + sext_i32_i64(max_num_groups_128718)))) + Tx_118224 = self.sizes["mainDetailed.Tx_118222"] + Ty_118225 = self.sizes["mainDetailed.Ty_118223"] + Ty_118226 = smin64(k2p2zq_70876, Ty_118225) + Tx_118227 = smin64(k2p2zq_70876, Tx_118224) + gridDim_zz_118230 = sdiv_up64(m_70861, np.int64(30)) + group_sizze_tile3d_118233 = (Ty_118226 * Tx_118227) + bytes_124188 = (np.int64(8) * comparatee_87424) + binop_x_124222 = (np.int64(30) * group_sizze_tile3d_118233) + bytes_124220 = (np.int64(8) * binop_x_124222) + binop_x_125361 = (np.int64(8) * Ty_118226) + binop_x_125362 = (Tx_118227 * binop_x_125361) + sizze_125363 = (np.int64(30) * binop_x_125362) + num_threads_125812 = (segmap_group_sizze_87217 * num_groups_87218) + total_sizze_125813 = (bytes_120250 * num_threads_125812) + num_threads_125814 = (segmap_group_sizze_87394 * num_groups_87395) + total_sizze_125815 = (bytes_120247 * num_threads_125814) + local_memory_capacity_128840 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45818)) and suff_outer_par_38998): - mem_44400 = opencl_alloc(self, bytes_44398, "mem_44400") - 
self.futhark_builtinzhgpu_map_transpose_f32(mem_44400, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_29165, m_29166) - mem_44446 = opencl_alloc(self, bytes_44443, "mem_44446") - mem_44404 = opencl_alloc(self, total_sizze_45634, "mem_44404") - if ((1 * (np.long(num_groups_39025) * np.long(segmap_group_sizze_39024))) != 0): - self.mainzisegmap_39000_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int32(n_29169), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int64(num_groups_39025), - binop_p_mem_44390, mem_44397, - mem_44400, mem_44404, mem_44446) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_39000_var, - ((np.long(num_groups_39025) * np.long(segmap_group_sizze_39024)),), - (np.long(segmap_group_sizze_39024),)) + sext_i32_i64(local_memory_capacity_128840)) and suff_outer_par_87191): + mem_124145 = opencl_alloc(self, bytes_120125, "mem_124145") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124145, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_70860, m_70861) + mem_124191 = opencl_alloc(self, bytes_124188, "mem_124191") + mem_124149 = opencl_alloc(self, total_sizze_125813, "mem_124149") + if ((1 * (np.int64(num_groups_87218) * np.int64(segmap_group_sizze_87217))) != 0): + self.mainDetailedzisegmap_87193_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_87218), + np.int64(num_threads_125812), + binop_p_mem_120117, + mem_120124, mem_124145, + mem_124149, mem_124191) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87193_var, + ((np.int64(num_groups_87218) * np.int64(segmap_group_sizze_87217)),), + (np.int64(segmap_group_sizze_87217),)) if synchronous: sync(self) - mem_44400 = None - mem_44404 = None - mem_44547 = opencl_alloc(self, bytes_44544, "mem_44547") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44547, np.int64(0), - mem_44446, np.int64(0), - np.int64(1), 
m_29166, - (i32_res_29181 * i32_res_29181)) - mem_44446 = None - defunc_3_map_res_mem_44549 = mem_44547 + mem_124145 = None + mem_124149 = None + mem_124292 = opencl_alloc(self, bytes_121997, "mem_124292") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124292, np.int64(0), + mem_124191, np.int64(0), + np.int64(1), m_70861, + (k2p2zq_70876 * k2p2zq_70876)) + mem_124191 = None + defunc_3_map_res_mem_124294 = mem_124292 else: - local_memory_capacity_45817 = self.max_local_memory + local_memory_capacity_128839 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45817)) and suff_outer_par_39206): - mem_44465 = opencl_alloc(self, bytes_44544, "mem_44465") - mem_44449 = opencl_alloc(self, total_sizze_45636, "mem_44449") - if ((1 * (np.long(num_groups_39202) * np.long(segmap_group_sizze_39201))) != 0): - self.mainzisegmap_39046_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int32(n_29169), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int64(num_groups_39202), - images_mem_44381, mem_44393, - mem_44397, mem_44449, mem_44465) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_39046_var, - ((np.long(num_groups_39202) * np.long(segmap_group_sizze_39201)),), - (np.long(segmap_group_sizze_39201),)) + sext_i32_i64(local_memory_capacity_128839)) and suff_outer_par_87399): + mem_124210 = opencl_alloc(self, bytes_121997, "mem_124210") + mem_124194 = opencl_alloc(self, total_sizze_125815, "mem_124194") + if ((1 * (np.int64(num_groups_87395) * np.int64(segmap_group_sizze_87394))) != 0): + self.mainDetailedzisegmap_87239_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_87395), + np.int64(num_threads_125814), + mem_120120, mem_120124, + mem_124142, mem_124194, + mem_124210) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87239_var, + ((np.int64(num_groups_87395) * 
np.int64(segmap_group_sizze_87394)),), + (np.int64(segmap_group_sizze_87394),)) if synchronous: sync(self) - mem_44449 = None - mem_44541 = opencl_alloc(self, bytes_44544, "mem_44541") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44541, np.int64(0), - mem_44465, np.int64(0), + mem_124194 = None + mem_124286 = opencl_alloc(self, bytes_121997, "mem_124286") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124286, np.int64(0), + mem_124210, np.int64(0), np.int64(1), - (m_29166 * i32_res_29181), - i32_res_29181) - mem_44465 = None - defunc_3_map_res_mem_44543 = mem_44541 + (m_70861 * k2p2zq_70876), + k2p2zq_70876) + mem_124210 = None + defunc_3_map_res_mem_124288 = mem_124286 else: - local_memory_capacity_45816 = self.max_local_memory - if (sle64(np.int64(120), - sext_i32_i64(local_memory_capacity_45816)) and suff_outer_par_39232): - mem_44468 = opencl_alloc(self, bytes_44398, "mem_44468") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44468, np.int64(0), - images_mem_44381, - np.int64(0), np.int64(1), - N_29165, m_29166) - gridDim_x_42531 = sdiv_up64(i32_res_29181, Tx_42530) - gridDim_y_42532 = sdiv_up64(i32_res_29181, Ty_42529) - binop_x_42534 = (gridDim_y_42532 * gridDim_zz_42533) - grid_sizze_tile3d_42535 = (gridDim_x_42531 * binop_x_42534) - count_shmem_42537 = sdiv_up64(np.int64(30), group_sizze_tile3d_42536) - mem_44528 = opencl_alloc(self, bytes_44544, "mem_44528") - if ((1 * (np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536))) != 0): - self.mainzisegmap_intragroup_42541_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int64(120))), - np.int64(m_29166), - np.int32(n_29169), - np.int64(i32_res_29181), - np.int64(Ty_42529), - np.int64(Tx_42530), - np.int64(gridDim_x_42531), - np.int64(gridDim_y_42532), - np.int64(group_sizze_tile3d_42536), - np.int64(count_shmem_42537), - mem_44393, - mem_44397, - mem_44468, - mem_44528) + local_memory_capacity_128838 = self.max_local_memory + if (sle64(np.int64(240), + 
sext_i32_i64(local_memory_capacity_128838)) and suff_outer_par_87425): + mem_124213 = opencl_alloc(self, bytes_120125, "mem_124213") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124213, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_70860, + m_70861) + gridDim_x_118228 = sdiv_up64(k2p2zq_70876, Tx_118227) + gridDim_y_118229 = sdiv_up64(k2p2zq_70876, Ty_118226) + binop_x_118231 = (gridDim_y_118229 * gridDim_zz_118230) + grid_sizze_tile3d_118232 = (gridDim_x_118228 * binop_x_118231) + count_shmem_118234 = sdiv_up64(np.int64(30), + group_sizze_tile3d_118233) + mem_124273 = opencl_alloc(self, bytes_121997, "mem_124273") + if ((1 * (np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233))) != 0): + self.mainDetailedzisegmap_intragroup_118238_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int64(240))), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(Ty_118226), + np.int64(Tx_118227), + np.int64(gridDim_x_118228), + np.int64(gridDim_y_118229), + np.int64(group_sizze_tile3d_118233), + np.int64(count_shmem_118234), + mem_120120, + mem_120124, + mem_124213, + mem_124273) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_42541_var, - ((np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536)),), - (np.long(group_sizze_tile3d_42536),)) + self.mainDetailedzisegmap_intragroup_118238_var, + ((np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233)),), + (np.int64(group_sizze_tile3d_118233),)) if synchronous: sync(self) - mem_44468 = None - defunc_3_map_res_mem_44537 = mem_44528 + mem_124213 = None + defunc_3_map_res_mem_124282 = mem_124273 else: - mem_44531 = opencl_alloc(self, bytes_44383, "mem_44531") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44531, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), - i32_res_29181, N_29165) - mem_44536 = opencl_alloc(self, bytes_44544, "mem_44536") - if slt64((i32_res_29175 * np.int64(2)), 
segred_group_sizze_39253): - segment_sizze_nonzzero_45756 = smax64(np.int64(1), i32_res_29175) - num_threads_45757 = (num_groups_39254 * segred_group_sizze_39253) - if ((1 * (np.long(num_groups_39254) * np.long(segred_group_sizze_39253))) != 0): - self.mainzisegred_small_39115_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_39253))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29175), - np.int64(i32_res_29181), - np.int64(num_groups_39254), - np.int64(segment_sizze_nonzzero_45756), - images_mem_44381, - binop_p_mem_44390, - mem_44531, mem_44536) + mem_124276 = opencl_alloc(self, bytes_120110, "mem_124276") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124276, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_70876, + N_70860) + mem_124281 = opencl_alloc(self, bytes_121997, "mem_124281") + if slt64((n_70864 * np.int64(2)), segred_group_sizze_87446): + segment_sizze_nonzzero_128778 = smax64(np.int64(1), n_70864) + num_threads_128779 = (num_groups_87447 * segred_group_sizze_87446) + if ((1 * (np.int64(num_groups_87447) * np.int64(segred_group_sizze_87446))) != 0): + self.mainDetailedzisegred_small_87308_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_87446))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_87447), + np.int64(segment_sizze_nonzzero_128778), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_39115_var, - ((np.long(num_groups_39254) * np.long(segred_group_sizze_39253)),), - (np.long(segred_group_sizze_39253),)) + self.mainDetailedzisegred_small_87308_var, + ((np.int64(num_groups_87447) * np.int64(segred_group_sizze_87446)),), + (np.int64(segred_group_sizze_87446),)) if synchronous: sync(self) else: - groups_per_segment_45777 = sdiv_up64(num_groups_39254, - smax64(np.int64(1), - ((m_29166 * 
i32_res_29181) * i32_res_29181))) - elements_per_thread_45778 = sdiv_up64(i32_res_29175, - (segred_group_sizze_39253 * groups_per_segment_45777)) - virt_num_groups_45779 = (groups_per_segment_45777 * ((m_29166 * i32_res_29181) * i32_res_29181)) - num_threads_45780 = (num_groups_39254 * segred_group_sizze_39253) - threads_per_segment_45781 = (groups_per_segment_45777 * segred_group_sizze_39253) - group_res_arr_mem_45782 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_39253 * virt_num_groups_45779)), - "group_res_arr_mem_45782") - mainzicounter_mem_45784 = self.mainzicounter_mem_45784 - if ((1 * (np.long(num_groups_39254) * np.long(segred_group_sizze_39253))) != 0): - self.mainzisegred_large_39115_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_39253))), - np.int64(N_29165), - np.int64(i32_res_29175), - np.int64(i32_res_29181), - np.int64(num_groups_39254), - np.int64(groups_per_segment_45777), - np.int64(elements_per_thread_45778), - np.int64(virt_num_groups_45779), - np.int64(threads_per_segment_45781), - images_mem_44381, - binop_p_mem_44390, - mem_44531, mem_44536, - group_res_arr_mem_45782, - mainzicounter_mem_45784) + groups_per_segment_128799 = sdiv_up64(num_groups_87447, + smax64(np.int64(1), + ((m_70861 * k2p2zq_70876) * k2p2zq_70876))) + elements_per_thread_128800 = sdiv_up64(n_70864, + (segred_group_sizze_87446 * groups_per_segment_128799)) + virt_num_groups_128801 = (groups_per_segment_128799 * ((m_70861 * k2p2zq_70876) * k2p2zq_70876)) + num_threads_128802 = (num_groups_87447 * segred_group_sizze_87446) + threads_per_segment_128803 = (groups_per_segment_128799 * segred_group_sizze_87446) + group_res_arr_mem_128804 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_87446 * virt_num_groups_128801)), + "group_res_arr_mem_128804") + mainDetailedzicounter_mem_128806 = self.mainDetailedzicounter_mem_128806 + if ((1 * (np.int64(num_groups_87447) * 
np.int64(segred_group_sizze_87446))) != 0): + self.mainDetailedzisegred_large_87308_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_87446))), + np.int64(N_70860), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_87447), + np.int64(groups_per_segment_128799), + np.int64(elements_per_thread_128800), + np.int64(virt_num_groups_128801), + np.int64(threads_per_segment_128803), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281, + group_res_arr_mem_128804, + mainDetailedzicounter_mem_128806) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_39115_var, - ((np.long(num_groups_39254) * np.long(segred_group_sizze_39253)),), - (np.long(segred_group_sizze_39253),)) + self.mainDetailedzisegred_large_87308_var, + ((np.int64(num_groups_87447) * np.int64(segred_group_sizze_87446)),), + (np.int64(segred_group_sizze_87446),)) if synchronous: sync(self) - mem_44531 = None - defunc_3_map_res_mem_44537 = mem_44536 - defunc_3_map_res_mem_44543 = defunc_3_map_res_mem_44537 - defunc_3_map_res_mem_44549 = defunc_3_map_res_mem_44543 - m_29312 = (np.int32(2) * k2p2zq_29179) - x_29313 = (np.int64(2) * i32_res_29181) - nm_29314 = (i32_res_29181 * x_29313) - bounds_invalid_upwards_29315 = slt64(nm_29314, np.int64(0)) - valid_29316 = not(bounds_invalid_upwards_29315) - range_valid_c_29317 = True - assert valid_29316, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:79:21-29\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - nm_29314, - " is invalid.")) - zzero_29322 = (m_29312 == np.int32(0)) - nonzzero_29323 = not(zzero_29322) - nonzzero_cert_29324 = True - assert nonzzero_29323, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:74:41-47\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 
bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n" % ("division by zero",)) - loop_nonempty_29325 = slt32(np.int32(0), k2p2zq_29179) - loop_not_taken_29326 = not(loop_nonempty_29325) - protect_assert_disj_29327 = (nonzzero_29323 or loop_not_taken_29326) - nonzzero_cert_29328 = True - assert protect_assert_disj_29327, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:60:43-49\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:189:5-58\n #5 bfastfinal.fut:184:1-190:25\n" % ("division by zero",)) - i32_res_29329 = sext_i32_i64(m_29312) - x_29330 = (i32_res_29181 * i32_res_29329) - dim_ok_29331 = (x_29330 == nm_29314) - dim_ok_cert_29332 = True - assert dim_ok_29331, ("Error: %s\n\nBacktrace:\n-> #0 /prelude/array.fut:141:3-33\n #1 helpers.fut:81:16-43\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n" % ("new shape has different number of elements than old shape",)) - j_m_i_29333 = (x_29313 - i32_res_29181) - empty_slice_29334 = (j_m_i_29333 == np.int64(0)) - m_29335 = (j_m_i_29333 - np.int64(1)) - i_p_m_t_s_29336 = (i32_res_29181 + m_29335) - zzero_leq_i_p_m_t_s_29337 = sle64(np.int64(0), i_p_m_t_s_29336) - i_p_m_t_s_leq_w_29338 = slt64(i_p_m_t_s_29336, i32_res_29329) - i_lte_j_29339 = sle64(i32_res_29181, x_29313) - y_29340 = (i_lte_j_29262 and i_p_m_t_s_leq_w_29338) - y_29341 = (zzero_leq_i_p_m_t_s_29337 and y_29340) - y_29342 = (i_lte_j_29339 and y_29341) - forwards_ok_29343 = (i_lte_j_29262 and y_29342) - ok_or_empty_29344 = (empty_slice_29334 or forwards_ok_29343) - index_ok_29345 = (ok_or_empty_29278 and ok_or_empty_29344) - index_certs_29346 = True - assert index_ok_29345, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-30\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:189:5-58\n #3 bfastfinal.fut:184:1-190:25\n" % ("Index [", + mem_124276 = None + defunc_3_map_res_mem_124282 = mem_124281 + defunc_3_map_res_mem_124288 = 
defunc_3_map_res_mem_124282 + defunc_3_map_res_mem_124294 = defunc_3_map_res_mem_124288 + m_72499 = (np.int64(2) * k2p2zq_70876) + nm_72500 = (k2p2zq_70876 * m_72499) + bounds_invalid_upwards_72501 = slt64(nm_72500, np.int64(0)) + valid_72502 = not(bounds_invalid_upwards_72501) + range_valid_c_72503 = True + assert valid_72502, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:73:21-27\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + nm_72500, + " is invalid.")) + zzero_72505 = (m_72499 == np.int64(0)) + nonzzero_72506 = not(zzero_72505) + nonzzero_cert_72507 = True + assert nonzzero_72506, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:68:41-47\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("division by zero",)) + loop_nonempty_72508 = slt64(np.int64(0), k2p2zq_70876) + loop_not_taken_72509 = not(loop_nonempty_72508) + protect_assert_disj_72510 = (nonzzero_72506 or loop_not_taken_72509) + nonzzero_cert_72511 = True + assert protect_assert_disj_72510, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:54:43-49\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("division by zero",)) + j_m_i_72512 = (m_72499 - k2p2zq_70876) + empty_slice_72513 = (j_m_i_72512 == np.int64(0)) + m_72514 = (j_m_i_72512 - np.int64(1)) + i_p_m_t_s_72515 = (k2p2zq_70876 + m_72514) + zzero_leq_i_p_m_t_s_72516 = sle64(np.int64(0), i_p_m_t_s_72515) + i_p_m_t_s_leq_w_72517 = slt64(i_p_m_t_s_72515, m_72499) + i_lte_j_72518 = sle64(k2p2zq_70876, m_72499) + y_72519 = (i_lte_j_70951 and i_p_m_t_s_leq_w_72517) + y_72520 = (zzero_leq_i_p_m_t_s_72516 and y_72519) + y_72521 = (i_lte_j_72518 and y_72520) + forwards_ok_72522 = (i_lte_j_70951 and y_72521) + ok_or_empty_72523 = 
(empty_slice_72513 or forwards_ok_72522) + index_ok_72524 = (ok_or_empty_70954 and ok_or_empty_72523) + index_certs_72525 = True + assert index_ok_72524, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-30\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:185:3-72\n #3 bfastfinal.fut:181:1-185:72\n" % ("Index [", np.int64(0), ":", - i32_res_29181, + k2p2zq_70876, ", ", - i32_res_29181, + k2p2zq_70876, ":", - x_29313, + m_72499, "] out of bounds for array of shape [", - i32_res_29181, + k2p2zq_70876, "][", - i32_res_29329, + m_72499, "].")) - dim_match_29347 = (i32_res_29181 == j_m_i_29333) - empty_or_match_cert_29348 = True - assert dim_match_29347, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-45\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:189:5-58\n #3 bfastfinal.fut:184:1-190:25\n" % ("Value of (core language) shape (", - i32_res_29181, + dim_match_72526 = (k2p2zq_70876 == j_m_i_72512) + empty_or_match_cert_72527 = True + assert dim_match_72526, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-45\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:185:3-72\n #3 bfastfinal.fut:181:1-185:72\n" % ("Value of (core language) shape (", + k2p2zq_70876, ", ", - j_m_i_29333, + j_m_i_72512, ") cannot match shape of type `[", - i32_res_29181, + k2p2zq_70876, "][", - i32_res_29181, - "]f32`.")) - max_group_sizze_39548 = self.max_group_size - fits_39549 = sle64(nm_29314, max_group_sizze_39548) - suff_intra_par_39547 = (self.sizes["main.suff_intra_par_11"] <= nm_29314) - intra_suff_and_fits_39550 = (suff_intra_par_39547 and fits_39549) - nest_sizze_40214 = (m_29166 * nm_29314) - segmap_group_sizze_40215 = self.sizes["main.segmap_group_size_40158"] - suff_intra_par_40254 = (self.sizes["main.suff_intra_par_13"] <= nm_29314) - intra_suff_and_fits_40255 = (fits_39549 and suff_intra_par_40254) - segmap_group_sizze_40307 = self.sizes["main.segmap_group_size_40040"] - segmap_group_sizze_40323 = 
self.sizes["main.segmap_group_size_39941"] - segmap_group_sizze_40373 = self.sizes["main.segmap_group_size_39871"] - y_40383 = (i32_res_29181 * j_m_i_29333) - nest_sizze_40384 = (m_29166 * y_40383) - segmap_group_sizze_40385 = self.sizes["main.segmap_group_size_39642"] - segmap_usable_groups_40308 = sdiv_up_safe64(m_29166, - segmap_group_sizze_40307) - segmap_usable_groups_40324 = sdiv_up_safe64(nest_sizze_40214, - segmap_group_sizze_40323) - segmap_usable_groups_40374 = sdiv_up_safe64(nest_sizze_40214, - segmap_group_sizze_40373) - bytes_44552 = (np.int64(4) * nm_29314) - bytes_44575 = (np.int64(4) * nest_sizze_40214) - binop_x_44626 = (j_m_i_29333 * nest_sizze_39200) - bytes_44624 = (np.int64(4) * binop_x_44626) - local_memory_capacity_45819 = self.max_local_memory - if intra_suff_and_fits_39550: - defunc_3_map_res_ixfn_44628 = i32_res_29181 - else: - defunc_3_map_res_ixfn_44628 = j_m_i_29333 - local_memory_capacity_45882 = self.max_local_memory - if (sle64(((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))) + (bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_45882)) and intra_suff_and_fits_39550): - mem_44573 = opencl_alloc(self, bytes_44544, "mem_44573") - if ((1 * (np.long(m_29166) * np.long(nm_29314))) != 0): - self.mainzisegmap_intragroup_39374_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - cl.LocalMemory(np.long(bytes_44552)), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int32(m_29312), - np.int64(nm_29314), - np.int64(i32_res_29329), - defunc_3_map_res_mem_44549, - mem_44573) + k2p2zq_70876, + "]f64`.")) + fits_87532 = sle64(nm_72500, max_group_sizze_77729) + suff_intra_par_87530 = (self.sizes["mainDetailed.suff_intra_par_26"] <= nm_72500) + intra_suff_and_fits_87533 = (suff_intra_par_87530 and fits_87532) + nest_sizze_87963 = (m_70861 * 
nm_72500) + segmap_group_sizze_87964 = self.sizes["mainDetailed.segmap_group_size_87916"] + suff_intra_par_87995 = (self.sizes["mainDetailed.suff_intra_par_27"] <= nm_72500) + intra_suff_and_fits_87996 = (fits_87532 and suff_intra_par_87995) + segmap_group_sizze_88044 = self.sizes["mainDetailed.segmap_group_size_87832"] + segmap_group_sizze_88057 = self.sizes["mainDetailed.segmap_group_size_87743"] + segmap_group_sizze_88102 = self.sizes["mainDetailed.segmap_group_size_87723"] + segmap_group_sizze_88112 = self.sizes["mainDetailed.segmap_group_size_87613"] + segmap_usable_groups_88045 = sdiv_up_safe64(m_70861, + segmap_group_sizze_88044) + segmap_usable_groups_88058 = sdiv_up_safe64(nest_sizze_87963, + segmap_group_sizze_88057) + segmap_usable_groups_88103 = sdiv_up_safe64(nest_sizze_87963, + segmap_group_sizze_88102) + bytes_124297 = (np.int64(8) * nm_72500) + bytes_124320 = (np.int64(8) * nest_sizze_87963) + local_memory_capacity_128897 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128897)) and intra_suff_and_fits_87533): + mem_124318 = opencl_alloc(self, bytes_121997, "mem_124318") + if ((1 * (np.int64(m_70861) * np.int64(nm_72500))) != 0): + self.mainDetailedzisegmap_intragroup_87528_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_70876), + np.int64(m_72499), + np.int64(nm_72500), + defunc_3_map_res_mem_124294, + mem_124318) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_39374_var, - ((np.long(m_29166) * np.long(nm_29314)),), - (np.long(nm_29314),)) + self.mainDetailedzisegmap_intragroup_87528_var, + ((np.int64(m_70861) * np.int64(nm_72500)),), + (np.int64(nm_72500),)) if synchronous: 
sync(self) self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_44629 = mem_44573 + defunc_3_map_res_mem_124372 = mem_124318 else: - segmap_usable_groups_40216 = sdiv_up64(nest_sizze_40214, - segmap_group_sizze_40215) - mem_44577 = opencl_alloc(self, bytes_44575, "mem_44577") - if ((1 * (np.long(segmap_usable_groups_40216) * np.long(segmap_group_sizze_40215))) != 0): - self.mainzisegmap_40155_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_29166), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int32(m_29312), - np.int64(nm_29314), - defunc_3_map_res_mem_44549, - mem_44577) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40155_var, - ((np.long(segmap_usable_groups_40216) * np.long(segmap_group_sizze_40215)),), - (np.long(segmap_group_sizze_40215),)) + segmap_usable_groups_87965 = sdiv_up64(nest_sizze_87963, + segmap_group_sizze_87964) + mem_124322 = opencl_alloc(self, bytes_124320, "mem_124322") + if ((1 * (np.int64(segmap_usable_groups_87965) * np.int64(segmap_group_sizze_87964))) != 0): + self.mainDetailedzisegmap_87913_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_72499), + np.int64(nm_72500), + defunc_3_map_res_mem_124294, + mem_124322) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87913_var, + ((np.int64(segmap_usable_groups_87965) * np.int64(segmap_group_sizze_87964)),), + (np.int64(segmap_group_sizze_87964),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - ctx_param_ext_44578 = m_29166 - ctx_param_ext_44579 = nm_29314 - ctx_param_ext_44580 = np.int64(0) - ctx_param_ext_44581 = nm_29314 - ctx_param_ext_44582 = m_29166 - ctx_param_ext_44583 = np.int64(1) - ctx_param_ext_44584 = nm_29314 - mem_param_44585 = mem_44577 - i_40240 = np.int32(0) - one_46862 = np.int32(1) - for counter_46861 in range(k2p2zq_29179): - i32_res_40242 = 
sext_i32_i64(i_40240) - x_40243 = sle64(np.int64(0), i32_res_40242) - y_40244 = slt64(i32_res_40242, nm_29314) - bounds_check_40245 = (x_40243 and y_40244) - index_certs_40246 = True - assert bounds_check_40245, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n" % ("Index [", - i32_res_40242, - "] out of bounds for array of shape [", - nm_29314, - "].")) - local_memory_capacity_45842 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44607 = m_29166 + ctx_param_ext_124323 = m_70861 + ctx_param_ext_124324 = nm_72500 + ctx_param_ext_124325 = np.int64(0) + ctx_param_ext_124326 = nm_72500 + ctx_param_ext_124327 = m_70861 + ctx_param_ext_124328 = np.int64(1) + ctx_param_ext_124329 = nm_72500 + mem_param_124330 = mem_124322 + i_87986 = np.int64(0) + one_129914 = np.int64(1) + for counter_129913 in range(k2p2zq_70876): + y_87988 = slt64(i_87986, nm_72500) + index_certs_87989 = True + assert y_87988, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_87986, + "] out of bounds for array of shape [", + nm_72500, + "].")) + local_memory_capacity_128863 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124351 = m_70861 else: - gauss_jordan_res_ixfn_44607 = ctx_param_ext_44582 - local_memory_capacity_45843 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44608 = nm_29314 + gauss_jordan_res_ixfn_124351 = ctx_param_ext_124327 + local_memory_capacity_128864 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124352 = nm_72500 else: - gauss_jordan_res_ixfn_44608 = ctx_param_ext_44584 - local_memory_capacity_45844 = self.max_local_memory - if intra_suff_and_fits_40255: - 
gauss_jordan_res_ixfn_44609 = m_29166 + gauss_jordan_res_ixfn_124352 = ctx_param_ext_124329 + local_memory_capacity_128865 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124353 = m_70861 else: - gauss_jordan_res_ixfn_44609 = ctx_param_ext_44578 - local_memory_capacity_45845 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44610 = nm_29314 + gauss_jordan_res_ixfn_124353 = ctx_param_ext_124323 + local_memory_capacity_128866 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124354 = nm_72500 else: - gauss_jordan_res_ixfn_44610 = ctx_param_ext_44579 - local_memory_capacity_45846 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44611 = nm_29314 + gauss_jordan_res_ixfn_124354 = ctx_param_ext_124324 + local_memory_capacity_128867 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124355 = nm_72500 else: - gauss_jordan_res_ixfn_44611 = ctx_param_ext_44581 - local_memory_capacity_45847 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44612 = np.int64(1) + gauss_jordan_res_ixfn_124355 = ctx_param_ext_124326 + local_memory_capacity_128868 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124356 = np.int64(1) else: - gauss_jordan_res_ixfn_44612 = ctx_param_ext_44583 - local_memory_capacity_45848 = self.max_local_memory - if intra_suff_and_fits_40255: - gauss_jordan_res_ixfn_44613 = np.int64(0) + gauss_jordan_res_ixfn_124356 = ctx_param_ext_124328 + local_memory_capacity_128869 = self.max_local_memory + if intra_suff_and_fits_87996: + gauss_jordan_res_ixfn_124357 = np.int64(0) else: - gauss_jordan_res_ixfn_44613 = ctx_param_ext_44580 - local_memory_capacity_45876 = self.max_local_memory - if ((sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45876)) and sle64((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))), - 
sext_i32_i64(local_memory_capacity_45876))) and intra_suff_and_fits_40255): - mem_44590 = opencl_alloc(self, bytes_44575, "mem_44590") - group_sizze_45852 = self.sizes["main.group_size_45852"] - num_groups_45853 = sdiv_up64((m_29166 * nm_29314), group_sizze_45852) - if ((1 * (np.long(num_groups_45853) * np.long(group_sizze_45852))) != 0): - self.mainzicopy_45849_var.set_args(np.int64(m_29166), - np.int64(nm_29314), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, mem_44590) - cl.enqueue_nd_range_kernel(self.queue, self.mainzicopy_45849_var, - ((np.long(num_groups_45853) * np.long(group_sizze_45852)),), - (np.long(group_sizze_45852),)) - if synchronous: - sync(self) - mem_44598 = opencl_alloc(self, bytes_44575, "mem_44598") - if ((1 * (np.long(m_29166) * np.long(nm_29314))) != 0): - self.mainzisegmap_intragroup_39701_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - np.int64(m_29166), - np.int32(k2p2zq_29179), - np.int32(m_29312), - np.int64(nm_29314), - np.int32(i_40240), - np.int64(i32_res_40242), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44590, - mem_44598) + gauss_jordan_res_ixfn_124357 = ctx_param_ext_124325 + local_memory_capacity_128891 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128891)) and intra_suff_and_fits_87996): + mem_124342 = opencl_alloc(self, bytes_124320, "mem_124342") + if ((1 * (np.int64(m_70861) * np.int64(nm_72500))) != 0): + self.mainDetailedzisegmap_intragroup_87666_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + 
cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_70876), + np.int64(m_72499), + np.int64(nm_72500), + np.int64(i_87986), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124342) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_39701_var, - ((np.long(m_29166) * np.long(nm_29314)),), - (np.long(nm_29314),)) + self.mainDetailedzisegmap_intragroup_87666_var, + ((np.int64(m_70861) * np.int64(nm_72500)),), + (np.int64(nm_72500),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_44590 = None - gauss_jordan_res_mem_44614 = mem_44598 + gauss_jordan_res_mem_124358 = mem_124342 else: - mem_44601 = opencl_alloc(self, m_29166, "mem_44601") - if ((1 * (np.long(segmap_usable_groups_40308) * np.long(segmap_group_sizze_40307))) != 0): - self.mainzisegmap_40038_var.set_args(self.global_failure, - np.int64(m_29166), - np.int64(i32_res_40242), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, mem_44601) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40038_var, - ((np.long(segmap_usable_groups_40308) * np.long(segmap_group_sizze_40307)),), - (np.long(segmap_group_sizze_40307),)) + mem_124345 = opencl_alloc(self, m_70861, "mem_124345") + if ((1 * (np.int64(segmap_usable_groups_88045) * np.int64(segmap_group_sizze_88044))) != 0): + self.mainDetailedzisegmap_87830_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(i_87986), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124345) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87830_var, + ((np.int64(segmap_usable_groups_88045) * np.int64(segmap_group_sizze_88044)),), + (np.int64(segmap_group_sizze_88044),)) if synchronous: sync(self) - mem_44605 = opencl_alloc(self, bytes_44575, "mem_44605") - if ((1 * 
(np.long(segmap_usable_groups_40324) * np.long(segmap_group_sizze_40323))) != 0): - self.mainzisegmap_39938_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_29166), - np.int32(k2p2zq_29179), - np.int32(m_29312), - np.int64(nm_29314), - np.int32(i_40240), - np.int64(i32_res_40242), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, mem_44601, - mem_44605) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_39938_var, - ((np.long(segmap_usable_groups_40324) * np.long(segmap_group_sizze_40323)),), - (np.long(segmap_group_sizze_40323),)) + mem_124349 = opencl_alloc(self, bytes_124320, "mem_124349") + if ((1 * (np.int64(segmap_usable_groups_88058) * np.int64(segmap_group_sizze_88057))) != 0): + self.mainDetailedzisegmap_87740_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_72499), + np.int64(nm_72500), + np.int64(i_87986), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124345, mem_124349) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87740_var, + ((np.int64(segmap_usable_groups_88058) * np.int64(segmap_group_sizze_88057)),), + (np.int64(segmap_group_sizze_88057),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_44601 = None - if ((1 * (np.long(segmap_usable_groups_40374) * np.long(segmap_group_sizze_40373))) != 0): - self.mainzisegmap_39868_var.set_args(self.global_failure, - np.int64(m_29166), - np.int64(nm_29314), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, mem_44605) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_39868_var, - ((np.long(segmap_usable_groups_40374) * np.long(segmap_group_sizze_40373)),), - 
(np.long(segmap_group_sizze_40373),)) + mem_124345 = None + if ((1 * (np.int64(segmap_usable_groups_88103) * np.int64(segmap_group_sizze_88102))) != 0): + self.mainDetailedzisegmap_87720_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(nm_72500), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124349) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87720_var, + ((np.int64(segmap_usable_groups_88103) * np.int64(segmap_group_sizze_88102)),), + (np.int64(segmap_group_sizze_88102),)) if synchronous: sync(self) - mem_44605 = None - gauss_jordan_res_mem_44614 = mem_param_44585 - ctx_param_ext_tmp_45833 = gauss_jordan_res_ixfn_44609 - ctx_param_ext_tmp_45834 = gauss_jordan_res_ixfn_44610 - ctx_param_ext_tmp_45835 = gauss_jordan_res_ixfn_44613 - ctx_param_ext_tmp_45836 = gauss_jordan_res_ixfn_44611 - ctx_param_ext_tmp_45837 = gauss_jordan_res_ixfn_44607 - ctx_param_ext_tmp_45838 = gauss_jordan_res_ixfn_44612 - ctx_param_ext_tmp_45839 = gauss_jordan_res_ixfn_44608 - mem_param_tmp_45840 = gauss_jordan_res_mem_44614 - ctx_param_ext_44578 = ctx_param_ext_tmp_45833 - ctx_param_ext_44579 = ctx_param_ext_tmp_45834 - ctx_param_ext_44580 = ctx_param_ext_tmp_45835 - ctx_param_ext_44581 = ctx_param_ext_tmp_45836 - ctx_param_ext_44582 = ctx_param_ext_tmp_45837 - ctx_param_ext_44583 = ctx_param_ext_tmp_45838 - ctx_param_ext_44584 = ctx_param_ext_tmp_45839 - mem_param_44585 = mem_param_tmp_45840 - i_40240 += one_46862 - gauss_jordan_res_r_ixfn_44615 = ctx_param_ext_44578 - gauss_jordan_res_r_ixfn_44616 = ctx_param_ext_44579 - gauss_jordan_res_r_ixfn_44617 = ctx_param_ext_44580 - gauss_jordan_res_r_ixfn_44618 = ctx_param_ext_44581 - gauss_jordan_res_r_ixfn_44619 = ctx_param_ext_44582 - gauss_jordan_res_r_ixfn_44620 = ctx_param_ext_44583 - gauss_jordan_res_r_ixfn_44621 = ctx_param_ext_44584 - gauss_jordan_res_r_mem_44622 = mem_param_44585 - mem_44577 = None - 
segmap_usable_groups_40386 = sdiv_up64(nest_sizze_40384, - segmap_group_sizze_40385) - mem_44627 = opencl_alloc(self, bytes_44624, "mem_44627") - if ((1 * (np.long(segmap_usable_groups_40386) * np.long(segmap_group_sizze_40385))) != 0): - self.mainzisegmap_39638_var.set_args(self.global_failure, - np.int64(m_29166), - np.int64(i32_res_29181), - np.int64(nm_29314), - np.int64(i32_res_29329), - np.int64(x_29330), - np.int64(j_m_i_29333), - np.int64(gauss_jordan_res_r_ixfn_44617), - np.int64(gauss_jordan_res_r_ixfn_44618), - np.int64(gauss_jordan_res_r_ixfn_44620), - gauss_jordan_res_r_mem_44622, - mem_44627) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_39638_var, - ((np.long(segmap_usable_groups_40386) * np.long(segmap_group_sizze_40385)),), - (np.long(segmap_group_sizze_40385),)) + mem_124349 = None + gauss_jordan_res_mem_124358 = mem_param_124330 + ctx_param_ext_tmp_128854 = gauss_jordan_res_ixfn_124353 + ctx_param_ext_tmp_128855 = gauss_jordan_res_ixfn_124354 + ctx_param_ext_tmp_128856 = gauss_jordan_res_ixfn_124357 + ctx_param_ext_tmp_128857 = gauss_jordan_res_ixfn_124355 + ctx_param_ext_tmp_128858 = gauss_jordan_res_ixfn_124351 + ctx_param_ext_tmp_128859 = gauss_jordan_res_ixfn_124356 + ctx_param_ext_tmp_128860 = gauss_jordan_res_ixfn_124352 + mem_param_tmp_128861 = gauss_jordan_res_mem_124358 + ctx_param_ext_124323 = ctx_param_ext_tmp_128854 + ctx_param_ext_124324 = ctx_param_ext_tmp_128855 + ctx_param_ext_124325 = ctx_param_ext_tmp_128856 + ctx_param_ext_124326 = ctx_param_ext_tmp_128857 + ctx_param_ext_124327 = ctx_param_ext_tmp_128858 + ctx_param_ext_124328 = ctx_param_ext_tmp_128859 + ctx_param_ext_124329 = ctx_param_ext_tmp_128860 + mem_param_124330 = mem_param_tmp_128861 + i_87986 += one_129914 + gauss_jordan_res_r_ixfn_124359 = ctx_param_ext_124323 + gauss_jordan_res_r_ixfn_124360 = ctx_param_ext_124324 + gauss_jordan_res_r_ixfn_124361 = ctx_param_ext_124325 + gauss_jordan_res_r_ixfn_124362 = ctx_param_ext_124326 + 
gauss_jordan_res_r_ixfn_124363 = ctx_param_ext_124327 + gauss_jordan_res_r_ixfn_124364 = ctx_param_ext_124328 + gauss_jordan_res_r_ixfn_124365 = ctx_param_ext_124329 + gauss_jordan_res_r_mem_124366 = mem_param_124330 + mem_124322 = None + segmap_usable_groups_88113 = sdiv_up64(comparatee_87424, + segmap_group_sizze_88112) + mem_124371 = opencl_alloc(self, bytes_121997, "mem_124371") + if ((1 * (np.int64(segmap_usable_groups_88113) * np.int64(segmap_group_sizze_88112))) != 0): + self.mainDetailedzisegmap_87609_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(m_72499), + np.int64(nm_72500), + np.int64(gauss_jordan_res_r_ixfn_124361), + np.int64(gauss_jordan_res_r_ixfn_124362), + np.int64(gauss_jordan_res_r_ixfn_124364), + gauss_jordan_res_r_mem_124366, + mem_124371) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_87609_var, + ((np.int64(segmap_usable_groups_88113) * np.int64(segmap_group_sizze_88112)),), + (np.int64(segmap_group_sizze_88112),)) if synchronous: sync(self) - gauss_jordan_res_r_mem_44622 = None - defunc_3_map_res_mem_44629 = mem_44627 - defunc_3_map_res_mem_44549 = None - suff_outer_par_40394 = (self.sizes["main.suff_outer_par_16"] <= m_29166) - segmap_group_sizze_40416 = self.sizes["main.segmap_group_size_40398"] - max_num_groups_45883 = self.sizes["main.segmap_num_groups_40400"] - num_groups_40417 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segmap_group_sizze_40416), - sext_i32_i64(max_num_groups_45883)))) - suff_outer_par_40501 = (self.sizes["main.suff_outer_par_17"] <= nest_sizze_39200) - nest_sizze_40517 = (i32_res_29175 * nest_sizze_39200) - segred_group_sizze_40518 = self.sizes["main.segred_group_size_40460"] - max_num_groups_45884 = self.sizes["main.segred_num_groups_40462"] - num_groups_40519 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_40517, - segred_group_sizze_40518), - sext_i32_i64(max_num_groups_45884)))) - Ty_42675 = 
self.sizes["main.Ty_42672"] - Ry_42676 = self.sizes["main.Ry_42674"] - Tx_42677 = self.sizes["main.Tx_42671"] - Rx_42678 = self.sizes["main.Rx_42673"] - Tk_42679 = self.sizes["main.Tk_42670"] - TxRx_42682 = (Tx_42677 * Rx_42678) - TyRy_42683 = (Ty_42675 * Ry_42676) - a_loc_szz_42685 = (Tk_42679 * TyRy_42683) - binop_x_42686 = (Tx_42677 * Tk_42679) - b_loc_szz_42687 = (Rx_42678 * binop_x_42686) - group_sizze_42691 = (Ty_42675 * Tx_42677) - bytes_44648 = (np.int64(4) * nest_sizze_39200) - binop_x_44662 = (Ry_42676 * group_sizze_42691) - binop_x_44663 = (Rx_42678 * binop_x_44662) - bytes_44660 = (np.int64(4) * binop_x_44663) - binop_x_44654 = (Ry_42676 * Rx_42678) - bytes_44653 = (np.int64(4) * binop_x_44654) - bytes_44665 = (np.int64(4) * a_loc_szz_42685) - bytes_44667 = (np.int64(4) * b_loc_szz_42687) - bytes_44736 = (np.int64(4) * binop_x_44662) - binop_x_44742 = (Rx_42678 * group_sizze_42691) - bytes_44740 = (np.int64(4) * binop_x_44742) - bytes_44728 = (np.int64(4) * Ry_42676) - bytes_44730 = (np.int64(4) * Rx_42678) - binop_x_45464 = (np.int64(4) * Ty_42675) - binop_x_45465 = (Tx_42677 * binop_x_45464) - binop_x_45466 = (Ry_42676 * binop_x_45465) - sizze_45467 = (Rx_42678 * binop_x_45466) - num_threads_45646 = (segmap_group_sizze_40416 * num_groups_40417) - total_sizze_45647 = (bytes_44448 * num_threads_45646) - local_memory_capacity_46014 = self.max_local_memory + gauss_jordan_res_r_mem_124366 = None + defunc_3_map_res_mem_124372 = mem_124371 + defunc_3_map_res_mem_124294 = None + suff_outer_par_88120 = (self.sizes["mainDetailed.suff_outer_par_28"] <= m_70861) + segmap_group_sizze_88142 = self.sizes["mainDetailed.segmap_group_size_88124"] + max_num_groups_128898 = self.sizes["mainDetailed.segmap_num_groups_88126"] + num_groups_88143 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_88142), + sext_i32_i64(max_num_groups_128898)))) + suff_outer_par_88227 = (self.sizes["mainDetailed.suff_outer_par_29"] <= binop_x_120244) + 
nest_sizze_88243 = (n_70864 * binop_x_120244) + segred_group_sizze_88244 = self.sizes["mainDetailed.segred_group_size_88186"] + max_num_groups_128899 = self.sizes["mainDetailed.segred_num_groups_88188"] + num_groups_88245 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_88243, + segred_group_sizze_88244), + sext_i32_i64(max_num_groups_128899)))) + Ty_118372 = self.sizes["mainDetailed.Ty_118369"] + Ry_118373 = self.sizes["mainDetailed.Ry_118371"] + Tx_118374 = self.sizes["mainDetailed.Tx_118368"] + Rx_118375 = self.sizes["mainDetailed.Rx_118370"] + Tk_118376 = self.sizes["mainDetailed.Tk_118367"] + TxRx_118379 = (Tx_118374 * Rx_118375) + TyRy_118380 = (Ty_118372 * Ry_118373) + a_loc_szz_118382 = (Tk_118376 * TyRy_118380) + binop_x_118383 = (Tx_118374 * Tk_118376) + b_loc_szz_118384 = (Rx_118375 * binop_x_118383) + group_sizze_118388 = (Ty_118372 * Tx_118374) + binop_x_124405 = (Ry_118373 * group_sizze_118388) + binop_x_124406 = (Rx_118375 * binop_x_124405) + bytes_124403 = (np.int64(8) * binop_x_124406) + binop_x_124397 = (Ry_118373 * Rx_118375) + bytes_124396 = (np.int64(8) * binop_x_124397) + bytes_124408 = (np.int64(8) * a_loc_szz_118382) + bytes_124410 = (np.int64(8) * b_loc_szz_118384) + bytes_124479 = (np.int64(8) * binop_x_124405) + binop_x_124485 = (Rx_118375 * group_sizze_118388) + bytes_124483 = (np.int64(8) * binop_x_124485) + bytes_124471 = (np.int64(8) * Ry_118373) + bytes_124473 = (np.int64(8) * Rx_118375) + binop_x_125378 = (np.int64(8) * Ty_118372) + binop_x_125379 = (Tx_118374 * binop_x_125378) + binop_x_125380 = (Ry_118373 * binop_x_125379) + sizze_125381 = (Rx_118375 * binop_x_125380) + num_threads_125825 = (segmap_group_sizze_88142 * num_groups_88143) + total_sizze_125826 = (bytes_120247 * num_threads_125825) + local_memory_capacity_129029 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46014)) and suff_outer_par_40394): - mem_44632 = opencl_alloc(self, bytes_44398, "mem_44632") - 
self.futhark_builtinzhgpu_map_transpose_f32(mem_44632, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_29165, m_29166) - mem_44650 = opencl_alloc(self, bytes_44648, "mem_44650") - mem_44635 = opencl_alloc(self, total_sizze_45647, "mem_44635") - if ((1 * (np.long(num_groups_40417) * np.long(segmap_group_sizze_40416))) != 0): - self.mainzisegmap_40396_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int32(n_29169), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int64(num_groups_40417), - binop_p_mem_44390, mem_44632, - mem_44635, mem_44650) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40396_var, - ((np.long(num_groups_40417) * np.long(segmap_group_sizze_40416)),), - (np.long(segmap_group_sizze_40416),)) + sext_i32_i64(local_memory_capacity_129029)) and suff_outer_par_88120): + mem_124375 = opencl_alloc(self, bytes_120125, "mem_124375") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124375, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_70860, m_70861) + mem_124393 = opencl_alloc(self, bytes_121990, "mem_124393") + mem_124378 = opencl_alloc(self, total_sizze_125826, "mem_124378") + if ((1 * (np.int64(num_groups_88143) * np.int64(segmap_group_sizze_88142))) != 0): + self.mainDetailedzisegmap_88122_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_88143), + np.int64(num_threads_125825), + binop_p_mem_120117, + mem_124375, mem_124378, + mem_124393) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_88122_var, + ((np.int64(num_groups_88143) * np.int64(segmap_group_sizze_88142)),), + (np.int64(segmap_group_sizze_88142),)) if synchronous: sync(self) - mem_44632 = None - mem_44635 = None - mem_44848 = opencl_alloc(self, bytes_44648, "mem_44848") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44848, np.int64(0), - mem_44650, np.int64(0), - np.int64(1), m_29166, - i32_res_29181) 
- mem_44650 = None - defunc_3_map_res_mem_44850 = mem_44848 + mem_124375 = None + mem_124378 = None + mem_124591 = opencl_alloc(self, bytes_121990, "mem_124591") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124591, np.int64(0), + mem_124393, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_124393 = None + defunc_3_map_res_mem_124593 = mem_124591 else: - local_memory_capacity_46013 = self.max_local_memory - if (sle64(((bytes_44665 + srem64((np.int64(8) - srem64(bytes_44665, - np.int64(8))), - np.int64(8))) + (bytes_44667 + srem64((np.int64(8) - srem64(bytes_44667, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46013)) and suff_outer_par_40501): - tk_div_tx_42680 = sdiv_up64(Tk_42679, Tx_42677) - tk_div_ty_42681 = sdiv_up64(Tk_42679, Ty_42675) - gridDim_x_42688 = sdiv_up64(i32_res_29181, TxRx_42682) - gridDim_y_42689 = sdiv_up64(m_29166, TyRy_42683) - grid_sizze_42690 = (gridDim_x_42688 * gridDim_y_42689) - full_tiles_42719 = squot64(i32_res_29175, Tk_42679) - kk_42926 = (Tk_42679 * full_tiles_42719) - mem_44840 = opencl_alloc(self, bytes_44648, "mem_44840") - if ((1 * (np.long(grid_sizze_42690) * np.long(group_sizze_42691))) != 0): - self.mainzisegmap_intragroup_42694_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44667)), - cl.LocalMemory(np.long(bytes_44665)), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29175), - np.int64(i32_res_29181), - np.int64(gridDim_x_42688), - np.int64(full_tiles_42719), - np.int64(kk_42926), - images_mem_44381, - mem_44393, mem_44840) + local_memory_capacity_129028 = self.max_local_memory + if (sle64(((bytes_124408 + srem64((np.int64(8) - srem64(bytes_124408, + np.int64(8))), + np.int64(8))) + (bytes_124410 + srem64((np.int64(8) - srem64(bytes_124410, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129028)) and suff_outer_par_88227): + tk_div_tx_118377 = sdiv_up64(Tk_118376, Tx_118374) + tk_div_ty_118378 = sdiv_up64(Tk_118376, Ty_118372) + 
gridDim_x_118385 = sdiv_up64(k2p2zq_70876, TxRx_118379) + gridDim_y_118386 = sdiv_up64(m_70861, TyRy_118380) + grid_sizze_118387 = (gridDim_x_118385 * gridDim_y_118386) + full_tiles_118416 = squot64(n_70864, Tk_118376) + kk_118623 = (Tk_118376 * full_tiles_118416) + mem_124583 = opencl_alloc(self, bytes_121990, "mem_124583") + if ((1 * (np.int64(grid_sizze_118387) * np.int64(group_sizze_118388))) != 0): + self.mainDetailedzisegmap_intragroup_118391_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124410)), + cl.LocalMemory(np.int64(bytes_124408)), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(gridDim_x_118385), + np.int64(full_tiles_118416), + np.int64(kk_118623), + mem_120120, + mem_124142, + mem_124583) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_42694_var, - ((np.long(grid_sizze_42690) * np.long(group_sizze_42691)),), - (np.long(group_sizze_42691),)) + self.mainDetailedzisegmap_intragroup_118391_var, + ((np.int64(grid_sizze_118387) * np.int64(group_sizze_118388)),), + (np.int64(group_sizze_118388),)) if synchronous: sync(self) - defunc_3_map_res_mem_44845 = mem_44840 + defunc_3_map_res_mem_124588 = mem_124583 else: - mem_44844 = opencl_alloc(self, bytes_44648, "mem_44844") - if slt64((i32_res_29175 * np.int64(2)), segred_group_sizze_40518): - segment_sizze_nonzzero_45953 = smax64(np.int64(1), i32_res_29175) - num_threads_45954 = (num_groups_40519 * segred_group_sizze_40518) - if ((1 * (np.long(num_groups_40519) * np.long(segred_group_sizze_40518))) != 0): - self.mainzisegred_small_40466_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40518))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29175), - np.int64(i32_res_29181), - np.int64(num_groups_40519), - np.int64(segment_sizze_nonzzero_45953), - images_mem_44381, - binop_p_mem_44390, - mem_44844) + mem_124587 = opencl_alloc(self, bytes_121990, "mem_124587") 
+ if slt64((n_70864 * np.int64(2)), segred_group_sizze_88244): + segment_sizze_nonzzero_128968 = smax64(np.int64(1), n_70864) + num_threads_128969 = (num_groups_88245 * segred_group_sizze_88244) + if ((1 * (np.int64(num_groups_88245) * np.int64(segred_group_sizze_88244))) != 0): + self.mainDetailedzisegred_small_88192_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88244))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_88245), + np.int64(segment_sizze_nonzzero_128968), + binop_p_mem_120117, + mem_124142, + mem_124587) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_40466_var, - ((np.long(num_groups_40519) * np.long(segred_group_sizze_40518)),), - (np.long(segred_group_sizze_40518),)) + self.mainDetailedzisegred_small_88192_var, + ((np.int64(num_groups_88245) * np.int64(segred_group_sizze_88244)),), + (np.int64(segred_group_sizze_88244),)) if synchronous: sync(self) else: - groups_per_segment_45974 = sdiv_up64(num_groups_40519, - smax64(np.int64(1), - (m_29166 * i32_res_29181))) - elements_per_thread_45975 = sdiv_up64(i32_res_29175, - (segred_group_sizze_40518 * groups_per_segment_45974)) - virt_num_groups_45976 = (groups_per_segment_45974 * (m_29166 * i32_res_29181)) - num_threads_45977 = (num_groups_40519 * segred_group_sizze_40518) - threads_per_segment_45978 = (groups_per_segment_45974 * segred_group_sizze_40518) - group_res_arr_mem_45979 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_40518 * virt_num_groups_45976)), - "group_res_arr_mem_45979") - mainzicounter_mem_45981 = self.mainzicounter_mem_45981 - if ((1 * (np.long(num_groups_40519) * np.long(segred_group_sizze_40518))) != 0): - self.mainzisegred_large_40466_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40518))), - np.int64(N_29165), - np.int64(i32_res_29175), - 
np.int64(i32_res_29181), - np.int64(num_groups_40519), - np.int64(groups_per_segment_45974), - np.int64(elements_per_thread_45975), - np.int64(virt_num_groups_45976), - np.int64(threads_per_segment_45978), - images_mem_44381, - binop_p_mem_44390, - mem_44844, - group_res_arr_mem_45979, - mainzicounter_mem_45981) + groups_per_segment_128989 = sdiv_up64(num_groups_88245, + smax64(np.int64(1), + (m_70861 * k2p2zq_70876))) + elements_per_thread_128990 = sdiv_up64(n_70864, + (segred_group_sizze_88244 * groups_per_segment_128989)) + virt_num_groups_128991 = (groups_per_segment_128989 * (m_70861 * k2p2zq_70876)) + num_threads_128992 = (num_groups_88245 * segred_group_sizze_88244) + threads_per_segment_128993 = (groups_per_segment_128989 * segred_group_sizze_88244) + group_res_arr_mem_128994 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88244 * virt_num_groups_128991)), + "group_res_arr_mem_128994") + mainDetailedzicounter_mem_128996 = self.mainDetailedzicounter_mem_128996 + if ((1 * (np.int64(num_groups_88245) * np.int64(segred_group_sizze_88244))) != 0): + self.mainDetailedzisegred_large_88192_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88244))), + np.int64(N_70860), + np.int64(n_70864), + np.int64(k2p2zq_70876), + np.int64(num_groups_88245), + np.int64(groups_per_segment_128989), + np.int64(elements_per_thread_128990), + np.int64(virt_num_groups_128991), + np.int64(threads_per_segment_128993), + binop_p_mem_120117, + mem_124142, + mem_124587, + group_res_arr_mem_128994, + mainDetailedzicounter_mem_128996) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_40466_var, - ((np.long(num_groups_40519) * np.long(segred_group_sizze_40518)),), - (np.long(segred_group_sizze_40518),)) + self.mainDetailedzisegred_large_88192_var, + ((np.int64(num_groups_88245) * np.int64(segred_group_sizze_88244)),), + (np.int64(segred_group_sizze_88244),)) if synchronous: sync(self) 
- defunc_3_map_res_mem_44845 = mem_44844 - defunc_3_map_res_mem_44850 = defunc_3_map_res_mem_44845 - binop_p_mem_44390 = None - mem_44393 = None - suff_outer_par_40535 = (self.sizes["main.suff_outer_par_18"] <= m_29166) - segmap_group_sizze_40556 = self.sizes["main.segmap_group_size_40539"] - max_num_groups_46015 = self.sizes["main.segmap_num_groups_40541"] - num_groups_40557 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segmap_group_sizze_40556), - sext_i32_i64(max_num_groups_46015)))) - suff_outer_par_40636 = (self.sizes["main.suff_outer_par_19"] <= nest_sizze_39200) - segred_group_sizze_40651 = self.sizes["main.segred_group_size_40597"] - max_num_groups_46016 = self.sizes["main.segred_num_groups_40599"] - num_groups_40652 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44546, - segred_group_sizze_40651), - sext_i32_i64(max_num_groups_46016)))) - tile_sizze_43138 = self.sizes["main.tile_size_43137"] - group_sizze_43139 = (tile_sizze_43138 * tile_sizze_43138) - bytes_44882 = (np.int64(4) * group_sizze_43139) - binop_x_45480 = (np.int64(4) * tile_sizze_43138) - sizze_45481 = (tile_sizze_43138 * binop_x_45480) - num_threads_45650 = (segmap_group_sizze_40556 * num_groups_40557) - total_sizze_45651 = (bytes_44448 * num_threads_45650) - local_memory_capacity_46102 = self.max_local_memory + defunc_3_map_res_mem_124588 = mem_124587 + defunc_3_map_res_mem_124593 = defunc_3_map_res_mem_124588 + binop_p_mem_120117 = None + mem_120120 = None + suff_outer_par_88261 = (self.sizes["mainDetailed.suff_outer_par_30"] <= m_70861) + segmap_group_sizze_88282 = self.sizes["mainDetailed.segmap_group_size_88265"] + max_num_groups_129030 = self.sizes["mainDetailed.segmap_num_groups_88267"] + num_groups_88283 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_88282), + sext_i32_i64(max_num_groups_129030)))) + suff_outer_par_88362 = (self.sizes["mainDetailed.suff_outer_par_31"] <= binop_x_120244) + segred_group_sizze_88377 = 
self.sizes["mainDetailed.segred_group_size_88323"] + max_num_groups_129031 = self.sizes["mainDetailed.segred_num_groups_88325"] + num_groups_88378 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_88377), + sext_i32_i64(max_num_groups_129031)))) + tile_sizze_118835 = self.sizes["mainDetailed.tile_size_118834"] + group_sizze_118836 = (tile_sizze_118835 * tile_sizze_118835) + bytes_124625 = (np.int64(8) * group_sizze_118836) + binop_x_125394 = (np.int64(8) * tile_sizze_118835) + sizze_125395 = (tile_sizze_118835 * binop_x_125394) + num_threads_125829 = (segmap_group_sizze_88282 * num_groups_88283) + total_sizze_125830 = (bytes_120247 * num_threads_125829) + local_memory_capacity_129117 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46102)) and suff_outer_par_40535): - mem_44854 = opencl_alloc(self, bytes_44443, "mem_44854") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44854, np.int64(0), - defunc_3_map_res_mem_44629, + sext_i32_i64(local_memory_capacity_129117)) and suff_outer_par_88261): + mem_124597 = opencl_alloc(self, bytes_124188, "mem_124597") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124597, np.int64(0), + defunc_3_map_res_mem_124372, np.int64(0), np.int64(1), - (i32_res_29181 * i32_res_29181), - m_29166) - mem_44857 = opencl_alloc(self, bytes_44648, "mem_44857") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44857, np.int64(0), - defunc_3_map_res_mem_44850, + (k2p2zq_70876 * k2p2zq_70876), + m_70861) + mem_124600 = opencl_alloc(self, bytes_121990, "mem_124600") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124600, np.int64(0), + defunc_3_map_res_mem_124593, np.int64(0), np.int64(1), - i32_res_29181, m_29166) - mem_44875 = opencl_alloc(self, bytes_44648, "mem_44875") - mem_44860 = opencl_alloc(self, total_sizze_45651, "mem_44860") - if ((1 * (np.long(num_groups_40557) * np.long(segmap_group_sizze_40556))) != 0): - 
self.mainzisegmap_40537_var.set_args(self.global_failure, - np.int64(m_29166), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int64(num_groups_40557), - mem_44854, mem_44857, mem_44860, - mem_44875) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40537_var, - ((np.long(num_groups_40557) * np.long(segmap_group_sizze_40556)),), - (np.long(segmap_group_sizze_40556),)) + k2p2zq_70876, m_70861) + mem_124618 = opencl_alloc(self, bytes_121990, "mem_124618") + mem_124603 = opencl_alloc(self, total_sizze_125830, "mem_124603") + if ((1 * (np.int64(num_groups_88283) * np.int64(segmap_group_sizze_88282))) != 0): + self.mainDetailedzisegmap_88263_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_88283), + np.int64(num_threads_125829), + mem_124597, mem_124600, + mem_124603, mem_124618) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_88263_var, + ((np.int64(num_groups_88283) * np.int64(segmap_group_sizze_88282)),), + (np.int64(segmap_group_sizze_88282),)) if synchronous: sync(self) - mem_44854 = None - mem_44857 = None - mem_44860 = None - mem_44914 = opencl_alloc(self, bytes_44648, "mem_44914") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44914, np.int64(0), - mem_44875, np.int64(0), - np.int64(1), m_29166, - i32_res_29181) - mem_44875 = None - defunc_4_map_res_mem_44916 = mem_44914 + mem_124597 = None + mem_124600 = None + mem_124603 = None + mem_124657 = opencl_alloc(self, bytes_121990, "mem_124657") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124657, np.int64(0), + mem_124618, np.int64(0), + np.int64(1), m_70861, + k2p2zq_70876) + mem_124618 = None + defunc_4_map_res_mem_124659 = mem_124657 else: - local_memory_capacity_46101 = self.max_local_memory - if (sle64(((bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8))) + (bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8)))), - 
sext_i32_i64(local_memory_capacity_46101)) and suff_outer_par_40636): - mem_44879 = opencl_alloc(self, bytes_44544, "mem_44879") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44879, np.int64(0), - defunc_3_map_res_mem_44629, + local_memory_capacity_129116 = self.max_local_memory + if (sle64(((bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8))) + (bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129116)) and suff_outer_par_88362): + mem_124622 = opencl_alloc(self, bytes_121997, "mem_124622") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124622, np.int64(0), + defunc_3_map_res_mem_124372, np.int64(0), np.int64(1), - i32_res_29181, - (m_29166 * i32_res_29181)) - num_groups_x_43140 = sdiv_up64(m_29166, tile_sizze_43138) - num_groups_y_43141 = sdiv_up64(i32_res_29181, tile_sizze_43138) - num_groups_top_43142 = (num_groups_x_43140 * num_groups_y_43141) - num_whole_tiles_43159 = squot64(i32_res_29181, tile_sizze_43138) - residual_input_43286 = srem64(i32_res_29181, tile_sizze_43138) - cond_43287 = (residual_input_43286 == np.int64(0)) - mem_44906 = opencl_alloc(self, bytes_44648, "mem_44906") - if ((1 * (np.long(num_groups_top_43142) * np.long(group_sizze_43139))) != 0): - self.mainzisegmap_intragroup_43143_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44882)), - cl.LocalMemory(np.long(bytes_44882)), - np.int64(m_29166), - np.int64(i32_res_29181), - np.int64(num_groups_y_43141), - np.int64(num_whole_tiles_43159), - np.int64(residual_input_43286), - np.byte(cond_43287), - defunc_3_map_res_mem_44850, - mem_44879, mem_44906) + k2p2zq_70876, + (m_70861 * k2p2zq_70876)) + num_groups_x_118837 = sdiv_up64(m_70861, tile_sizze_118835) + num_groups_y_118838 = sdiv_up64(k2p2zq_70876, tile_sizze_118835) + num_groups_top_118839 = (num_groups_x_118837 * num_groups_y_118838) + num_whole_tiles_118856 = squot64(k2p2zq_70876, tile_sizze_118835) 
+ residual_input_118983 = srem64(k2p2zq_70876, tile_sizze_118835) + cond_118984 = (residual_input_118983 == np.int64(0)) + mem_124649 = opencl_alloc(self, bytes_121990, "mem_124649") + if ((1 * (np.int64(num_groups_top_118839) * np.int64(group_sizze_118836))) != 0): + self.mainDetailedzisegmap_intragroup_118840_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124625)), + cl.LocalMemory(np.int64(bytes_124625)), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_y_118838), + np.int64(num_whole_tiles_118856), + np.int64(residual_input_118983), + np.byte(cond_118984), + defunc_3_map_res_mem_124593, + mem_124622, + mem_124649) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_43143_var, - ((np.long(num_groups_top_43142) * np.long(group_sizze_43139)),), - (np.long(group_sizze_43139),)) + self.mainDetailedzisegmap_intragroup_118840_var, + ((np.int64(num_groups_top_118839) * np.int64(group_sizze_118836)),), + (np.int64(group_sizze_118836),)) if synchronous: sync(self) - mem_44879 = None - defunc_4_map_res_mem_44911 = mem_44906 + mem_124622 = None + defunc_4_map_res_mem_124654 = mem_124649 else: - mem_44910 = opencl_alloc(self, bytes_44648, "mem_44910") - if slt64((i32_res_29181 * np.int64(2)), segred_group_sizze_40651): - segment_sizze_nonzzero_46041 = smax64(np.int64(1), i32_res_29181) - num_threads_46042 = (num_groups_40652 * segred_group_sizze_40651) - if ((1 * (np.long(num_groups_40652) * np.long(segred_group_sizze_40651))) != 0): - self.mainzisegred_small_40603_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40651))), - np.int64(m_29166), - np.int64(i32_res_29181), - np.int64(num_groups_40652), - np.int64(segment_sizze_nonzzero_46041), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910) + mem_124653 = opencl_alloc(self, bytes_121990, "mem_124653") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_88377): + segment_sizze_nonzzero_129056 = 
smax64(np.int64(1), k2p2zq_70876) + num_threads_129057 = (num_groups_88378 * segred_group_sizze_88377) + if ((1 * (np.int64(num_groups_88378) * np.int64(segred_group_sizze_88377))) != 0): + self.mainDetailedzisegred_small_88329_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88377))), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_88378), + np.int64(segment_sizze_nonzzero_129056), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_40603_var, - ((np.long(num_groups_40652) * np.long(segred_group_sizze_40651)),), - (np.long(segred_group_sizze_40651),)) + self.mainDetailedzisegred_small_88329_var, + ((np.int64(num_groups_88378) * np.int64(segred_group_sizze_88377)),), + (np.int64(segred_group_sizze_88377),)) if synchronous: sync(self) else: - groups_per_segment_46062 = sdiv_up64(num_groups_40652, - smax64(np.int64(1), - (m_29166 * i32_res_29181))) - elements_per_thread_46063 = sdiv_up64(i32_res_29181, - (segred_group_sizze_40651 * groups_per_segment_46062)) - virt_num_groups_46064 = (groups_per_segment_46062 * (m_29166 * i32_res_29181)) - num_threads_46065 = (num_groups_40652 * segred_group_sizze_40651) - threads_per_segment_46066 = (groups_per_segment_46062 * segred_group_sizze_40651) - group_res_arr_mem_46067 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_40651 * virt_num_groups_46064)), - "group_res_arr_mem_46067") - mainzicounter_mem_46069 = self.mainzicounter_mem_46069 - if ((1 * (np.long(num_groups_40652) * np.long(segred_group_sizze_40651))) != 0): - self.mainzisegred_large_40603_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40651))), - np.int64(i32_res_29181), - np.int64(num_groups_40652), - np.int64(groups_per_segment_46062), - np.int64(elements_per_thread_46063), - np.int64(virt_num_groups_46064), - 
np.int64(threads_per_segment_46066), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910, - group_res_arr_mem_46067, - mainzicounter_mem_46069) + groups_per_segment_129077 = sdiv_up64(num_groups_88378, + smax64(np.int64(1), + (m_70861 * k2p2zq_70876))) + elements_per_thread_129078 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_88377 * groups_per_segment_129077)) + virt_num_groups_129079 = (groups_per_segment_129077 * (m_70861 * k2p2zq_70876)) + num_threads_129080 = (num_groups_88378 * segred_group_sizze_88377) + threads_per_segment_129081 = (groups_per_segment_129077 * segred_group_sizze_88377) + group_res_arr_mem_129082 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88377 * virt_num_groups_129079)), + "group_res_arr_mem_129082") + mainDetailedzicounter_mem_129084 = self.mainDetailedzicounter_mem_129084 + if ((1 * (np.int64(num_groups_88378) * np.int64(segred_group_sizze_88377))) != 0): + self.mainDetailedzisegred_large_88329_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88377))), + np.int64(k2p2zq_70876), + np.int64(num_groups_88378), + np.int64(groups_per_segment_129077), + np.int64(elements_per_thread_129078), + np.int64(virt_num_groups_129079), + np.int64(threads_per_segment_129081), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653, + group_res_arr_mem_129082, + mainDetailedzicounter_mem_129084) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_40603_var, - ((np.long(num_groups_40652) * np.long(segred_group_sizze_40651)),), - (np.long(segred_group_sizze_40651),)) + self.mainDetailedzisegred_large_88329_var, + ((np.int64(num_groups_88378) * np.int64(segred_group_sizze_88377)),), + (np.int64(segred_group_sizze_88377),)) if synchronous: sync(self) - defunc_4_map_res_mem_44911 = mem_44910 - defunc_4_map_res_mem_44916 = defunc_4_map_res_mem_44911 - defunc_3_map_res_mem_44629 = None - 
defunc_3_map_res_mem_44850 = None - suff_outer_par_40667 = (self.sizes["main.suff_outer_par_20"] <= m_29166) - segmap_group_sizze_40687 = self.sizes["main.segmap_group_size_40671"] - max_num_groups_46103 = self.sizes["main.segmap_num_groups_40673"] - num_groups_40688 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segmap_group_sizze_40687), - sext_i32_i64(max_num_groups_46103)))) - suff_outer_par_40764 = (self.sizes["main.suff_outer_par_21"] <= binop_x_44399) - nest_sizze_40778 = (i32_res_29181 * binop_x_44399) - segred_group_sizze_40779 = self.sizes["main.segred_group_size_40727"] - max_num_groups_46104 = self.sizes["main.segred_num_groups_40729"] - num_groups_40780 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_40778, - segred_group_sizze_40779), - sext_i32_i64(max_num_groups_46104)))) - Ty_43416 = self.sizes["main.Ty_43413"] - Ry_43417 = self.sizes["main.Ry_43415"] - Tx_43418 = self.sizes["main.Tx_43412"] - Rx_43419 = self.sizes["main.Rx_43414"] - Tk_43420 = self.sizes["main.Tk_43411"] - TxRx_43423 = (Tx_43418 * Rx_43419) - TyRy_43424 = (Ty_43416 * Ry_43417) - a_loc_szz_43426 = (Tk_43420 * TyRy_43424) - binop_x_43427 = (Tx_43418 * Tk_43420) - b_loc_szz_43428 = (Rx_43419 * binop_x_43427) - group_sizze_43432 = (Ty_43416 * Tx_43418) - bytes_44921 = (np.int64(4) * N_29165) - binop_x_44952 = (Ry_43417 * group_sizze_43432) - binop_x_44953 = (Rx_43419 * binop_x_44952) - bytes_44950 = (np.int64(4) * binop_x_44953) - binop_x_44944 = (Ry_43417 * Rx_43419) - bytes_44943 = (np.int64(4) * binop_x_44944) - bytes_44955 = (np.int64(4) * a_loc_szz_43426) - bytes_44957 = (np.int64(4) * b_loc_szz_43428) - bytes_45026 = (np.int64(4) * binop_x_44952) - binop_x_45032 = (Rx_43419 * group_sizze_43432) - bytes_45030 = (np.int64(4) * binop_x_45032) - bytes_45018 = (np.int64(4) * Ry_43417) - bytes_45020 = (np.int64(4) * Rx_43419) - binop_x_45494 = (np.int64(4) * Ty_43416) - binop_x_45495 = (Tx_43418 * binop_x_45494) - binop_x_45496 = (Ry_43417 * 
binop_x_45495) - sizze_45497 = (Rx_43419 * binop_x_45496) - num_threads_45654 = (segmap_group_sizze_40687 * num_groups_40688) - total_sizze_45655 = (bytes_44921 * num_threads_45654) - local_memory_capacity_46234 = self.max_local_memory + defunc_4_map_res_mem_124654 = mem_124653 + defunc_4_map_res_mem_124659 = defunc_4_map_res_mem_124654 + defunc_3_map_res_mem_124372 = None + defunc_3_map_res_mem_124593 = None + suff_outer_par_88393 = (self.sizes["mainDetailed.suff_outer_par_32"] <= m_70861) + segmap_group_sizze_88413 = self.sizes["mainDetailed.segmap_group_size_88397"] + max_num_groups_129118 = self.sizes["mainDetailed.segmap_num_groups_88399"] + num_groups_88414 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_88413), + sext_i32_i64(max_num_groups_129118)))) + suff_outer_par_88490 = (self.sizes["mainDetailed.suff_outer_par_33"] <= binop_x_120126) + nest_sizze_88504 = (k2p2zq_70876 * binop_x_120126) + segred_group_sizze_88505 = self.sizes["mainDetailed.segred_group_size_88453"] + max_num_groups_129119 = self.sizes["mainDetailed.segred_num_groups_88455"] + num_groups_88506 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_88504, + segred_group_sizze_88505), + sext_i32_i64(max_num_groups_129119)))) + Ty_119113 = self.sizes["mainDetailed.Ty_119110"] + Ry_119114 = self.sizes["mainDetailed.Ry_119112"] + Tx_119115 = self.sizes["mainDetailed.Tx_119109"] + Rx_119116 = self.sizes["mainDetailed.Rx_119111"] + Tk_119117 = self.sizes["mainDetailed.Tk_119108"] + TxRx_119120 = (Tx_119115 * Rx_119116) + TyRy_119121 = (Ty_119113 * Ry_119114) + a_loc_szz_119123 = (Tk_119117 * TyRy_119121) + binop_x_119124 = (Tx_119115 * Tk_119117) + b_loc_szz_119125 = (Rx_119116 * binop_x_119124) + group_sizze_119129 = (Ty_119113 * Tx_119115) + bytes_124664 = (np.int64(8) * N_70860) + binop_x_124695 = (Ry_119114 * group_sizze_119129) + binop_x_124696 = (Rx_119116 * binop_x_124695) + bytes_124693 = (np.int64(8) * binop_x_124696) + binop_x_124687 = 
(Ry_119114 * Rx_119116) + bytes_124686 = (np.int64(8) * binop_x_124687) + bytes_124698 = (np.int64(8) * a_loc_szz_119123) + bytes_124700 = (np.int64(8) * b_loc_szz_119125) + bytes_124769 = (np.int64(8) * binop_x_124695) + binop_x_124775 = (Rx_119116 * group_sizze_119129) + bytes_124773 = (np.int64(8) * binop_x_124775) + bytes_124761 = (np.int64(8) * Ry_119114) + bytes_124763 = (np.int64(8) * Rx_119116) + binop_x_125408 = (np.int64(8) * Ty_119113) + binop_x_125409 = (Tx_119115 * binop_x_125408) + binop_x_125410 = (Ry_119114 * binop_x_125409) + sizze_125411 = (Rx_119116 * binop_x_125410) + num_threads_125833 = (segmap_group_sizze_88413 * num_groups_88414) + total_sizze_125834 = (bytes_124664 * num_threads_125833) + local_memory_capacity_129249 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46234)) and suff_outer_par_40667): - mem_44919 = opencl_alloc(self, bytes_44648, "mem_44919") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44919, np.int64(0), - defunc_4_map_res_mem_44916, + sext_i32_i64(local_memory_capacity_129249)) and suff_outer_par_88393): + mem_124662 = opencl_alloc(self, bytes_121990, "mem_124662") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124662, np.int64(0), + defunc_4_map_res_mem_124659, np.int64(0), np.int64(1), - i32_res_29181, m_29166) - mem_44937 = opencl_alloc(self, bytes_44398, "mem_44937") - mem_44922 = opencl_alloc(self, total_sizze_45655, "mem_44922") - if ((1 * (np.long(num_groups_40688) * np.long(segmap_group_sizze_40687))) != 0): - self.mainzisegmap_40669_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int32(k2p2zq_29179), - np.int64(i32_res_29181), - np.int64(num_groups_40688), - mem_44397, mem_44919, mem_44922, - mem_44937) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40669_var, - ((np.long(num_groups_40688) * np.long(segmap_group_sizze_40687)),), - (np.long(segmap_group_sizze_40687),)) + k2p2zq_70876, m_70861) + mem_124680 = opencl_alloc(self, 
bytes_120125, "mem_124680") + mem_124665 = opencl_alloc(self, total_sizze_125834, "mem_124665") + if ((1 * (np.int64(num_groups_88414) * np.int64(segmap_group_sizze_88413))) != 0): + self.mainDetailedzisegmap_88395_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_88414), + np.int64(num_threads_125833), + mem_120124, mem_124662, + mem_124665, mem_124680) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_88395_var, + ((np.int64(num_groups_88414) * np.int64(segmap_group_sizze_88413)),), + (np.int64(segmap_group_sizze_88413),)) if synchronous: sync(self) - mem_44919 = None - mem_44922 = None - mem_45138 = opencl_alloc(self, bytes_44398, "mem_45138") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45138, np.int64(0), - mem_44937, np.int64(0), - np.int64(1), m_29166, N_29165) - mem_44937 = None - defunc_3_map_res_mem_45140 = mem_45138 + mem_124662 = None + mem_124665 = None + mem_124881 = opencl_alloc(self, bytes_120125, "mem_124881") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124881, np.int64(0), + mem_124680, np.int64(0), + np.int64(1), m_70861, N_70860) + mem_124680 = None + defunc_3_map_res_mem_124883 = mem_124881 else: - local_memory_capacity_46233 = self.max_local_memory - if (sle64(((bytes_44955 + srem64((np.int64(8) - srem64(bytes_44955, - np.int64(8))), - np.int64(8))) + (bytes_44957 + srem64((np.int64(8) - srem64(bytes_44957, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46233)) and suff_outer_par_40764): - mem_44940 = opencl_alloc(self, bytes_44383, "mem_44940") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44940, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), i32_res_29181, - N_29165) - tk_div_tx_43421 = sdiv_up64(Tk_43420, Tx_43418) - tk_div_ty_43422 = sdiv_up64(Tk_43420, Ty_43416) - gridDim_x_43429 = sdiv_up64(N_29165, TxRx_43423) - gridDim_y_43430 = sdiv_up64(m_29166, TyRy_43424) - grid_sizze_43431 = (gridDim_x_43429 * 
gridDim_y_43430) - full_tiles_43460 = squot64(i32_res_29181, Tk_43420) - kk_43663 = (Tk_43420 * full_tiles_43460) - mem_45130 = opencl_alloc(self, bytes_44398, "mem_45130") - if ((1 * (np.long(grid_sizze_43431) * np.long(group_sizze_43432))) != 0): - self.mainzisegmap_intragroup_43435_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44957)), - cl.LocalMemory(np.long(bytes_44955)), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29181), - np.int64(gridDim_x_43429), - np.int64(full_tiles_43460), - np.int64(kk_43663), - defunc_4_map_res_mem_44916, - mem_44940, mem_45130) + local_memory_capacity_129248 = self.max_local_memory + if (sle64(((bytes_124698 + srem64((np.int64(8) - srem64(bytes_124698, + np.int64(8))), + np.int64(8))) + (bytes_124700 + srem64((np.int64(8) - srem64(bytes_124700, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129248)) and suff_outer_par_88490): + mem_124683 = opencl_alloc(self, bytes_120110, "mem_124683") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124683, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_70876, + N_70860) + tk_div_tx_119118 = sdiv_up64(Tk_119117, Tx_119115) + tk_div_ty_119119 = sdiv_up64(Tk_119117, Ty_119113) + gridDim_x_119126 = sdiv_up64(N_70860, TxRx_119120) + gridDim_y_119127 = sdiv_up64(m_70861, TyRy_119121) + grid_sizze_119128 = (gridDim_x_119126 * gridDim_y_119127) + full_tiles_119157 = squot64(k2p2zq_70876, Tk_119117) + kk_119360 = (Tk_119117 * full_tiles_119157) + mem_124873 = opencl_alloc(self, bytes_120125, "mem_124873") + if ((1 * (np.int64(grid_sizze_119128) * np.int64(group_sizze_119129))) != 0): + self.mainDetailedzisegmap_intragroup_119132_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124700)), + cl.LocalMemory(np.int64(bytes_124698)), + np.int64(N_70860), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(gridDim_x_119126), + np.int64(full_tiles_119157), + np.int64(kk_119360), + defunc_4_map_res_mem_124659, + 
mem_124683, + mem_124873) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_43435_var, - ((np.long(grid_sizze_43431) * np.long(group_sizze_43432)),), - (np.long(group_sizze_43432),)) + self.mainDetailedzisegmap_intragroup_119132_var, + ((np.int64(grid_sizze_119128) * np.int64(group_sizze_119129)),), + (np.int64(group_sizze_119129),)) if synchronous: sync(self) - mem_44940 = None - defunc_3_map_res_mem_45135 = mem_45130 + mem_124683 = None + defunc_3_map_res_mem_124878 = mem_124873 else: - mem_45134 = opencl_alloc(self, bytes_44398, "mem_45134") - if slt64((i32_res_29181 * np.int64(2)), segred_group_sizze_40779): - segment_sizze_nonzzero_46173 = smax64(np.int64(1), i32_res_29181) - num_threads_46174 = (num_groups_40780 * segred_group_sizze_40779) - if ((1 * (np.long(num_groups_40780) * np.long(segred_group_sizze_40779))) != 0): - self.mainzisegred_small_40733_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40779))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29181), - np.int64(num_groups_40780), - np.int64(segment_sizze_nonzzero_46173), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134) + mem_124877 = opencl_alloc(self, bytes_120125, "mem_124877") + if slt64((k2p2zq_70876 * np.int64(2)), segred_group_sizze_88505): + segment_sizze_nonzzero_129188 = smax64(np.int64(1), k2p2zq_70876) + num_threads_129189 = (num_groups_88506 * segred_group_sizze_88505) + if ((1 * (np.int64(num_groups_88506) * np.int64(segred_group_sizze_88505))) != 0): + self.mainDetailedzisegred_small_88459_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88505))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(k2p2zq_70876), + np.int64(num_groups_88506), + np.int64(segment_sizze_nonzzero_129188), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_40733_var, - ((np.long(num_groups_40780) * 
np.long(segred_group_sizze_40779)),), - (np.long(segred_group_sizze_40779),)) + self.mainDetailedzisegred_small_88459_var, + ((np.int64(num_groups_88506) * np.int64(segred_group_sizze_88505)),), + (np.int64(segred_group_sizze_88505),)) if synchronous: sync(self) else: - groups_per_segment_46194 = sdiv_up64(num_groups_40780, - smax64(np.int64(1), - (m_29166 * N_29165))) - elements_per_thread_46195 = sdiv_up64(i32_res_29181, - (segred_group_sizze_40779 * groups_per_segment_46194)) - virt_num_groups_46196 = (groups_per_segment_46194 * (m_29166 * N_29165)) - num_threads_46197 = (num_groups_40780 * segred_group_sizze_40779) - threads_per_segment_46198 = (groups_per_segment_46194 * segred_group_sizze_40779) - group_res_arr_mem_46199 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_40779 * virt_num_groups_46196)), - "group_res_arr_mem_46199") - mainzicounter_mem_46201 = self.mainzicounter_mem_46201 - if ((1 * (np.long(num_groups_40780) * np.long(segred_group_sizze_40779))) != 0): - self.mainzisegred_large_40733_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_40779))), - np.int64(N_29165), - np.int64(i32_res_29181), - np.int64(num_groups_40780), - np.int64(groups_per_segment_46194), - np.int64(elements_per_thread_46195), - np.int64(virt_num_groups_46196), - np.int64(threads_per_segment_46198), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134, - group_res_arr_mem_46199, - mainzicounter_mem_46201) + groups_per_segment_129209 = sdiv_up64(num_groups_88506, + smax64(np.int64(1), + (m_70861 * N_70860))) + elements_per_thread_129210 = sdiv_up64(k2p2zq_70876, + (segred_group_sizze_88505 * groups_per_segment_129209)) + virt_num_groups_129211 = (groups_per_segment_129209 * (m_70861 * N_70860)) + num_threads_129212 = (num_groups_88506 * segred_group_sizze_88505) + threads_per_segment_129213 = (groups_per_segment_129209 * segred_group_sizze_88505) + group_res_arr_mem_129214 = 
opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88505 * virt_num_groups_129211)), + "group_res_arr_mem_129214") + mainDetailedzicounter_mem_129216 = self.mainDetailedzicounter_mem_129216 + if ((1 * (np.int64(num_groups_88506) * np.int64(segred_group_sizze_88505))) != 0): + self.mainDetailedzisegred_large_88459_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88505))), + np.int64(N_70860), + np.int64(k2p2zq_70876), + np.int64(num_groups_88506), + np.int64(groups_per_segment_129209), + np.int64(elements_per_thread_129210), + np.int64(virt_num_groups_129211), + np.int64(threads_per_segment_129213), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877, + group_res_arr_mem_129214, + mainDetailedzicounter_mem_129216) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_40733_var, - ((np.long(num_groups_40780) * np.long(segred_group_sizze_40779)),), - (np.long(segred_group_sizze_40779),)) + self.mainDetailedzisegred_large_88459_var, + ((np.int64(num_groups_88506) * np.int64(segred_group_sizze_88505)),), + (np.int64(segred_group_sizze_88505),)) if synchronous: sync(self) - defunc_3_map_res_mem_45135 = mem_45134 - defunc_3_map_res_mem_45140 = defunc_3_map_res_mem_45135 - mem_44397 = None - defunc_4_map_res_mem_44916 = None - i_29469 = (N_29165 - np.int64(1)) - x_29470 = sle64(np.int64(0), i_29469) - y_29471 = slt64(i_29469, N_29165) - bounds_check_29472 = (x_29470 and y_29471) - index_certs_29473 = True - assert bounds_check_29472, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:20:13-20\n #2 bfastfinal.fut:76:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:71:5-77:25\n #6 bfastfinal.fut:189:5-58\n #7 bfastfinal.fut:184:1-190:25\n" % ("Index [", - i_29469, + defunc_3_map_res_mem_124878 = mem_124877 + defunc_3_map_res_mem_124883 = defunc_3_map_res_mem_124878 + mem_120124 = None + 
defunc_4_map_res_mem_124659 = None + i_72637 = (N_70860 - np.int64(1)) + x_72638 = sle64(np.int64(0), i_72637) + y_72639 = slt64(i_72637, N_70860) + bounds_check_72640 = (x_72638 and y_72639) + index_certs_72641 = True + assert bounds_check_72640, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 bfastfinal.fut:87:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:82:5-88:25\n #6 bfastfinal.fut:185:3-72\n #7 bfastfinal.fut:181:1-185:72\n" % ("Index [", + i_72637, "] out of bounds for array of shape [", - N_29165, + N_70860, "].")) - fits_40909 = sle64(N_29165, max_group_sizze_39548) - suff_intra_par_40907 = (self.sizes["main.suff_intra_par_24"] <= N_29165) - intra_suff_and_fits_40910 = (suff_intra_par_40907 and fits_40909) - segscan_group_sizze_41059 = self.sizes["main.segscan_group_size_41036"] - max_num_groups_46235 = self.sizes["main.segscan_num_groups_41038"] - num_groups_41060 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44399, - segscan_group_sizze_41059), - sext_i32_i64(max_num_groups_46235)))) - segmap_group_sizze_41079 = self.sizes["main.segmap_group_size_41027"] - segmap_group_sizze_41115 = self.sizes["main.segmap_group_size_40952"] - bytes_45152 = (np.int64(4) * m_29166) - bytes_45143 = (np.int64(8) * N_29165) - bytes_45161 = (np.int64(8) * binop_x_44399) - local_memory_capacity_46337 = self.max_local_memory - if (sle64(((((bytes_45143 + srem64((np.int64(8) - srem64(bytes_45143, - np.int64(8))), - np.int64(8))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46337)) and intra_suff_and_fits_40910): - mem_45153 = opencl_alloc(self, bytes_45152, "mem_45153") - mem_45156 = 
opencl_alloc(self, bytes_44398, "mem_45156") - mem_45159 = opencl_alloc(self, bytes_44398, "mem_45159") - if ((1 * (np.long(m_29166) * np.long(N_29165))) != 0): - self.mainzisegmap_intragroup_40832_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_45143)), - np.int64(N_29165), - np.int64(i_29469), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45153, mem_45156, - mem_45159) + fits_88532 = sle64(N_70860, max_group_sizze_77729) + suff_intra_par_88530 = (self.sizes["mainDetailed.suff_intra_par_34"] <= N_70860) + intra_suff_and_fits_88533 = (suff_intra_par_88530 and fits_88532) + segscan_group_sizze_88668 = self.sizes["mainDetailed.segscan_group_size_88645"] + max_num_groups_129250 = self.sizes["mainDetailed.segscan_num_groups_88647"] + num_groups_88669 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120126, + segscan_group_sizze_88668), + sext_i32_i64(max_num_groups_129250)))) + segmap_group_sizze_88717 = self.sizes["mainDetailed.segmap_group_size_88576"] + local_memory_capacity_129334 = self.max_local_memory + if (sle64(((((bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129334)) and intra_suff_and_fits_88533): + mem_124896 = opencl_alloc(self, bytes_120173, "mem_124896") + mem_124899 = opencl_alloc(self, bytes_120125, "mem_124899") + mem_124902 = opencl_alloc(self, bytes_120125, "mem_124902") + if ((1 * (np.int64(m_70861) * np.int64(N_70860))) != 0): + self.mainDetailedzisegmap_intragroup_88528_var.set_args(self.global_failure, + 
cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + np.int64(N_70860), + np.int64(i_72637), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124896, + mem_124899, + mem_124902) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_40832_var, - ((np.long(m_29166) * np.long(N_29165)),), - (np.long(N_29165),)) + self.mainDetailedzisegmap_intragroup_88528_var, + ((np.int64(m_70861) * np.int64(N_70860)),), + (np.int64(N_70860),)) if synchronous: sync(self) - defunc_4_map_res_mem_45177 = mem_45153 - defunc_4_map_res_mem_45178 = mem_45156 - defunc_4_map_res_mem_45179 = mem_45159 + defunc_4_map_res_mem_124919 = mem_124896 + defunc_4_map_res_mem_124920 = mem_124899 + defunc_4_map_res_mem_124921 = mem_124902 else: - mem_45163 = opencl_alloc(self, bytes_45161, "mem_45163") - mem_45166 = opencl_alloc(self, bytes_44398, "mem_45166") - if slt64(np.int64(0), (m_29166 * N_29165)): - stage1_max_num_groups_46253 = self.max_group_size - stage1_num_groups_46254 = smin64(stage1_max_num_groups_46253, - num_groups_41060) - num_threads_46255 = sext_i64_i32((stage1_num_groups_46254 * segscan_group_sizze_41059)) - if ((1 * (np.long(stage1_num_groups_46254) * np.long(segscan_group_sizze_41059))) != 0): - self.mainziscan_stage1_41042_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(8) * segscan_group_sizze_41059)))), - np.int64(N_29165), - np.int64(m_29166), - np.int32(num_threads_46255), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45163, mem_45166) + mem_124906 = opencl_alloc(self, bytes_120125, "mem_124906") + mem_124909 = opencl_alloc(self, bytes_120125, "mem_124909") + if slt64(np.int64(0), (m_70861 * N_70860)): + stage1_max_num_groups_129268 = self.max_group_size + stage1_num_groups_129269 = smin64(stage1_max_num_groups_129268, + num_groups_88669) + num_threads_129270 = 
sext_i64_i32((stage1_num_groups_129269 * segscan_group_sizze_88668)) + if ((1 * (np.int64(stage1_num_groups_129269) * np.int64(segscan_group_sizze_88668))) != 0): + self.mainDetailedziscan_stage1_88651_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_88668)))), + np.int64(N_70860), + np.int64(m_70861), + np.int32(num_threads_129270), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124906, + mem_124909) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage1_41042_var, - ((np.long(stage1_num_groups_46254) * np.long(segscan_group_sizze_41059)),), - (np.long(segscan_group_sizze_41059),)) + self.mainDetailedziscan_stage1_88651_var, + ((np.int64(stage1_num_groups_129269) * np.int64(segscan_group_sizze_88668)),), + (np.int64(segscan_group_sizze_88668),)) if synchronous: sync(self) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46254))) != 0): - self.mainziscan_stage2_41042_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(8) * stage1_num_groups_46254)))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(stage1_num_groups_46254), - np.int32(num_threads_46255), - mem_45163) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129269))) != 0): + self.mainDetailedziscan_stage2_88651_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129269)))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(stage1_num_groups_129269), + np.int32(num_threads_129270), + mem_124906) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage2_41042_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46254)),), - (np.long(stage1_num_groups_46254),)) + self.mainDetailedziscan_stage2_88651_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129269)),), + (np.int64(stage1_num_groups_129269),)) if synchronous: sync(self) - required_groups_46297 = 
sext_i64_i32(sdiv_up64((m_29166 * N_29165), - segscan_group_sizze_41059)) - if ((1 * (np.long(num_groups_41060) * np.long(segscan_group_sizze_41059))) != 0): - self.mainziscan_stage3_41042_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int64(num_groups_41060), - np.int32(num_threads_46255), - np.int32(required_groups_46297), - mem_45163) + required_groups_129312 = sext_i64_i32(sdiv_up64((m_70861 * N_70860), + segscan_group_sizze_88668)) + if ((1 * (np.int64(num_groups_88669) * np.int64(segscan_group_sizze_88668))) != 0): + self.mainDetailedziscan_stage3_88651_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + np.int64(num_groups_88669), + np.int32(num_threads_129270), + np.int32(required_groups_129312), + mem_124906) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage3_41042_var, - ((np.long(num_groups_41060) * np.long(segscan_group_sizze_41059)),), - (np.long(segscan_group_sizze_41059),)) + self.mainDetailedziscan_stage3_88651_var, + ((np.int64(num_groups_88669) * np.int64(segscan_group_sizze_88668)),), + (np.int64(segscan_group_sizze_88668),)) if synchronous: sync(self) - segmap_usable_groups_41080 = sdiv_up64(m_29166, segmap_group_sizze_41079) - mem_45169 = opencl_alloc(self, bytes_45152, "mem_45169") - if ((1 * (np.long(segmap_usable_groups_41080) * np.long(segmap_group_sizze_41079))) != 0): - self.mainzisegmap_41025_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), - np.int64(i_29469), mem_45163, - mem_45169) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_41025_var, - ((np.long(segmap_usable_groups_41080) * np.long(segmap_group_sizze_41079)),), - (np.long(segmap_group_sizze_41079),)) + mem_124911 = opencl_alloc(self, bytes_120173, "mem_124911") + group_sizze_129327 = self.sizes["mainDetailed.group_size_129327"] + num_groups_129328 = sdiv_up64(m_70861, group_sizze_129327) + if ((1 * (np.int64(num_groups_129328) * np.int64(group_sizze_129327))) != 0): + 
self.mainDetailedzicopy_129324_var.set_args(np.int64(N_70860), + np.int64(m_70861), + np.int64(i_72637), + mem_124906, mem_124911) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzicopy_129324_var, + ((np.int64(num_groups_129328) * np.int64(group_sizze_129327)),), + (np.int64(group_sizze_129327),)) if synchronous: sync(self) - mem_45172 = opencl_alloc(self, bytes_44398, "mem_45172") - self.futhark_builtinzhreplicate_f32(mem_45172, (m_29166 * N_29165), + mem_124914 = opencl_alloc(self, bytes_120125, "mem_124914") + self.futhark_builtinzhreplicate_f64(mem_124914, (m_70861 * N_70860), np.nan) - mem_45175 = opencl_alloc(self, bytes_44398, "mem_45175") - self.futhark_builtinzhreplicate_i32(mem_45175, (m_29166 * N_29165), - np.int32(0)) - segmap_usable_groups_41116 = sdiv_up64(binop_x_44399, - segmap_group_sizze_41115) - if ((1 * (np.long(segmap_usable_groups_41116) * np.long(segmap_group_sizze_41115))) != 0): - self.mainzisegmap_40949_var.set_args(self.global_failure, - np.int64(N_29165), - np.int64(m_29166), mem_45163, - mem_45166, mem_45172, mem_45175) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_40949_var, - ((np.long(segmap_usable_groups_41116) * np.long(segmap_group_sizze_41115)),), - (np.long(segmap_group_sizze_41115),)) + mem_124917 = opencl_alloc(self, bytes_120125, "mem_124917") + self.futhark_builtinzhreplicate_i64(mem_124917, (m_70861 * N_70860), + np.int64(0)) + segmap_usable_groups_88718 = sdiv_up64(binop_x_120126, + segmap_group_sizze_88717) + if ((1 * (np.int64(segmap_usable_groups_88718) * np.int64(segmap_group_sizze_88717))) != 0): + self.mainDetailedzisegmap_88573_var.set_args(self.global_failure, + np.int64(N_70860), + np.int64(m_70861), + mem_124906, mem_124909, + mem_124914, mem_124917) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_88573_var, + ((np.int64(segmap_usable_groups_88718) * np.int64(segmap_group_sizze_88717)),), + (np.int64(segmap_group_sizze_88717),)) if synchronous: sync(self) - mem_45163 
= None - mem_45166 = None - defunc_4_map_res_mem_45177 = mem_45169 - defunc_4_map_res_mem_45178 = mem_45172 - defunc_4_map_res_mem_45179 = mem_45175 - defunc_3_map_res_mem_45140 = None - suff_outer_par_41164 = (self.sizes["main.suff_outer_par_28"] <= m_29166) - fits_41250 = sle64(i32_res_29175, max_group_sizze_39548) - suff_intra_par_41248 = (self.sizes["main.suff_intra_par_29"] <= i32_res_29175) - intra_suff_and_fits_41251 = (suff_intra_par_41248 and fits_41250) - segmap_group_sizze_41211 = self.sizes["main.segmap_group_size_41176"] - nest_sizze_41346 = (m_29166 * i32_res_29175) - segred_group_sizze_41347 = self.sizes["main.segred_group_size_41330"] - max_num_groups_46338 = self.sizes["main.segred_num_groups_41332"] - num_groups_41348 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_41346, - segred_group_sizze_41347), - sext_i32_i64(max_num_groups_46338)))) - segred_group_sizze_41361 = self.sizes["main.segred_group_size_41305"] - max_num_groups_46339 = self.sizes["main.segred_num_groups_41307"] - num_groups_41362 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_41346, - segred_group_sizze_41361), - sext_i32_i64(max_num_groups_46339)))) - segmap_group_sizze_41381 = self.sizes["main.segmap_group_size_41290"] - bytes_45188 = (np.int64(4) * segmap_group_sizze_41211) - local_memory_capacity_46491 = self.max_local_memory - if (sle64(((bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8))) + (bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46491)) and suff_outer_par_41164): - segmap_usable_groups_41212 = sdiv_up64(m_29166, segmap_group_sizze_41211) - mem_45182 = opencl_alloc(self, bytes_44398, "mem_45182") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45182, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_29165, m_29166) - mem_45185 = opencl_alloc(self, bytes_44398, "mem_45185") - 
self.futhark_builtinzhgpu_map_transpose_f32(mem_45185, np.int64(0), - defunc_4_map_res_mem_45178, - np.int64(0), np.int64(1), - N_29165, m_29166) - num_whole_tiles_43891 = squot64(i32_res_29175, segmap_group_sizze_41211) - residual_input_43992 = srem64(i32_res_29175, segmap_group_sizze_41211) - cond_43993 = (residual_input_43992 == np.int64(0)) - mem_45216 = opencl_alloc(self, bytes_45152, "mem_45216") - mem_45218 = opencl_alloc(self, bytes_45152, "mem_45218") - mem_45220 = opencl_alloc(self, bytes_45152, "mem_45220") - if ((1 * (np.long(segmap_usable_groups_41212) * np.long(segmap_group_sizze_41211))) != 0): - self.mainzisegmap_intragroup_43869_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45188)), - cl.LocalMemory(np.long(bytes_45188)), - np.int64(N_29165), - np.int64(m_29166), - np.int32(n_29169), - np.float32(hfrac_29171), - np.int64(i32_res_29175), - np.int32(k2p2_29177), - np.int64(num_whole_tiles_43891), - np.int64(residual_input_43992), - np.byte(cond_43993), - mem_45182, mem_45185, - mem_45216, mem_45218, - mem_45220) + mem_124906 = None + mem_124909 = None + defunc_4_map_res_mem_124919 = mem_124911 + defunc_4_map_res_mem_124920 = mem_124914 + defunc_4_map_res_mem_124921 = mem_124917 + mem_124924 = opencl_alloc(self, bytes_120125, "mem_124924") + if (((m_70861 * N_70860) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_124924, defunc_4_map_res_mem_124920, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_70861 * N_70860) * np.int32(8)))) + if synchronous: + sync(self) + suff_outer_par_88742 = (self.sizes["mainDetailed.suff_outer_par_35"] <= m_70861) + suff_intra_par_88816 = (self.sizes["mainDetailed.suff_intra_par_36"] <= n_70864) + intra_suff_and_fits_88819 = (fits_77730 and suff_intra_par_88816) + segmap_group_sizze_88780 = self.sizes["mainDetailed.segmap_group_size_88746"] + segred_group_sizze_88915 = 
self.sizes["mainDetailed.segred_group_size_88898"] + max_num_groups_129335 = self.sizes["mainDetailed.segred_num_groups_88900"] + num_groups_88916 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_77763, + segred_group_sizze_88915), + sext_i32_i64(max_num_groups_129335)))) + segred_group_sizze_88929 = self.sizes["mainDetailed.segred_group_size_88874"] + max_num_groups_129336 = self.sizes["mainDetailed.segred_num_groups_88876"] + num_groups_88930 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_77763, + segred_group_sizze_88929), + sext_i32_i64(max_num_groups_129336)))) + segmap_group_sizze_88948 = self.sizes["mainDetailed.segmap_group_size_88859"] + local_memory_capacity_129484 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129484)) and suff_outer_par_88742): + segmap_usable_groups_88781 = sdiv_up64(m_70861, segmap_group_sizze_88780) + mem_124927 = opencl_alloc(self, bytes_120125, "mem_124927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124927, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_70860, m_70861) + mem_124930 = opencl_alloc(self, bytes_120125, "mem_124930") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124930, np.int64(0), + mem_124924, np.int64(0), + np.int64(1), N_70860, m_70861) + mem_124933 = opencl_alloc(self, bytes_120173, "mem_124933") + mem_124935 = opencl_alloc(self, bytes_120173, "mem_124935") + mem_124937 = opencl_alloc(self, bytes_120173, "mem_124937") + if ((1 * (np.int64(segmap_usable_groups_88781) * np.int64(segmap_group_sizze_88780))) != 0): + self.mainDetailedzisegmap_88744_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.float64(hfrac_70866), + np.int64(k2p2_70874), + mem_124927, mem_124930, + mem_124933, mem_124935, + mem_124937) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_43869_var, - 
((np.long(segmap_usable_groups_41212) * np.long(segmap_group_sizze_41211)),), - (np.long(segmap_group_sizze_41211),)) + self.mainDetailedzisegmap_88744_var, + ((np.int64(segmap_usable_groups_88781) * np.int64(segmap_group_sizze_88780)),), + (np.int64(segmap_group_sizze_88780),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45182 = None - mem_45185 = None - defunc_3_map_res_mem_45244 = mem_45216 - defunc_3_map_res_mem_45245 = mem_45218 - defunc_3_map_res_mem_45246 = mem_45220 + mem_124927 = None + mem_124930 = None + defunc_3_map_res_mem_124961 = mem_124933 + defunc_3_map_res_mem_124962 = mem_124935 + defunc_3_map_res_mem_124963 = mem_124937 else: - local_memory_capacity_46490 = self.max_local_memory - if (sle64((((np.int32(4) * i32_res_29175) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_29175), - np.int64(8))), - np.int64(8))) + ((np.int32(4) * i32_res_29175) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_29175), - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46490)) and intra_suff_and_fits_41251): - mem_45225 = opencl_alloc(self, bytes_45152, "mem_45225") - mem_45227 = opencl_alloc(self, bytes_45152, "mem_45227") - mem_45229 = opencl_alloc(self, bytes_45152, "mem_45229") - if ((1 * (np.long(m_29166) * np.long(i32_res_29175))) != 0): - self.mainzisegmap_intragroup_41172_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * i32_res_29175))), - cl.LocalMemory(np.long((np.int32(4) * i32_res_29175))), - np.int64(N_29165), - np.float32(hfrac_29171), - np.int64(i32_res_29175), - np.int32(k2p2_29177), - images_mem_44381, - defunc_4_map_res_mem_45178, - mem_45225, mem_45227, - mem_45229) + local_memory_capacity_129483 = self.max_local_memory + if (sle64((((np.int32(8) * n_70864) + srem64((np.int64(8) - srem64((np.int32(8) * n_70864), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * n_70864) + srem64((np.int64(8) - 
srem64((np.int32(8) * n_70864), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129483)) and intra_suff_and_fits_88819): + mem_124942 = opencl_alloc(self, bytes_120173, "mem_124942") + mem_124944 = opencl_alloc(self, bytes_120173, "mem_124944") + mem_124946 = opencl_alloc(self, bytes_120173, "mem_124946") + if ((1 * (np.int64(m_70861) * np.int64(n_70864))) != 0): + self.mainDetailedzisegmap_intragroup_88740_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * n_70864))), + cl.LocalMemory(np.int64((np.int32(8) * n_70864))), + np.int64(N_70860), + np.int64(n_70864), + np.float64(hfrac_70866), + np.int64(k2p2_70874), + mem_124142, + mem_124924, + mem_124942, + mem_124944, + mem_124946) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_41172_var, - ((np.long(m_29166) * np.long(i32_res_29175)),), - (np.long(i32_res_29175),)) + self.mainDetailedzisegmap_intragroup_88740_var, + ((np.int64(m_70861) * np.int64(n_70864)),), + (np.int64(n_70864),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_45241 = mem_45225 - defunc_3_map_res_mem_45242 = mem_45227 - defunc_3_map_res_mem_45243 = mem_45229 + defunc_3_map_res_mem_124958 = mem_124942 + defunc_3_map_res_mem_124959 = mem_124944 + defunc_3_map_res_mem_124960 = mem_124946 else: - mem_45232 = opencl_alloc(self, bytes_45152, "mem_45232") - if slt64((i32_res_29175 * np.int64(2)), segred_group_sizze_41347): - segment_sizze_nonzzero_46365 = smax64(np.int64(1), i32_res_29175) - num_threads_46366 = (num_groups_41348 * segred_group_sizze_41347) - if ((1 * (np.long(num_groups_41348) * np.long(segred_group_sizze_41347))) != 0): - self.mainzisegred_small_41336_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41347))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29175), - np.int64(num_groups_41348), - 
np.int64(segment_sizze_nonzzero_46365), - images_mem_44381, - mem_45232) + mem_124949 = opencl_alloc(self, bytes_120173, "mem_124949") + if slt64((n_70864 * np.int64(2)), segred_group_sizze_88915): + segment_sizze_nonzzero_129358 = smax64(np.int64(1), n_70864) + num_threads_129359 = (num_groups_88916 * segred_group_sizze_88915) + if ((1 * (np.int64(num_groups_88916) * np.int64(segred_group_sizze_88915))) != 0): + self.mainDetailedzisegred_small_88904_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88915))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(num_groups_88916), + np.int64(segment_sizze_nonzzero_129358), + mem_124142, + mem_124949) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_41336_var, - ((np.long(num_groups_41348) * np.long(segred_group_sizze_41347)),), - (np.long(segred_group_sizze_41347),)) + self.mainDetailedzisegred_small_88904_var, + ((np.int64(num_groups_88916) * np.int64(segred_group_sizze_88915)),), + (np.int64(segred_group_sizze_88915),)) if synchronous: sync(self) else: - groups_per_segment_46386 = sdiv_up64(num_groups_41348, - smax64(np.int64(1), m_29166)) - elements_per_thread_46387 = sdiv_up64(i32_res_29175, - (segred_group_sizze_41347 * groups_per_segment_46386)) - virt_num_groups_46388 = (groups_per_segment_46386 * m_29166) - num_threads_46389 = (num_groups_41348 * segred_group_sizze_41347) - threads_per_segment_46390 = (groups_per_segment_46386 * segred_group_sizze_41347) - group_res_arr_mem_46391 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_41347 * virt_num_groups_46388)), - "group_res_arr_mem_46391") - mainzicounter_mem_46393 = self.mainzicounter_mem_46393 - if ((1 * (np.long(num_groups_41348) * np.long(segred_group_sizze_41347))) != 0): - self.mainzisegred_large_41336_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41347))), - np.int64(N_29165), - 
np.int64(i32_res_29175), - np.int64(num_groups_41348), - np.int64(groups_per_segment_46386), - np.int64(elements_per_thread_46387), - np.int64(virt_num_groups_46388), - np.int64(threads_per_segment_46390), - images_mem_44381, - mem_45232, - group_res_arr_mem_46391, - mainzicounter_mem_46393) + groups_per_segment_129379 = sdiv_up64(num_groups_88916, + smax64(np.int64(1), m_70861)) + elements_per_thread_129380 = sdiv_up64(n_70864, + (segred_group_sizze_88915 * groups_per_segment_129379)) + virt_num_groups_129381 = (groups_per_segment_129379 * m_70861) + num_threads_129382 = (num_groups_88916 * segred_group_sizze_88915) + threads_per_segment_129383 = (groups_per_segment_129379 * segred_group_sizze_88915) + group_res_arr_mem_129384 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88915 * virt_num_groups_129381)), + "group_res_arr_mem_129384") + mainDetailedzicounter_mem_129386 = self.mainDetailedzicounter_mem_129386 + if ((1 * (np.int64(num_groups_88916) * np.int64(segred_group_sizze_88915))) != 0): + self.mainDetailedzisegred_large_88904_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88915))), + np.int64(N_70860), + np.int64(n_70864), + np.int64(num_groups_88916), + np.int64(groups_per_segment_129379), + np.int64(elements_per_thread_129380), + np.int64(virt_num_groups_129381), + np.int64(threads_per_segment_129383), + mem_124142, + mem_124949, + group_res_arr_mem_129384, + mainDetailedzicounter_mem_129386) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_41336_var, - ((np.long(num_groups_41348) * np.long(segred_group_sizze_41347)),), - (np.long(segred_group_sizze_41347),)) + self.mainDetailedzisegred_large_88904_var, + ((np.int64(num_groups_88916) * np.int64(segred_group_sizze_88915)),), + (np.int64(segred_group_sizze_88915),)) if synchronous: sync(self) - mem_45235 = opencl_alloc(self, bytes_45152, "mem_45235") - if slt64((i32_res_29175 * np.int64(2)), 
segred_group_sizze_41361): - segment_sizze_nonzzero_46425 = smax64(np.int64(1), i32_res_29175) - num_threads_46426 = (num_groups_41362 * segred_group_sizze_41361) - if ((1 * (np.long(num_groups_41362) * np.long(segred_group_sizze_41361))) != 0): - self.mainzisegred_small_41311_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41361))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29175), - np.int64(num_groups_41362), - np.int64(segment_sizze_nonzzero_46425), - defunc_4_map_res_mem_45178, - mem_45232, mem_45235) + mem_124952 = opencl_alloc(self, bytes_120173, "mem_124952") + if slt64((n_70864 * np.int64(2)), segred_group_sizze_88929): + segment_sizze_nonzzero_129418 = smax64(np.int64(1), n_70864) + num_threads_129419 = (num_groups_88930 * segred_group_sizze_88929) + if ((1 * (np.int64(num_groups_88930) * np.int64(segred_group_sizze_88929))) != 0): + self.mainDetailedzisegred_small_88880_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88929))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(num_groups_88930), + np.int64(segment_sizze_nonzzero_129418), + mem_124924, + mem_124949, + mem_124952) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_41311_var, - ((np.long(num_groups_41362) * np.long(segred_group_sizze_41361)),), - (np.long(segred_group_sizze_41361),)) + self.mainDetailedzisegred_small_88880_var, + ((np.int64(num_groups_88930) * np.int64(segred_group_sizze_88929)),), + (np.int64(segred_group_sizze_88929),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) else: - groups_per_segment_46446 = sdiv_up64(num_groups_41362, - smax64(np.int64(1), m_29166)) - elements_per_thread_46447 = sdiv_up64(i32_res_29175, - (segred_group_sizze_41361 * groups_per_segment_46446)) - virt_num_groups_46448 = 
(groups_per_segment_46446 * m_29166) - num_threads_46449 = (num_groups_41362 * segred_group_sizze_41361) - threads_per_segment_46450 = (groups_per_segment_46446 * segred_group_sizze_41361) - group_res_arr_mem_46451 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_41361 * virt_num_groups_46448)), - "group_res_arr_mem_46451") - mainzicounter_mem_46453 = self.mainzicounter_mem_46453 - if ((1 * (np.long(num_groups_41362) * np.long(segred_group_sizze_41361))) != 0): - self.mainzisegred_large_41311_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41361))), - np.int64(N_29165), - np.int64(i32_res_29175), - np.int64(num_groups_41362), - np.int64(groups_per_segment_46446), - np.int64(elements_per_thread_46447), - np.int64(virt_num_groups_46448), - np.int64(threads_per_segment_46450), - defunc_4_map_res_mem_45178, - mem_45232, mem_45235, - group_res_arr_mem_46451, - mainzicounter_mem_46453) + groups_per_segment_129439 = sdiv_up64(num_groups_88930, + smax64(np.int64(1), m_70861)) + elements_per_thread_129440 = sdiv_up64(n_70864, + (segred_group_sizze_88929 * groups_per_segment_129439)) + virt_num_groups_129441 = (groups_per_segment_129439 * m_70861) + num_threads_129442 = (num_groups_88930 * segred_group_sizze_88929) + threads_per_segment_129443 = (groups_per_segment_129439 * segred_group_sizze_88929) + group_res_arr_mem_129444 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88929 * virt_num_groups_129441)), + "group_res_arr_mem_129444") + mainDetailedzicounter_mem_129446 = self.mainDetailedzicounter_mem_129446 + if ((1 * (np.int64(num_groups_88930) * np.int64(segred_group_sizze_88929))) != 0): + self.mainDetailedzisegred_large_88880_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * 
segred_group_sizze_88929))), + np.int64(N_70860), + np.int64(n_70864), + np.int64(num_groups_88930), + np.int64(groups_per_segment_129439), + np.int64(elements_per_thread_129440), + np.int64(virt_num_groups_129441), + np.int64(threads_per_segment_129443), + mem_124924, + mem_124949, + mem_124952, + group_res_arr_mem_129444, + mainDetailedzicounter_mem_129446) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_41311_var, - ((np.long(num_groups_41362) * np.long(segred_group_sizze_41361)),), - (np.long(segred_group_sizze_41361),)) + self.mainDetailedzisegred_large_88880_var, + ((np.int64(num_groups_88930) * np.int64(segred_group_sizze_88929)),), + (np.int64(segred_group_sizze_88929),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - segmap_usable_groups_41382 = sdiv_up64(m_29166, - segmap_group_sizze_41381) - mem_45238 = opencl_alloc(self, bytes_45152, "mem_45238") - mem_45240 = opencl_alloc(self, bytes_45152, "mem_45240") - if ((1 * (np.long(segmap_usable_groups_41382) * np.long(segmap_group_sizze_41381))) != 0): - self.mainzisegmap_41288_var.set_args(self.global_failure, - np.int64(m_29166), - np.float32(hfrac_29171), - np.int32(k2p2_29177), mem_45232, - mem_45235, mem_45238, mem_45240) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_41288_var, - ((np.long(segmap_usable_groups_41382) * np.long(segmap_group_sizze_41381)),), - (np.long(segmap_group_sizze_41381),)) + segmap_usable_groups_88949 = sdiv_up64(m_70861, + segmap_group_sizze_88948) + mem_124955 = opencl_alloc(self, bytes_120173, "mem_124955") + mem_124957 = opencl_alloc(self, bytes_120173, "mem_124957") + if ((1 * (np.int64(segmap_usable_groups_88949) * np.int64(segmap_group_sizze_88948))) != 0): + self.mainDetailedzisegmap_88857_var.set_args(self.global_failure, + np.int64(m_70861), + np.float64(hfrac_70866), + np.int64(k2p2_70874), + mem_124949, mem_124952, + mem_124955, mem_124957) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_88857_var, 
+ ((np.int64(segmap_usable_groups_88949) * np.int64(segmap_group_sizze_88948)),), + (np.int64(segmap_group_sizze_88948),)) if synchronous: sync(self) - mem_45235 = None - defunc_3_map_res_mem_45241 = mem_45238 - defunc_3_map_res_mem_45242 = mem_45232 - defunc_3_map_res_mem_45243 = mem_45240 - defunc_3_map_res_mem_45244 = defunc_3_map_res_mem_45241 - defunc_3_map_res_mem_45245 = defunc_3_map_res_mem_45242 - defunc_3_map_res_mem_45246 = defunc_3_map_res_mem_45243 - segred_group_sizze_41402 = self.sizes["main.segred_group_size_41401"] - max_num_groups_46492 = self.sizes["main.segred_num_groups_41403"] - num_groups_41404 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segred_group_sizze_41402), - sext_i32_i64(max_num_groups_46492)))) - mem_45249 = opencl_alloc(self, np.int64(4), "mem_45249") - mainzicounter_mem_46493 = self.mainzicounter_mem_46493 - group_res_arr_mem_46495 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_41402 * num_groups_41404)), - "group_res_arr_mem_46495") - num_threads_46497 = (num_groups_41404 * segred_group_sizze_41402) - if ((1 * (np.long(num_groups_41404) * np.long(segred_group_sizze_41402))) != 0): - self.mainzisegred_nonseg_41409_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41402))), - cl.LocalMemory(np.long(np.int32(1))), - np.int64(m_29166), - np.int64(num_groups_41404), - np.int64(num_threads_46497), - defunc_3_map_res_mem_45244, - mem_45249, - mainzicounter_mem_46493, - group_res_arr_mem_46495) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegred_nonseg_41409_var, - ((np.long(num_groups_41404) * np.long(segred_group_sizze_41402)),), - (np.long(segred_group_sizze_41402),)) + mem_124952 = None + defunc_3_map_res_mem_124958 = mem_124955 + defunc_3_map_res_mem_124959 = mem_124949 + defunc_3_map_res_mem_124960 = mem_124957 + defunc_3_map_res_mem_124961 = defunc_3_map_res_mem_124958 + defunc_3_map_res_mem_124962 = defunc_3_map_res_mem_124959 + 
defunc_3_map_res_mem_124963 = defunc_3_map_res_mem_124960 + mem_124142 = None + segred_group_sizze_88969 = self.sizes["mainDetailed.segred_group_size_88968"] + max_num_groups_129485 = self.sizes["mainDetailed.segred_num_groups_88970"] + num_groups_88971 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segred_group_sizze_88969), + sext_i32_i64(max_num_groups_129485)))) + mem_124966 = opencl_alloc(self, np.int64(8), "mem_124966") + mainDetailedzicounter_mem_129486 = self.mainDetailedzicounter_mem_129486 + group_res_arr_mem_129488 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_88969 * num_groups_88971)), + "group_res_arr_mem_129488") + num_threads_129490 = (num_groups_88971 * segred_group_sizze_88969) + if ((1 * (np.int64(num_groups_88971) * np.int64(segred_group_sizze_88969))) != 0): + self.mainDetailedzisegred_nonseg_88976_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_88969))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_70861), + np.int64(num_groups_88971), + np.int64(num_threads_129490), + defunc_3_map_res_mem_124961, + mem_124966, + mainDetailedzicounter_mem_129486, + group_res_arr_mem_129488) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_nonseg_88976_var, + ((np.int64(num_groups_88971) * np.int64(segred_group_sizze_88969)),), + (np.int64(segred_group_sizze_88969),)) if synchronous: sync(self) - read_res_46869 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46869, mem_45249, - device_offset=(np.long(np.int64(0)) * 4), + read_res_129921 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129921, mem_124966, + device_offset=(np.int64(np.int64(0)) * 8), is_blocking=synchronous) sync(self) - defunc_2_reduce_comm_res_29563 = read_res_46869[0] - mem_45249 = None - i32_res_29568 = sext_i32_i64(defunc_2_reduce_comm_res_29563) - bounds_invalid_upwards_29569 = slt64(i32_res_29568, np.int64(0)) - valid_29570 = 
not(bounds_invalid_upwards_29569) - range_valid_c_29571 = True - assert valid_29570, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:5:3-18\n #2 bfastfinal.fut:102:34-46\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:97:17-104:24\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_29568, - " is invalid.")) - suff_outer_par_41441 = (self.sizes["main.suff_outer_par_31"] <= m_29166) - segmap_group_sizze_41468 = self.sizes["main.segmap_group_size_41445"] - nest_sizze_41519 = (m_29166 * i32_res_29568) - segred_group_sizze_41520 = self.sizes["main.segred_group_size_41493"] - max_num_groups_46523 = self.sizes["main.segred_num_groups_41495"] - num_groups_41521 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_41519, - segred_group_sizze_41520), - sext_i32_i64(max_num_groups_46523)))) - bytes_45252 = (np.int64(4) * segmap_group_sizze_41468) - local_memory_capacity_46594 = self.max_local_memory - if (sle64(((bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8))) + (bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46594)) and suff_outer_par_41441): - segmap_usable_groups_41469 = sdiv_up64(m_29166, segmap_group_sizze_41468) - num_whole_tiles_44095 = squot64(i32_res_29568, segmap_group_sizze_41468) - residual_input_44206 = srem64(i32_res_29568, segmap_group_sizze_41468) - cond_44207 = (residual_input_44206 == np.int64(0)) - mem_45275 = opencl_alloc(self, bytes_45152, "mem_45275") - if ((1 * (np.long(segmap_usable_groups_41469) * np.long(segmap_group_sizze_41468))) != 0): - self.mainzisegmap_intragroup_44075_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45252)), - cl.LocalMemory(np.long(bytes_45252)), - np.int64(N_29165), - np.int64(m_29166), - 
np.int64(i32_res_29568), - np.int64(num_whole_tiles_44095), - np.int64(residual_input_44206), - np.byte(cond_44207), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45275) + defunc_2_reduce_comm_res_72722 = read_res_129921[0] + mem_124966 = None + bounds_invalid_upwards_72728 = slt64(defunc_2_reduce_comm_res_72722, + np.int64(0)) + valid_72729 = not(bounds_invalid_upwards_72728) + range_valid_c_72730 = True + assert valid_72729, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:113:34-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + defunc_2_reduce_comm_res_72722, + " is invalid.")) + suff_outer_par_88978 = (self.sizes["mainDetailed.suff_outer_par_37"] <= m_70861) + segmap_group_sizze_89004 = self.sizes["mainDetailed.segmap_group_size_88982"] + nest_sizze_89053 = (m_70861 * defunc_2_reduce_comm_res_72722) + segred_group_sizze_89054 = self.sizes["mainDetailed.segred_group_size_89028"] + max_num_groups_129516 = self.sizes["mainDetailed.segred_num_groups_89030"] + num_groups_89055 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_89053, + segred_group_sizze_89054), + sext_i32_i64(max_num_groups_129516)))) + local_memory_capacity_129583 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129583)) and suff_outer_par_88978): + segmap_usable_groups_89005 = sdiv_up64(m_70861, segmap_group_sizze_89004) + mem_124969 = opencl_alloc(self, bytes_120173, "mem_124969") + if ((1 * (np.int64(segmap_usable_groups_89005) * np.int64(segmap_group_sizze_89004))) != 0): + self.mainDetailedzisegmap_88980_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(defunc_2_reduce_comm_res_72722), + mem_124924, + 
defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + mem_124969) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_44075_var, - ((np.long(segmap_usable_groups_41469) * np.long(segmap_group_sizze_41468)),), - (np.long(segmap_group_sizze_41468),)) + self.mainDetailedzisegmap_88980_var, + ((np.int64(segmap_usable_groups_89005) * np.int64(segmap_group_sizze_89004)),), + (np.int64(segmap_group_sizze_89004),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45275 + defunc_0_f_res_mem_124973 = mem_124969 else: - mem_45278 = opencl_alloc(self, bytes_45152, "mem_45278") - if slt64((i32_res_29568 * np.int64(2)), segred_group_sizze_41520): - segment_sizze_nonzzero_46534 = smax64(np.int64(1), i32_res_29568) - num_threads_46535 = (num_groups_41521 * segred_group_sizze_41520) - if ((1 * (np.long(num_groups_41521) * np.long(segred_group_sizze_41520))) != 0): - self.mainzisegred_small_41499_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41520))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(i32_res_29568), - np.int64(num_groups_41521), - np.int64(segment_sizze_nonzzero_46534), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278) + mem_124972 = opencl_alloc(self, bytes_120173, "mem_124972") + if slt64((defunc_2_reduce_comm_res_72722 * np.int64(2)), + segred_group_sizze_89054): + segment_sizze_nonzzero_129523 = smax64(np.int64(1), + defunc_2_reduce_comm_res_72722) + num_threads_129524 = (num_groups_89055 * segred_group_sizze_89054) + if ((1 * (np.int64(num_groups_89055) * np.int64(segred_group_sizze_89054))) != 0): + self.mainDetailedzisegred_small_89034_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89054))), + np.int64(N_70860), + 
np.int64(m_70861), + np.int64(defunc_2_reduce_comm_res_72722), + np.int64(num_groups_89055), + np.int64(segment_sizze_nonzzero_129523), + mem_124924, + defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + mem_124972) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_41499_var, - ((np.long(num_groups_41521) * np.long(segred_group_sizze_41520)),), - (np.long(segred_group_sizze_41520),)) + self.mainDetailedzisegred_small_89034_var, + ((np.int64(num_groups_89055) * np.int64(segred_group_sizze_89054)),), + (np.int64(segred_group_sizze_89054),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) else: - groups_per_segment_46555 = sdiv_up64(num_groups_41521, - smax64(np.int64(1), m_29166)) - elements_per_thread_46556 = sdiv_up64(i32_res_29568, - (segred_group_sizze_41520 * groups_per_segment_46555)) - virt_num_groups_46557 = (groups_per_segment_46555 * m_29166) - num_threads_46558 = (num_groups_41521 * segred_group_sizze_41520) - threads_per_segment_46559 = (groups_per_segment_46555 * segred_group_sizze_41520) - group_res_arr_mem_46560 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_41520 * virt_num_groups_46557)), - "group_res_arr_mem_46560") - mainzicounter_mem_46562 = self.mainzicounter_mem_46562 - if ((1 * (np.long(num_groups_41521) * np.long(segred_group_sizze_41520))) != 0): - self.mainzisegred_large_41499_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_41520))), - np.int64(N_29165), - np.int64(i32_res_29568), - np.int64(num_groups_41521), - np.int64(groups_per_segment_46555), - np.int64(elements_per_thread_46556), - np.int64(virt_num_groups_46557), - np.int64(threads_per_segment_46559), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278, - group_res_arr_mem_46560, - mainzicounter_mem_46562) + groups_per_segment_129544 = 
sdiv_up64(num_groups_89055, + smax64(np.int64(1), m_70861)) + elements_per_thread_129545 = sdiv_up64(defunc_2_reduce_comm_res_72722, + (segred_group_sizze_89054 * groups_per_segment_129544)) + virt_num_groups_129546 = (groups_per_segment_129544 * m_70861) + num_threads_129547 = (num_groups_89055 * segred_group_sizze_89054) + threads_per_segment_129548 = (groups_per_segment_129544 * segred_group_sizze_89054) + group_res_arr_mem_129549 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_89054 * virt_num_groups_129546)), + "group_res_arr_mem_129549") + mainDetailedzicounter_mem_129551 = self.mainDetailedzicounter_mem_129551 + if ((1 * (np.int64(num_groups_89055) * np.int64(segred_group_sizze_89054))) != 0): + self.mainDetailedzisegred_large_89034_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89054))), + np.int64(N_70860), + np.int64(defunc_2_reduce_comm_res_72722), + np.int64(num_groups_89055), + np.int64(groups_per_segment_129544), + np.int64(elements_per_thread_129545), + np.int64(virt_num_groups_129546), + np.int64(threads_per_segment_129548), + mem_124924, + defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + mem_124972, + group_res_arr_mem_129549, + mainDetailedzicounter_mem_129551) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_41499_var, - ((np.long(num_groups_41521) * np.long(segred_group_sizze_41520)),), - (np.long(segred_group_sizze_41520),)) + self.mainDetailedzisegred_large_89034_var, + ((np.int64(num_groups_89055) * np.int64(segred_group_sizze_89054)),), + (np.int64(segred_group_sizze_89054),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45278 - iota32_arg_29597 = (N_29165 - i32_res_29175) - bounds_invalid_upwards_29598 = slt64(iota32_arg_29597, np.int64(0)) - valid_29599 = not(bounds_invalid_upwards_29598) - 
range_valid_c_29600 = True - assert valid_29599, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:109:22-35\n #3 bfastfinal.fut:189:5-58\n #4 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_29597, - " is invalid.")) - i_29602 = (n_29169 - np.int32(1)) - i_29603 = sext_i32_i64(i_29602) - x_29604 = sle64(np.int64(0), i_29603) - y_29605 = slt64(i_29603, N_29165) - bounds_check_29606 = (x_29604 and y_29605) - index_certs_29607 = True - assert bounds_check_29606, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:107:64-84\n #1 bfastfinal.fut:106:15-109:36\n #2 bfastfinal.fut:189:5-58\n #3 bfastfinal.fut:184:1-190:25\n" % ("Index [", - i_29603, - "] out of bounds for array of shape [", - N_29165, - "].")) - read_res_46871 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46871, mappingindices_mem_44380, - device_offset=(np.long(i_29603) * 4), + defunc_0_f_res_mem_124973 = mem_124972 + iota_arg_72752 = (N_70860 - n_70864) + bounds_invalid_upwards_72753 = slt64(iota_arg_72752, np.int64(0)) + valid_72754 = not(bounds_invalid_upwards_72753) + range_valid_c_72755 = True + assert valid_72754, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:120:22-31\n #2 bfastfinal.fut:185:3-72\n #3 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_72752, + " is invalid.")) + index_certs_72757 = True + assert y_70960, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:118:72-92\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:185:3-72\n #3 bfastfinal.fut:181:1-185:72\n" % ("Index [", + m_70956, + "] out of bounds for array of shape [", + N_70860, + "].")) + read_res_129923 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129923, mappingindices_mem_120107, + device_offset=(np.int64(m_70956) * 8), is_blocking=synchronous) 
sync(self) - r32_arg_29608 = read_res_46871[0] - i32_res_29609 = sitofp_i32_f32(r32_arg_29608) - range_valid_c_29610 = True - assert valid_29599, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:119:20-35\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:189:5-58\n #6 bfastfinal.fut:184:1-190:25\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_29597, - " is invalid.")) - segmap_group_sizze_41611 = self.sizes["main.segmap_group_size_41591"] - segmap_usable_groups_41612 = sdiv_up64(iota32_arg_29597, - segmap_group_sizze_41611) - bytes_45281 = (np.int64(4) * iota32_arg_29597) - mem_45282 = opencl_alloc(self, bytes_45281, "mem_45282") - mem_45284 = opencl_alloc(self, bytes_45281, "mem_45284") - if ((1 * (np.long(segmap_usable_groups_41612) * np.long(segmap_group_sizze_41611))) != 0): - self.mainzisegmap_41589_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_29165), np.int32(n_29169), - np.float32(lam_29172), - np.int64(iota32_arg_29597), - np.float32(i32_res_29609), - mappingindices_mem_44380, mem_45282, - mem_45284) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_41589_var, - ((np.long(segmap_usable_groups_41612) * np.long(segmap_group_sizze_41611)),), - (np.long(segmap_group_sizze_41611),)) + i64_arg_72758 = read_res_129923[0] + i64_res_72759 = sitofp_i64_f64(i64_arg_72758) + segmap_group_sizze_89097 = self.sizes["mainDetailed.segmap_group_size_89079"] + segmap_usable_groups_89098 = sdiv_up64(iota_arg_72752, + segmap_group_sizze_89097) + bytes_124975 = (np.int64(8) * iota_arg_72752) + mem_124976 = opencl_alloc(self, bytes_124975, "mem_124976") + if ((1 * (np.int64(segmap_usable_groups_89098) * np.int64(segmap_group_sizze_89097))) != 0): + self.mainDetailedzisegmap_89077_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + 
np.int64(N_70860), + np.int64(n_70864), + np.float64(lam_70868), + np.int64(iota_arg_72752), + np.float64(i64_res_72759), + mappingindices_mem_120107, + mem_124976) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_89077_var, + ((np.int64(segmap_usable_groups_89098) * np.int64(segmap_group_sizze_89097)),), + (np.int64(segmap_group_sizze_89097),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45282 = None - fits_41879 = sle64(iota32_arg_29597, max_group_sizze_39548) - suff_intra_par_41877 = (self.sizes["main.suff_intra_par_34"] <= iota32_arg_29597) - intra_suff_and_fits_41880 = (suff_intra_par_41877 and fits_41879) - segmap_group_sizze_42177 = self.sizes["main.segmap_group_size_42157"] - max_num_groups_46600 = self.sizes["main.segmap_num_groups_42159"] - num_groups_42178 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_29166, - segmap_group_sizze_42177), - sext_i32_i64(max_num_groups_46600)))) - nest_sizze_42199 = (m_29166 * iota32_arg_29597) - segscan_group_sizze_42200 = self.sizes["main.segscan_group_size_42108"] - max_num_groups_46601 = self.sizes["main.segscan_num_groups_42110"] - num_groups_42201 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_42199, - segscan_group_sizze_42200), - sext_i32_i64(max_num_groups_46601)))) - segred_group_sizze_42246 = self.sizes["main.segred_group_size_42054"] - max_num_groups_46602 = self.sizes["main.segred_num_groups_42056"] - num_groups_42247 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_42199, - segred_group_sizze_42246), - sext_i32_i64(max_num_groups_46602)))) - segmap_group_sizze_42285 = self.sizes["main.segmap_group_size_42003"] - bytes_45300 = (np.int64(4) * nest_sizze_42199) - local_memory_capacity_46795 = self.max_local_memory - if (sle64(((((bytes_45281 + srem64((np.int64(8) - srem64(bytes_45281, - np.int64(8))), - np.int64(8))) + ((np.int32(1) * iota32_arg_29597) + srem64((np.int64(8) - srem64((np.int32(1) * iota32_arg_29597), - 
np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_29597) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_29597), - np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_29597) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_29597), - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46795)) and intra_suff_and_fits_41880): - mem_45291 = opencl_alloc(self, bytes_45152, "mem_45291") - mem_45293 = opencl_alloc(self, bytes_45152, "mem_45293") - if ((1 * (np.long(m_29166) * np.long(iota32_arg_29597))) != 0): - self.mainzisegmap_intragroup_41640_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_29597))), - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_29597))), - cl.LocalMemory(np.long((np.int32(1) * iota32_arg_29597))), - cl.LocalMemory(np.long(bytes_45281)), - np.int64(N_29165), - np.int32(n_29169), - np.int64(iota32_arg_29597), - defunc_4_map_res_mem_45177, - defunc_4_map_res_mem_45178, - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - defunc_0_f_res_mem_45279, - mem_45284, mem_45291, - mem_45293) + iota_arg_72776 = (N_70860 - n_70864) + bounds_invalid_upwards_72777 = slt64(iota_arg_72776, np.int64(0)) + valid_72778 = not(bounds_invalid_upwards_72777) + range_valid_c_72779 = True + assert valid_72778, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:130:20-31\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_72776, + " is invalid.")) + max_res_72781 = smax64(np.int64(1), iota_arg_72776) + bounds_invalid_upwards_72782 = slt64(max_res_72781, np.int64(1)) + distance_72783 = (max_res_72781 - np.int64(1)) + valid_72784 = not(bounds_invalid_upwards_72782) + 
range_valid_c_72785 = True + assert valid_72784, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:16:30-45\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:185:3-72\n #6 bfastfinal.fut:181:1-185:72\n" % ("Range ", + np.int64(1), + "..<", + max_res_72781, + " is invalid.")) + dim_match_72786 = (iota_arg_72752 == iota_arg_72776) + empty_or_match_cert_72787 = True + assert dim_match_72786, ("Error: %s\n\nBacktrace:\n-> #0 unknown location\n #1 bfastfinal.fut:127:13-132:42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:185:3-72\n #5 bfastfinal.fut:181:1-185:72\n" % ("Function return value does not match shape of declared return type.",)) + segmap_group_sizze_89270 = self.sizes["mainDetailed.segmap_group_size_89263"] + segmap_usable_groups_89271 = sdiv_up64(m_70861, segmap_group_sizze_89270) + mem_124979 = opencl_alloc(self, bytes_120173, "mem_124979") + if ((1 * (np.int64(segmap_usable_groups_89271) * np.int64(segmap_group_sizze_89270))) != 0): + self.mainDetailedzisegmap_89261_var.set_args(self.global_failure, + np.int64(m_70861), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124962, + mem_124979) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_89261_var, + ((np.int64(segmap_usable_groups_89271) * np.int64(segmap_group_sizze_89270)),), + (np.int64(segmap_group_sizze_89270),)) + if synchronous: + sync(self) + nest_sizze_89281 = (m_70861 * iota_arg_72776) + segmap_group_sizze_89282 = self.sizes["mainDetailed.segmap_group_size_89210"] + segmap_usable_groups_89283 = sdiv_up64(nest_sizze_89281, + segmap_group_sizze_89282) + bytes_124981 = (np.int64(8) * nest_sizze_89281) + mem_124983 = opencl_alloc(self, bytes_124981, "mem_124983") + if ((1 * (np.int64(segmap_usable_groups_89283) * np.int64(segmap_group_sizze_89282))) != 0): + 
self.mainDetailedzisegmap_89207_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(iota_arg_72776), + defunc_4_map_res_mem_124920, + mem_124924, + defunc_3_map_res_mem_124962, + mem_124979, mem_124983) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_89207_var, + ((np.int64(segmap_usable_groups_89283) * np.int64(segmap_group_sizze_89282)),), + (np.int64(segmap_group_sizze_89282),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_124924 = None + segmap_group_sizze_89308 = self.sizes["mainDetailed.segmap_group_size_89119"] + segmap_usable_groups_89309 = sdiv_up64(m_70861, segmap_group_sizze_89308) + mem_124986 = opencl_alloc(self, bytes_124981, "mem_124986") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124986, np.int64(0), + mem_124983, np.int64(0), + np.int64(1), iota_arg_72776, + m_70861) + mem_124983 = None + mem_125017 = opencl_alloc(self, bytes_120173, "mem_125017") + bytes_124996 = (np.int64(8) * iota_arg_72776) + num_threads_125851 = (segmap_group_sizze_89308 * segmap_usable_groups_89309) + total_sizze_125852 = (bytes_124996 * num_threads_125851) + mem_124997 = opencl_alloc(self, total_sizze_125852, "mem_124997") + if ((1 * (np.int64(segmap_usable_groups_89309) * np.int64(segmap_group_sizze_89308))) != 0): + self.mainDetailedzisegmap_89117_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_70861), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + np.int64(distance_72783), + np.int64(num_threads_125851), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124962, + mem_124979, mem_124986, + mem_124997, mem_125017) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegmap_89117_var, + ((np.int64(segmap_usable_groups_89309) * np.int64(segmap_group_sizze_89308)),), + (np.int64(segmap_group_sizze_89308),)) + if synchronous: + sync(self) + 
self.failure_is_an_option = np.int32(1) + mem_124979 = None + mem_124986 = None + mem_124997 = None + dim_match_72886 = (iota_arg_72776 == iota_arg_72752) + empty_or_match_cert_72887 = True + assert dim_match_72886, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:173:24-72\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:147:38-175:9\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("function arguments of wrong shape",)) + empty_or_match_cert_72888 = True + assert dim_match_72786, ("Error: %s\n\nBacktrace:\n-> #0 unknown location\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:147:38-175:9\n #3 bfastfinal.fut:185:3-72\n #4 bfastfinal.fut:181:1-185:72\n" % ("Function return value does not match shape of declared return type.",)) + suff_outer_par_89400 = (self.sizes["mainDetailed.suff_outer_par_38"] <= m_70861) + intra_avail_par_89395 = smin64(iota_arg_72752, iota_arg_72776) + computed_group_sizze_89388 = smax64(iota_arg_72752, iota_arg_72776) + fits_89622 = sle64(computed_group_sizze_89388, max_group_sizze_77729) + suff_intra_par_89620 = (self.sizes["mainDetailed.suff_intra_par_39"] <= intra_avail_par_89395) + intra_suff_and_fits_89623 = (suff_intra_par_89620 and fits_89622) + segmap_group_sizze_89511 = self.sizes["mainDetailed.segmap_group_size_89404"] + max_num_groups_129609 = self.sizes["mainDetailed.segmap_num_groups_89406"] + num_groups_89512 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_70861, + segmap_group_sizze_89511), + sext_i32_i64(max_num_groups_129609)))) + segmap_group_sizze_89917 = self.sizes["mainDetailed.segmap_group_size_89910"] + segscan_group_sizze_89925 = self.sizes["mainDetailed.segscan_group_size_89873"] + max_num_groups_129610 = self.sizes["mainDetailed.segscan_num_groups_89875"] + num_groups_89926 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_89281, + segscan_group_sizze_89925), + sext_i32_i64(max_num_groups_129610)))) + segmap_group_sizze_89957 = 
self.sizes["mainDetailed.segmap_group_size_89862"] + nest_sizze_89967 = (m_70861 * iota_arg_72752) + segred_group_sizze_89968 = self.sizes["mainDetailed.segred_group_size_89822"] + max_num_groups_129611 = self.sizes["mainDetailed.segred_num_groups_89824"] + num_groups_89969 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_89967, + segred_group_sizze_89968), + sext_i32_i64(max_num_groups_129611)))) + segmap_group_sizze_90005 = self.sizes["mainDetailed.segmap_group_size_89787"] + segmap_group_sizze_90050 = self.sizes["mainDetailed.segmap_group_size_89740"] + bytes_125061 = (np.int64(8) * nest_sizze_89967) + local_memory_capacity_129612 = self.max_local_memory + if intra_suff_and_fits_89623: + defunc_0_f_res_ixfn_125120 = iota_arg_72752 + else: + defunc_0_f_res_ixfn_125120 = iota_arg_72776 + num_threads_125853 = (segmap_group_sizze_89511 * num_groups_89512) + total_sizze_125854 = (bytes_124996 * num_threads_125853) + total_sizze_125855 = (bytes_124975 * num_threads_125853) + total_sizze_125856 = (bytes_124996 * num_threads_125853) + local_memory_capacity_129836 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129836)) and suff_outer_par_89400): + mem_125063 = opencl_alloc(self, bytes_125061, "mem_125063") + mem_125066 = opencl_alloc(self, bytes_125061, "mem_125066") + mem_125068 = opencl_alloc(self, bytes_120173, "mem_125068") + mem_125070 = opencl_alloc(self, bytes_120173, "mem_125070") + mem_125020 = opencl_alloc(self, total_sizze_125854, "mem_125020") + mem_125034 = opencl_alloc(self, total_sizze_125855, "mem_125034") + mem_125048 = opencl_alloc(self, total_sizze_125856, "mem_125048") + if ((1 * (np.int64(num_groups_89512) * np.int64(segmap_group_sizze_89511))) != 0): + self.mainDetailedzisegmap_89402_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + 
np.int64(num_groups_89512), + np.int64(num_threads_125853), + defunc_4_map_res_mem_124919, + defunc_4_map_res_mem_124920, + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + defunc_3_map_res_mem_124963, + defunc_0_f_res_mem_124973, + mem_124976, mem_125020, + mem_125034, mem_125048, + mem_125063, mem_125066, + mem_125068, mem_125070) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegmap_intragroup_41640_var, - ((np.long(m_29166) * np.long(iota32_arg_29597)),), - (np.long(iota32_arg_29597),)) + self.mainDetailedzisegmap_89402_var, + ((np.int64(num_groups_89512) * np.int64(segmap_group_sizze_89511)),), + (np.int64(segmap_group_sizze_89511),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45315 = mem_45291 - defunc_0_f_res_mem_45316 = mem_45293 + mem_125020 = None + mem_125034 = None + mem_125048 = None + mem_125127 = opencl_alloc(self, bytes_125061, "mem_125127") + self.futhark_builtinzhgpu_map_transpose_f64(mem_125127, np.int64(0), + mem_125063, np.int64(0), + np.int64(1), m_70861, + iota_arg_72752) + mem_125063 = None + mem_125131 = opencl_alloc(self, bytes_125061, "mem_125131") + self.futhark_builtinzhgpu_map_transpose_f64(mem_125131, np.int64(0), + mem_125066, np.int64(0), + np.int64(1), m_70861, + iota_arg_72752) + mem_125066 = None + defunc_0_f_res_mem_125137 = mem_125127 + defunc_0_f_res_mem_125138 = mem_125131 + defunc_0_f_res_mem_125139 = mem_125068 + defunc_0_f_res_mem_125140 = mem_125070 else: - mem_45296 = opencl_alloc(self, bytes_45152, "mem_45296") - mem_45298 = opencl_alloc(self, bytes_45152, "mem_45298") - if ((1 * (np.long(num_groups_42178) * np.long(segmap_group_sizze_42177))) != 0): - self.mainzisegmap_42155_var.set_args(self.global_failure, - np.int64(m_29166), - np.int64(num_groups_42178), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - mem_45296, mem_45298) - cl.enqueue_nd_range_kernel(self.queue, 
self.mainzisegmap_42155_var, - ((np.long(num_groups_42178) * np.long(segmap_group_sizze_42177)),), - (np.long(segmap_group_sizze_42177),)) - if synchronous: - sync(self) - mem_45302 = opencl_alloc(self, bytes_45300, "mem_45302") - if slt64(np.int64(0), (m_29166 * iota32_arg_29597)): - stage1_max_num_groups_46636 = self.max_group_size - stage1_num_groups_46637 = smin64(stage1_max_num_groups_46636, - num_groups_42201) - num_threads_46638 = sext_i64_i32((stage1_num_groups_46637 * segscan_group_sizze_42200)) - if ((1 * (np.long(stage1_num_groups_46637) * np.long(segscan_group_sizze_42200))) != 0): - self.mainziscan_stage1_42114_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * segscan_group_sizze_42200)))), - np.int64(N_29165), - np.int64(m_29166), - np.int64(iota32_arg_29597), - np.int32(num_threads_46638), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_0_f_res_mem_45279, - mem_45298, mem_45302) + local_memory_capacity_129835 = self.max_local_memory + if (sle64(((((((bytes_124996 + srem64((np.int64(8) - srem64(bytes_124996, + np.int64(8))), + np.int64(8))) + (bytes_124975 + srem64((np.int64(8) - srem64(bytes_124975, + np.int64(8))), + np.int64(8)))) + ((np.int32(1) * iota_arg_72752) + srem64((np.int64(8) - srem64((np.int32(1) * iota_arg_72752), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_72752) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_72752), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_72752) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_72752), + np.int64(8))), + np.int64(8)))) + (bytes_124996 + srem64((np.int64(8) - srem64(bytes_124996, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129835)) and intra_suff_and_fits_89623): + mem_125083 = opencl_alloc(self, bytes_125061, "mem_125083") + mem_125086 = opencl_alloc(self, bytes_125061, 
"mem_125086") + mem_125088 = opencl_alloc(self, bytes_120173, "mem_125088") + mem_125090 = opencl_alloc(self, bytes_120173, "mem_125090") + if ((1 * (np.int64(m_70861) * np.int64(computed_group_sizze_89388))) != 0): + self.mainDetailedzisegmap_intragroup_89398_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124996)), + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_72752))), + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_72752))), + cl.LocalMemory(np.int64((np.int32(1) * iota_arg_72752))), + cl.LocalMemory(np.int64(bytes_124975)), + cl.LocalMemory(np.int64(bytes_124996)), + np.int64(N_70860), + np.int64(n_70864), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + np.int64(computed_group_sizze_89388), + defunc_4_map_res_mem_124919, + defunc_4_map_res_mem_124920, + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + defunc_3_map_res_mem_124963, + defunc_0_f_res_mem_124973, + mem_124976, + mem_125083, + mem_125086, + mem_125088, + mem_125090) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage1_42114_var, - ((np.long(stage1_num_groups_46637) * np.long(segscan_group_sizze_42200)),), - (np.long(segscan_group_sizze_42200),)) + self.mainDetailedzisegmap_intragroup_89398_var, + ((np.int64(m_70861) * np.int64(computed_group_sizze_89388)),), + (np.int64(computed_group_sizze_89388),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46637))) != 0): - self.mainziscan_stage2_42114_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * stage1_num_groups_46637)))), - np.int64(m_29166), - np.int64(iota32_arg_29597), - np.int64(stage1_num_groups_46637), - np.int32(num_threads_46638), - mem_45302) + defunc_0_f_res_mem_125121 = mem_125083 + defunc_0_f_res_mem_125122 = mem_125086 + defunc_0_f_res_mem_125123 = mem_125088 + 
defunc_0_f_res_mem_125124 = mem_125090 + else: + segmap_usable_groups_89918 = sdiv_up64(m_70861, + segmap_group_sizze_89917) + mem_125093 = opencl_alloc(self, bytes_120173, "mem_125093") + if ((1 * (np.int64(segmap_usable_groups_89918) * np.int64(segmap_group_sizze_89917))) != 0): + self.mainDetailedzisegmap_89908_var.set_args(self.global_failure, + np.int64(m_70861), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124962, + mem_125093) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage2_42114_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46637)),), - (np.long(stage1_num_groups_46637),)) + self.mainDetailedzisegmap_89908_var, + ((np.int64(segmap_usable_groups_89918) * np.int64(segmap_group_sizze_89917)),), + (np.int64(segmap_group_sizze_89917),)) if synchronous: sync(self) - required_groups_46680 = sext_i64_i32(sdiv_up64((m_29166 * iota32_arg_29597), - segscan_group_sizze_42200)) - if ((1 * (np.long(num_groups_42201) * np.long(segscan_group_sizze_42200))) != 0): - self.mainziscan_stage3_42114_var.set_args(self.global_failure, - np.int64(m_29166), - np.int64(iota32_arg_29597), - np.int64(num_groups_42201), - np.int32(num_threads_46638), - np.int32(required_groups_46680), - mem_45302) + mem_125097 = opencl_alloc(self, bytes_124981, "mem_125097") + if slt64(np.int64(0), (m_70861 * iota_arg_72776)): + stage1_max_num_groups_129666 = self.max_group_size + stage1_num_groups_129667 = smin64(stage1_max_num_groups_129666, + num_groups_89926) + num_threads_129668 = sext_i64_i32((stage1_num_groups_129667 * segscan_group_sizze_89925)) + if ((1 * (np.int64(stage1_num_groups_129667) * np.int64(segscan_group_sizze_89925))) != 0): + self.mainDetailedziscan_stage1_89879_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_89925)))), + np.int64(N_70860), + np.int64(m_70861), + np.int64(iota_arg_72776), + np.int32(num_threads_129668), + 
defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124961, + defunc_3_map_res_mem_124962, + defunc_0_f_res_mem_124973, + mem_125093, + mem_125097) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage1_89879_var, + ((np.int64(stage1_num_groups_129667) * np.int64(segscan_group_sizze_89925)),), + (np.int64(segscan_group_sizze_89925),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129667))) != 0): + self.mainDetailedziscan_stage2_89879_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129667)))), + np.int64(m_70861), + np.int64(iota_arg_72776), + np.int64(stage1_num_groups_129667), + np.int32(num_threads_129668), + mem_125097) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage2_89879_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129667)),), + (np.int64(stage1_num_groups_129667),)) + if synchronous: + sync(self) + required_groups_129710 = sext_i64_i32(sdiv_up64((m_70861 * iota_arg_72776), + segscan_group_sizze_89925)) + if ((1 * (np.int64(num_groups_89926) * np.int64(segscan_group_sizze_89925))) != 0): + self.mainDetailedziscan_stage3_89879_var.set_args(self.global_failure, + np.int64(m_70861), + np.int64(iota_arg_72776), + np.int64(num_groups_89926), + np.int32(num_threads_129668), + np.int32(required_groups_129710), + mem_125097) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedziscan_stage3_89879_var, + ((np.int64(num_groups_89926) * np.int64(segscan_group_sizze_89925)),), + (np.int64(segscan_group_sizze_89925),)) + if synchronous: + sync(self) + segmap_usable_groups_89958 = sdiv_up64(m_70861, + segmap_group_sizze_89957) + mem_125100 = opencl_alloc(self, bytes_120173, "mem_125100") + if ((1 * (np.int64(segmap_usable_groups_89958) * np.int64(segmap_group_sizze_89957))) != 0): + self.mainDetailedzisegmap_89860_var.set_args(self.global_failure, + 
np.int64(m_70861), + defunc_3_map_res_mem_124962, + defunc_3_map_res_mem_124963, + mem_125100) cl.enqueue_nd_range_kernel(self.queue, - self.mainziscan_stage3_42114_var, - ((np.long(num_groups_42201) * np.long(segscan_group_sizze_42200)),), - (np.long(segscan_group_sizze_42200),)) + self.mainDetailedzisegmap_89860_var, + ((np.int64(segmap_usable_groups_89958) * np.int64(segmap_group_sizze_89957)),), + (np.int64(segmap_group_sizze_89957),)) if synchronous: sync(self) - mem_45305 = opencl_alloc(self, m_29166, "mem_45305") - mem_45307 = opencl_alloc(self, bytes_45152, "mem_45307") - mem_45309 = opencl_alloc(self, bytes_45152, "mem_45309") - if slt64((iota32_arg_29597 * np.int64(2)), segred_group_sizze_42246): - segment_sizze_nonzzero_46692 = smax64(np.int64(1), iota32_arg_29597) - num_threads_46693 = (num_groups_42247 * segred_group_sizze_42246) - if ((1 * (np.long(num_groups_42247) * np.long(segred_group_sizze_42246))) != 0): - self.mainzisegred_small_42060_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_42246))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_42246))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_42246))), - np.int64(m_29166), - np.int64(iota32_arg_29597), - np.int64(num_groups_42247), - np.int64(segment_sizze_nonzzero_46692), - mem_45284, mem_45296, - mem_45298, mem_45302, - mem_45305, mem_45307, - mem_45309) + mem_125103 = opencl_alloc(self, m_70861, "mem_125103") + mem_125105 = opencl_alloc(self, bytes_120173, "mem_125105") + mem_125107 = opencl_alloc(self, bytes_120173, "mem_125107") + mem_125110 = opencl_alloc(self, bytes_125061, "mem_125110") + if slt64((iota_arg_72752 * np.int64(2)), segred_group_sizze_89968): + segment_sizze_nonzzero_129727 = smax64(np.int64(1), iota_arg_72752) + num_threads_129728 = (num_groups_89969 * segred_group_sizze_89968) + if ((1 * (np.int64(num_groups_89969) * np.int64(segred_group_sizze_89968))) != 0): + 
self.mainDetailedzisegred_small_89828_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89968))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89968))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_89968))), + np.int64(m_70861), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + np.int64(num_groups_89969), + np.int64(segment_sizze_nonzzero_129727), + mem_124976, + mem_125093, + mem_125097, + mem_125100, + mem_125103, + mem_125105, + mem_125107, + mem_125110) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_small_89828_var, + ((np.int64(num_groups_89969) * np.int64(segred_group_sizze_89968)),), + (np.int64(segred_group_sizze_89968),)) + if synchronous: + sync(self) + else: + groups_per_segment_129763 = sdiv_up64(num_groups_89969, + smax64(np.int64(1), m_70861)) + elements_per_thread_129764 = sdiv_up64(iota_arg_72752, + (segred_group_sizze_89968 * groups_per_segment_129763)) + virt_num_groups_129765 = (groups_per_segment_129763 * m_70861) + num_threads_129766 = (num_groups_89969 * segred_group_sizze_89968) + threads_per_segment_129767 = (groups_per_segment_129763 * segred_group_sizze_89968) + group_res_arr_mem_129768 = opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_89968 * virt_num_groups_129765)), + "group_res_arr_mem_129768") + group_res_arr_mem_129770 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_89968 * virt_num_groups_129765)), + "group_res_arr_mem_129770") + group_res_arr_mem_129772 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_89968 * virt_num_groups_129765)), + "group_res_arr_mem_129772") + mainDetailedzicounter_mem_129774 = self.mainDetailedzicounter_mem_129774 + if ((1 * (np.int64(num_groups_89969) * np.int64(segred_group_sizze_89968))) != 0): + self.mainDetailedzisegred_large_89828_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89968))), 
+ cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_89968))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_89968))), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + np.int64(num_groups_89969), + np.int64(groups_per_segment_129763), + np.int64(elements_per_thread_129764), + np.int64(virt_num_groups_129765), + mem_124976, + mem_125093, + mem_125097, + mem_125100, + mem_125103, + mem_125105, + mem_125107, + mem_125110, + group_res_arr_mem_129768, + group_res_arr_mem_129770, + group_res_arr_mem_129772, + mainDetailedzicounter_mem_129774) + cl.enqueue_nd_range_kernel(self.queue, + self.mainDetailedzisegred_large_89828_var, + ((np.int64(num_groups_89969) * np.int64(segred_group_sizze_89968)),), + (np.int64(segred_group_sizze_89968),)) + if synchronous: + sync(self) + mem_125097 = None + mem_125100 = None + segmap_usable_groups_90006 = sdiv_up64(m_70861, + segmap_group_sizze_90005) + mem_125113 = opencl_alloc(self, bytes_120173, "mem_125113") + mem_125115 = opencl_alloc(self, bytes_120173, "mem_125115") + if ((1 * (np.int64(segmap_usable_groups_90006) * np.int64(segmap_group_sizze_90005))) != 0): + self.mainDetailedzisegmap_89785_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124962, + mem_125093, mem_125103, + mem_125105, mem_125107, + mem_125113, mem_125115) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_small_42060_var, - ((np.long(num_groups_42247) * np.long(segred_group_sizze_42246)),), - (np.long(segred_group_sizze_42246),)) + self.mainDetailedzisegmap_89785_var, + ((np.int64(segmap_usable_groups_90006) * np.int64(segmap_group_sizze_90005)),), + (np.int64(segmap_group_sizze_90005),)) if synchronous: sync(self) - else: - groups_per_segment_46728 = sdiv_up64(num_groups_42247, - smax64(np.int64(1), m_29166)) - elements_per_thread_46729 = sdiv_up64(iota32_arg_29597, - 
(segred_group_sizze_42246 * groups_per_segment_46728)) - virt_num_groups_46730 = (groups_per_segment_46728 * m_29166) - num_threads_46731 = (num_groups_42247 * segred_group_sizze_42246) - threads_per_segment_46732 = (groups_per_segment_46728 * segred_group_sizze_42246) - group_res_arr_mem_46733 = opencl_alloc(self, - (np.int32(1) * (segred_group_sizze_42246 * virt_num_groups_46730)), - "group_res_arr_mem_46733") - group_res_arr_mem_46735 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_42246 * virt_num_groups_46730)), - "group_res_arr_mem_46735") - group_res_arr_mem_46737 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_42246 * virt_num_groups_46730)), - "group_res_arr_mem_46737") - mainzicounter_mem_46739 = self.mainzicounter_mem_46739 - if ((1 * (np.long(num_groups_42247) * np.long(segred_group_sizze_42246))) != 0): - self.mainzisegred_large_42060_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_42246))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_42246))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_42246))), - np.int64(iota32_arg_29597), - np.int64(num_groups_42247), - np.int64(groups_per_segment_46728), - np.int64(elements_per_thread_46729), - np.int64(virt_num_groups_46730), - mem_45284, mem_45296, - mem_45298, mem_45302, - mem_45305, mem_45307, - mem_45309, - group_res_arr_mem_46733, - group_res_arr_mem_46735, - group_res_arr_mem_46737, - mainzicounter_mem_46739) + self.failure_is_an_option = np.int32(1) + mem_125103 = None + mem_125105 = None + mem_125107 = None + mem_125118 = opencl_alloc(self, bytes_124981, "mem_125118") + self.futhark_builtinzhreplicate_f64(mem_125118, + (m_70861 * iota_arg_72776), np.nan) + segmap_usable_groups_90051 = sdiv_up64(nest_sizze_89281, + segmap_group_sizze_90050) + if ((1 * (np.int64(segmap_usable_groups_90051) * np.int64(segmap_group_sizze_90050))) != 0): + 
self.mainDetailedzisegmap_89737_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_70860), + np.int64(m_70861), + np.int64(n_70864), + np.int64(iota_arg_72752), + np.int64(iota_arg_72776), + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124962, + mem_125093, mem_125110, + mem_125118) cl.enqueue_nd_range_kernel(self.queue, - self.mainzisegred_large_42060_var, - ((np.long(num_groups_42247) * np.long(segred_group_sizze_42246)),), - (np.long(segred_group_sizze_42246),)) + self.mainDetailedzisegmap_89737_var, + ((np.int64(segmap_usable_groups_90051) * np.int64(segmap_group_sizze_90050)),), + (np.int64(segmap_group_sizze_90050),)) if synchronous: sync(self) - mem_45296 = None - mem_45302 = None - segmap_usable_groups_42286 = sdiv_up64(m_29166, segmap_group_sizze_42285) - mem_45312 = opencl_alloc(self, bytes_45152, "mem_45312") - mem_45314 = opencl_alloc(self, bytes_45152, "mem_45314") - if ((1 * (np.long(segmap_usable_groups_42286) * np.long(segmap_group_sizze_42285))) != 0): - self.mainzisegmap_42001_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_29165), - np.int64(m_29166), - np.int32(n_29169), - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45245, - mem_45298, mem_45305, mem_45307, - mem_45309, mem_45312, mem_45314) - cl.enqueue_nd_range_kernel(self.queue, self.mainzisegmap_42001_var, - ((np.long(segmap_usable_groups_42286) * np.long(segmap_group_sizze_42285)),), - (np.long(segmap_group_sizze_42285),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - mem_45298 = None - mem_45305 = None - mem_45307 = None - mem_45309 = None - defunc_0_f_res_mem_45315 = mem_45312 - defunc_0_f_res_mem_45316 = mem_45314 - defunc_4_map_res_mem_45178 = None - defunc_4_map_res_mem_45179 = None - defunc_3_map_res_mem_45244 = None - defunc_3_map_res_mem_45245 = None - defunc_3_map_res_mem_45246 = None - defunc_0_f_res_mem_45279 = None - mem_45284 = 
None - out_mem_45676 = defunc_4_map_res_mem_45177 - out_mem_45677 = defunc_0_f_res_mem_45315 - out_mem_45678 = defunc_0_f_res_mem_45316 - return (out_mem_45676, out_mem_45677, out_mem_45678) - def futhark_mainDetailed(self, mappingindices_mem_44380, images_mem_44381, - N_27771, m_27772, trend_27773, k_27774, n_27775, - freq_27776, hfrac_27777, lam_27778): - i32_res_27781 = sext_i32_i64(n_27775) - x_27782 = (np.int32(2) * k_27774) - k2p2_27783 = (np.int32(2) + x_27782) - cond_27784 = slt32(np.int32(0), trend_27773) - if cond_27784: - k2p2zq_27785 = k2p2_27783 + self.failure_is_an_option = np.int32(1) + mem_125093 = None + defunc_0_f_res_mem_125121 = mem_125118 + defunc_0_f_res_mem_125122 = mem_125110 + defunc_0_f_res_mem_125123 = mem_125113 + defunc_0_f_res_mem_125124 = mem_125115 + mem_125135 = opencl_alloc(self, bytes_125061, "mem_125135") + if (((m_70861 * iota_arg_72752) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_125135, defunc_0_f_res_mem_125121, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_70861 * iota_arg_72752) * np.int32(8)))) + if synchronous: + sync(self) + defunc_0_f_res_mem_125121 = None + defunc_0_f_res_mem_125137 = mem_125135 + defunc_0_f_res_mem_125138 = defunc_0_f_res_mem_125122 + defunc_0_f_res_mem_125139 = defunc_0_f_res_mem_125123 + defunc_0_f_res_mem_125140 = defunc_0_f_res_mem_125124 + defunc_4_map_res_mem_124921 = None + defunc_3_map_res_mem_124961 = None + out_arrsizze_126325 = iota_arg_72752 + out_arrsizze_126327 = iota_arg_72752 + out_arrsizze_126329 = iota_arg_72752 + out_mem_126320 = defunc_0_f_res_mem_124973 + out_mem_126321 = defunc_4_map_res_mem_124919 + out_mem_126322 = defunc_3_map_res_mem_124962 + out_mem_126323 = defunc_3_map_res_mem_124963 + out_mem_126324 = defunc_0_f_res_mem_125137 + out_mem_126326 = defunc_0_f_res_mem_125138 + out_mem_126328 = mem_124976 + out_mem_126330 = defunc_0_f_res_mem_125139 + out_mem_126331 = defunc_0_f_res_mem_125140 + out_mem_126332 = 
mem_125017 + out_mem_126333 = defunc_4_map_res_mem_124920 + out_mem_126334 = defunc_3_map_res_mem_124883 + out_mem_126335 = hist_inds_mem_124138 + return (out_mem_126320, out_mem_126321, out_mem_126322, out_mem_126323, + out_mem_126324, out_arrsizze_126325, out_mem_126326, + out_arrsizze_126327, out_mem_126328, out_arrsizze_126329, + out_mem_126330, out_mem_126331, out_mem_126332, out_mem_126333, + out_mem_126334, out_mem_126335) + def futhark_mainMagnitude(self, mappingindices_mem_120107, images_mem_120108, + N_73007, m_73008, trend_73009, k_73010, n_73011, + freq_73012, hfrac_73013, level_73014, lam_73015, + hist_73016, conf_73017): + x_73020 = (np.int64(2) * k_73010) + k2p2_73021 = (np.int64(2) + x_73020) + cond_73022 = slt64(np.int64(0), trend_73009) + if cond_73022: + k2p2zq_73023 = k2p2_73021 else: - k2p2zq_f_res_27786 = (k2p2_27783 - np.int32(1)) - k2p2zq_27785 = k2p2zq_f_res_27786 - i32_res_27787 = sext_i32_i64(k2p2zq_27785) - binop_x_44384 = (N_27771 * i32_res_27787) - bytes_44383 = (np.int64(4) * binop_x_44384) - if cond_27784: - bounds_invalid_upwards_27789 = slt64(i32_res_27787, np.int64(0)) - valid_27790 = not(bounds_invalid_upwards_27789) - range_valid_c_27791 = True - assert valid_27790, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:37:10-20\n #3 bfastfinal.fut:29:17-66\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_27787, - " is invalid.")) - segmap_group_sizze_30048 = self.sizes["mainDetailed.segmap_group_size_29978"] - segmap_usable_groups_30049 = sdiv_up64(binop_x_44384, - segmap_group_sizze_30048) - mem_44385 = opencl_alloc(self, bytes_44383, "mem_44385") - if ((1 * (np.long(segmap_usable_groups_30049) * np.long(segmap_group_sizze_30048))) != 0): - self.mainDetailedzisegmap_29975_var.set_args(self.global_failure, - np.int64(N_27771), - np.float32(freq_27776), - np.int64(i32_res_27787), - 
mappingindices_mem_44380, - mem_44385) + k2p2zq_f_res_73024 = (k2p2_73021 - np.int64(1)) + k2p2zq_73023 = k2p2zq_f_res_73024 + binop_x_120111 = (N_73007 * k2p2zq_73023) + bytes_120110 = (np.int64(8) * binop_x_120111) + if cond_73022: + bounds_invalid_upwards_73026 = slt64(k2p2zq_73023, np.int64(0)) + valid_73027 = not(bounds_invalid_upwards_73026) + range_valid_c_73028 = True + assert valid_73027, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:31:10-18\n #2 bfastfinal.fut:29:17-58\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_73023, + " is invalid.")) + segmap_group_sizze_90154 = self.sizes["mainMagnitude.segmap_group_size_90090"] + segmap_usable_groups_90155 = sdiv_up64(binop_x_120111, + segmap_group_sizze_90154) + mem_120112 = opencl_alloc(self, bytes_120110, "mem_120112") + if ((1 * (np.int64(segmap_usable_groups_90155) * np.int64(segmap_group_sizze_90154))) != 0): + self.mainMagnitudezisegmap_90087_var.set_args(self.global_failure, + np.int64(N_73007), + np.float64(freq_73012), + np.int64(k2p2zq_73023), + mappingindices_mem_120107, + mem_120112) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_29975_var, - ((np.long(segmap_usable_groups_30049) * np.long(segmap_group_sizze_30048)),), - (np.long(segmap_group_sizze_30048),)) + self.mainMagnitudezisegmap_90087_var, + ((np.int64(segmap_usable_groups_90155) * np.int64(segmap_group_sizze_90154)),), + (np.int64(segmap_group_sizze_90154),)) if synchronous: sync(self) - binop_p_mem_44390 = mem_44385 + binop_p_mem_120117 = mem_120112 else: - bounds_invalid_upwards_27815 = slt64(i32_res_27787, np.int64(0)) - valid_27816 = not(bounds_invalid_upwards_27815) - range_valid_c_27817 = True - assert valid_27816, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:50:10-22\n #3 bfastfinal.fut:30:17-64\n #4 
bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_27787, - " is invalid.")) - segmap_group_sizze_30222 = self.sizes["mainDetailed.segmap_group_size_30156"] - segmap_usable_groups_30223 = sdiv_up64(binop_x_44384, - segmap_group_sizze_30222) - mem_44389 = opencl_alloc(self, bytes_44383, "mem_44389") - if ((1 * (np.long(segmap_usable_groups_30223) * np.long(segmap_group_sizze_30222))) != 0): - self.mainDetailedzisegmap_30153_var.set_args(self.global_failure, - np.int64(N_27771), - np.float32(freq_27776), - np.int64(i32_res_27787), - mappingindices_mem_44380, - mem_44389) + bounds_invalid_upwards_73051 = slt64(k2p2zq_73023, np.int64(0)) + valid_73052 = not(bounds_invalid_upwards_73051) + range_valid_c_73053 = True + assert valid_73052, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:44:10-20\n #2 bfastfinal.fut:30:17-56\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + k2p2zq_73023, + " is invalid.")) + segmap_group_sizze_90250 = self.sizes["mainMagnitude.segmap_group_size_90190"] + segmap_usable_groups_90251 = sdiv_up64(binop_x_120111, + segmap_group_sizze_90250) + mem_120116 = opencl_alloc(self, bytes_120110, "mem_120116") + if ((1 * (np.int64(segmap_usable_groups_90251) * np.int64(segmap_group_sizze_90250))) != 0): + self.mainMagnitudezisegmap_90187_var.set_args(self.global_failure, + np.int64(N_73007), + np.float64(freq_73012), + np.int64(k2p2zq_73023), + mappingindices_mem_120107, + mem_120116) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_30153_var, - ((np.long(segmap_usable_groups_30223) * np.long(segmap_group_sizze_30222)),), - (np.long(segmap_group_sizze_30222),)) + self.mainMagnitudezisegmap_90187_var, + ((np.int64(segmap_usable_groups_90251) * np.int64(segmap_group_sizze_90250)),), + (np.int64(segmap_group_sizze_90250),)) if synchronous: 
sync(self) - binop_p_mem_44390 = mem_44389 - x_27840 = (N_27771 * N_27771) - y_27841 = (np.int64(2) * N_27771) - x_27842 = (x_27840 + y_27841) - x_27843 = (np.int64(1) + x_27842) - y_27844 = (np.int64(1) + N_27771) - zzero_27845 = (y_27844 == np.int64(0)) - nonzzero_27846 = not(zzero_27845) - nonzzero_cert_27847 = True - assert nonzzero_27846, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:32-60\n #1 bfastfinal.fut:174:3-56\n #2 bfastfinal.fut:170:1-174:56\n" % ("division by zero",)) - x_27848 = sdiv64(x_27843, y_27844) - x_27849 = (x_27848 - N_27771) - binop_p_27850 = (x_27849 - np.int64(1)) - defunc_0_f_res_27851 = sext_i64_i32(binop_p_27850) - i32_res_27852 = sitofp_i32_f32(defunc_0_f_res_27851) - segmap_group_sizze_30305 = self.sizes["mainDetailed.segmap_group_size_30284"] - segmap_usable_groups_30306 = sdiv_up64(binop_x_44384, - segmap_group_sizze_30305) - mem_44393 = opencl_alloc(self, bytes_44383, "mem_44393") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44393, np.int64(0), - binop_p_mem_44390, np.int64(0), - np.int64(1), N_27771, - i32_res_27787) - mem_44397 = opencl_alloc(self, bytes_44383, "mem_44397") - if ((1 * (np.long(segmap_usable_groups_30306) * np.long(segmap_group_sizze_30305))) != 0): - self.mainDetailedzisegmap_30281_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(i32_res_27787), - np.float32(i32_res_27852), - mem_44393, mem_44397) + binop_p_mem_120117 = mem_120116 + x_73075 = (N_73007 * N_73007) + y_73076 = (np.int64(2) * N_73007) + x_73077 = (x_73075 + y_73076) + x_73078 = (np.int64(1) + x_73077) + y_73079 = (np.int64(1) + N_73007) + zzero_73080 = (y_73079 == np.int64(0)) + nonzzero_73081 = not(zzero_73080) + nonzzero_cert_73082 = True + assert nonzzero_73081, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:25-53\n #1 bfastfinal.fut:192:5-74\n #2 bfastfinal.fut:187:1-193:48\n" % ("division by zero",)) + x_73083 = sdiv64(x_73078, y_73079) + x_73084 = (x_73083 - N_73007) + binop_p_73085 = (x_73084 - np.int64(1)) + 
defunc_0_f_res_73086 = sitofp_i64_f64(binop_p_73085) + segmap_group_sizze_90301 = self.sizes["mainMagnitude.segmap_group_size_90283"] + segmap_usable_groups_90302 = sdiv_up64(binop_x_120111, + segmap_group_sizze_90301) + mem_120120 = opencl_alloc(self, bytes_120110, "mem_120120") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120120, np.int64(0), + binop_p_mem_120117, np.int64(0), + np.int64(1), N_73007, + k2p2zq_73023) + mem_120124 = opencl_alloc(self, bytes_120110, "mem_120124") + if ((1 * (np.int64(segmap_usable_groups_90302) * np.int64(segmap_group_sizze_90301))) != 0): + self.mainMagnitudezisegmap_90280_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(k2p2zq_73023), + np.float64(defunc_0_f_res_73086), + mem_120120, mem_120124) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_30281_var, - ((np.long(segmap_usable_groups_30306) * np.long(segmap_group_sizze_30305)),), - (np.long(segmap_group_sizze_30305),)) + self.mainMagnitudezisegmap_90280_var, + ((np.int64(segmap_usable_groups_90302) * np.int64(segmap_group_sizze_90301)),), + (np.int64(segmap_group_sizze_90301),)) if synchronous: sync(self) - eq_x_y_27860 = (np.int64(0) == i32_res_27787) - p_and_eq_x_y_27861 = (cond_27784 and eq_x_y_27860) - not_p_27862 = not(cond_27784) - p_and_eq_x_y_27863 = (eq_x_y_27860 and not_p_27862) - empty_slice_27864 = (p_and_eq_x_y_27861 or p_and_eq_x_y_27863) - m_27865 = (i32_res_27787 - np.int64(1)) - zzero_leq_i_p_m_t_s_27866 = sle64(np.int64(0), m_27865) - i_p_m_t_s_leq_w_27867 = slt64(m_27865, i32_res_27787) - i_lte_j_27868 = sle64(np.int64(0), i32_res_27787) - y_27869 = (zzero_leq_i_p_m_t_s_27866 and i_p_m_t_s_leq_w_27867) - y_27870 = (i_lte_j_27868 and y_27869) - ok_or_empty_27871 = (empty_slice_27864 or y_27870) - empty_slice_27872 = (i32_res_27781 == np.int64(0)) - m_27873 = (i32_res_27781 - np.int64(1)) - zzero_leq_i_p_m_t_s_27874 = sle64(np.int64(0), m_27873) - i_p_m_t_s_leq_w_27875 = slt64(m_27873, N_27771) - i_lte_j_27876 = 
sle64(np.int64(0), i32_res_27781) - y_27877 = (zzero_leq_i_p_m_t_s_27874 and i_p_m_t_s_leq_w_27875) - y_27878 = (i_lte_j_27876 and y_27877) - ok_or_empty_27879 = (empty_slice_27872 or y_27878) - index_ok_27880 = (ok_or_empty_27871 and ok_or_empty_27879) - index_certs_27881 = True - assert index_ok_27880, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-21\n #1 bfastfinal.fut:174:3-56\n #2 bfastfinal.fut:170:1-174:56\n" % ("Index [", + empty_slice_73094 = (k2p2zq_73023 == np.int64(0)) + m_73095 = (k2p2zq_73023 - np.int64(1)) + zzero_leq_i_p_m_t_s_73096 = sle64(np.int64(0), m_73095) + i_p_m_t_s_leq_w_73097 = slt64(m_73095, k2p2zq_73023) + i_lte_j_73098 = sle64(np.int64(0), k2p2zq_73023) + y_73099 = (zzero_leq_i_p_m_t_s_73096 and i_p_m_t_s_leq_w_73097) + y_73100 = (i_lte_j_73098 and y_73099) + ok_or_empty_73101 = (empty_slice_73094 or y_73100) + empty_slice_73102 = (n_73011 == np.int64(0)) + m_73103 = (n_73011 - np.int64(1)) + zzero_leq_i_p_m_t_s_73104 = sle64(np.int64(0), m_73103) + i_p_m_t_s_leq_w_73105 = slt64(m_73103, N_73007) + i_lte_j_73106 = sle64(np.int64(0), n_73011) + y_73107 = (zzero_leq_i_p_m_t_s_73104 and i_p_m_t_s_leq_w_73105) + y_73108 = (i_lte_j_73106 and y_73107) + ok_or_empty_73109 = (empty_slice_73102 or y_73108) + index_ok_73110 = (ok_or_empty_73101 and ok_or_empty_73109) + index_certs_73111 = True + assert index_ok_73110, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-19\n #1 bfastfinal.fut:192:5-74\n #2 bfastfinal.fut:187:1-193:48\n" % ("Index [", np.int64(0), ":, :", - i32_res_27781, + n_73011, "] out of bounds for array of shape [", - i32_res_27787, + k2p2zq_73023, "][", - N_27771, + N_73007, "].")) - empty_slice_27883 = (i32_res_27787 == np.int64(0)) - ok_or_empty_27884 = (y_27870 or empty_slice_27883) - index_ok_27885 = (ok_or_empty_27879 and ok_or_empty_27884) - index_certs_27886 = True - assert index_ok_27885, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-22\n #1 
bfastfinal.fut:174:3-56\n #2 bfastfinal.fut:170:1-174:56\n" % ("Index [:", - i32_res_27781, + index_certs_73113 = True + assert index_ok_73110, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-20\n #1 bfastfinal.fut:192:5-74\n #2 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + n_73011, ", ", np.int64(0), ":] out of bounds for array of shape [", - N_27771, + N_73007, "][", - i32_res_27787, + k2p2zq_73023, "].")) - empty_slice_27888 = (m_27772 == np.int64(0)) - m_27889 = (m_27772 - np.int64(1)) - zzero_leq_i_p_m_t_s_27890 = sle64(np.int64(0), m_27889) - i_p_m_t_s_leq_w_27891 = slt64(m_27889, m_27772) - i_lte_j_27892 = sle64(np.int64(0), m_27772) - y_27893 = (zzero_leq_i_p_m_t_s_27890 and i_p_m_t_s_leq_w_27891) - y_27894 = (i_lte_j_27892 and y_27893) - ok_or_empty_27895 = (empty_slice_27888 or y_27894) - index_ok_27896 = (ok_or_empty_27879 and ok_or_empty_27895) - index_certs_27897 = True - assert index_ok_27896, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-26\n #1 bfastfinal.fut:174:3-56\n #2 bfastfinal.fut:170:1-174:56\n" % ("Index [", + empty_slice_73115 = (m_73008 == np.int64(0)) + m_73116 = (m_73008 - np.int64(1)) + zzero_leq_i_p_m_t_s_73117 = sle64(np.int64(0), m_73116) + i_p_m_t_s_leq_w_73118 = slt64(m_73116, m_73008) + i_lte_j_73119 = sle64(np.int64(0), m_73008) + y_73120 = (zzero_leq_i_p_m_t_s_73117 and i_p_m_t_s_leq_w_73118) + y_73121 = (i_lte_j_73119 and y_73120) + ok_or_empty_73122 = (empty_slice_73115 or y_73121) + index_ok_73123 = (ok_or_empty_73109 and ok_or_empty_73122) + index_certs_73124 = True + assert index_ok_73123, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-24\n #1 bfastfinal.fut:192:5-74\n #2 bfastfinal.fut:187:1-193:48\n" % ("Index [", np.int64(0), ":, :", - i32_res_27781, + n_73011, "] out of bounds for array of shape [", - m_27772, + m_73008, "][", - N_27771, + N_73007, "].")) - suff_outer_par_30312 = (self.sizes["mainDetailed.suff_outer_par_6"] <= m_27772) - 
segmap_group_sizze_30338 = self.sizes["mainDetailed.segmap_group_size_30316"] - max_num_groups_45706 = self.sizes["mainDetailed.segmap_num_groups_30318"] - num_groups_30339 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segmap_group_sizze_30338), - sext_i32_i64(max_num_groups_45706)))) - nest_sizze_30514 = (m_27772 * i32_res_27787) - segmap_group_sizze_30515 = self.sizes["mainDetailed.segmap_group_size_30363"] - max_num_groups_45707 = self.sizes["mainDetailed.segmap_num_groups_30365"] - num_groups_30516 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_30514, - segmap_group_sizze_30515), - sext_i32_i64(max_num_groups_45707)))) - suff_outer_par_30520 = (self.sizes["mainDetailed.suff_outer_par_7"] <= nest_sizze_30514) - y_30544 = (i32_res_27787 * i32_res_27787) - comparatee_30545 = (m_27772 * y_30544) - suff_outer_par_30546 = (self.sizes["mainDetailed.suff_outer_par_8"] <= comparatee_30545) - nest_sizze_30566 = (i32_res_27781 * comparatee_30545) - segred_group_sizze_30567 = self.sizes["mainDetailed.segred_group_size_30423"] - max_num_groups_45708 = self.sizes["mainDetailed.segred_num_groups_30425"] - num_groups_30568 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_30566, - segred_group_sizze_30567), - sext_i32_i64(max_num_groups_45708)))) - Tx_42527 = self.sizes["mainDetailed.Tx_42525"] - Ty_42528 = self.sizes["mainDetailed.Ty_42526"] - Ty_42529 = smin64(i32_res_27787, Ty_42528) - Tx_42530 = smin64(i32_res_27787, Tx_42527) - gridDim_zz_42533 = sdiv_up64(m_27772, np.int64(30)) - group_sizze_tile3d_42536 = (Ty_42529 * Tx_42530) - binop_x_44399 = (N_27771 * m_27772) - bytes_44398 = (np.int64(4) * binop_x_44399) - bytes_44443 = (np.int64(4) * comparatee_30545) - bytes_44402 = (np.int64(4) * y_30544) - binop_x_44546 = (i32_res_27787 * nest_sizze_30514) - bytes_44544 = (np.int64(4) * binop_x_44546) - bytes_44448 = (np.int64(4) * i32_res_27787) - binop_x_44477 = (np.int64(30) * group_sizze_tile3d_42536) - bytes_44475 = 
(np.int64(4) * binop_x_44477) - binop_x_45447 = (np.int64(4) * Ty_42529) - binop_x_45448 = (Tx_42530 * binop_x_45447) - sizze_45449 = (np.int64(30) * binop_x_45448) - num_threads_45527 = (segmap_group_sizze_30338 * num_groups_30339) - total_sizze_45528 = (bytes_44402 * num_threads_45527) - num_threads_45529 = (segmap_group_sizze_30515 * num_groups_30516) - total_sizze_45530 = (bytes_44448 * num_threads_45529) - local_memory_capacity_45830 = self.max_local_memory - if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45830)) and suff_outer_par_30312): - mem_44400 = opencl_alloc(self, bytes_44398, "mem_44400") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44400, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_27771, m_27772) - mem_44446 = opencl_alloc(self, bytes_44443, "mem_44446") - mem_44404 = opencl_alloc(self, total_sizze_45528, "mem_44404") - if ((1 * (np.long(num_groups_30339) * np.long(segmap_group_sizze_30338))) != 0): - self.mainDetailedzisegmap_30314_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int64(num_groups_30339), - binop_p_mem_44390, - mem_44397, mem_44400, - mem_44404, mem_44446) + cond_73125 = (hist_73016 == np.int64(-1)) + y_73126 = slt64(m_73103, n_73011) + bounds_check_73127 = (zzero_leq_i_p_m_t_s_73104 and y_73126) + suff_outer_redomap_90308 = (self.sizes["mainMagnitude.suff_outer_redomap_0"] <= m_73008) + segred_group_sizze_90323 = self.sizes["mainMagnitude.segred_group_size_90310"] + max_num_groups_126340 = self.sizes["mainMagnitude.segred_num_groups_90312"] + num_groups_90324 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segred_group_sizze_90323), + sext_i32_i64(max_num_groups_126340)))) + max_group_sizze_90561 = self.max_group_size + fits_90562 = sle64(n_73011, max_group_sizze_90561) + suff_intra_par_90564 = (self.sizes["mainMagnitude.suff_intra_par_1"] <= n_73011) + 
intra_suff_and_fits_90565 = (fits_90562 and suff_intra_par_90564) + nest_sizze_90595 = (m_73008 * n_73011) + segscan_group_sizze_90596 = self.sizes["mainMagnitude.segscan_group_size_90476"] + max_num_groups_126341 = self.sizes["mainMagnitude.segscan_num_groups_90478"] + num_groups_90597 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_90595, + segscan_group_sizze_90596), + sext_i32_i64(max_num_groups_126341)))) + segmap_group_sizze_90638 = self.sizes["mainMagnitude.segmap_group_size_90407"] + segred_group_sizze_90654 = self.sizes["mainMagnitude.segred_group_size_90550"] + max_num_groups_126342 = self.sizes["mainMagnitude.segred_num_groups_90552"] + num_groups_90655 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segred_group_sizze_90654), + sext_i32_i64(max_num_groups_126342)))) + segmap_group_sizze_93521 = self.sizes["mainMagnitude.segmap_group_size_93260"] + max_num_groups_126343 = self.sizes["mainMagnitude.segmap_num_groups_93262"] + num_groups_93522 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_93521), + sext_i32_i64(max_num_groups_126343)))) + num_threads_115425 = (segmap_group_sizze_93521 * num_groups_93522) + y_115427 = smod_safe64(m_73008, num_threads_115425) + x_115428 = (num_threads_115425 - y_115427) + y_115429 = smod_safe64(x_115428, num_threads_115425) + segmap_group_sizze_97411 = self.sizes["mainMagnitude.segmap_group_size_97026"] + max_num_groups_126344 = self.sizes["mainMagnitude.segmap_num_groups_97028"] + num_groups_97412 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_97411), + sext_i32_i64(max_num_groups_126344)))) + num_threads_115503 = (segmap_group_sizze_97411 * num_groups_97412) + y_115505 = smod_safe64(m_73008, num_threads_115503) + x_115506 = (num_threads_115503 - y_115505) + y_115507 = smod_safe64(x_115506, num_threads_115503) + binop_x_120126 = (N_73007 * m_73008) + bytes_120125 = (np.int64(8) * binop_x_120126) + bytes_120173 = 
(np.int64(8) * m_73008) + bytes_120175 = (np.int64(8) * nest_sizze_90595) + bytes_120129 = (np.int64(8) * n_73011) + binop_x_120244 = (m_73008 * k2p2zq_73023) + bytes_120247 = (np.int64(8) * k2p2zq_73023) + binop_x_120251 = (k2p2zq_73023 * k2p2zq_73023) + bytes_120250 = (np.int64(8) * binop_x_120251) + bytes_120253 = (np.int64(8) * y_115429) + bytes_121947 = (np.int64(8) * y_115507) + bytes_121990 = (np.int64(8) * binop_x_120244) + binop_x_121994 = (np.int64(2) * m_73008) + binop_x_121995 = (k2p2zq_73023 * binop_x_121994) + bytes_121993 = (np.int64(8) * binop_x_121995) + binop_x_121999 = (k2p2zq_73023 * binop_x_120244) + bytes_121997 = (np.int64(8) * binop_x_121999) + num_threads_125867 = (segred_group_sizze_90323 * num_groups_90324) + total_sizze_125868 = (bytes_120129 * num_threads_125867) + total_sizze_125869 = (bytes_120129 * num_threads_125867) + total_sizze_125870 = (bytes_120129 * num_threads_125867) + segmap_group_sizze_90784 = self.sizes["mainMagnitude.segmap_group_size_90685"] + if cond_73125: + index_certs_73131 = True + assert bounds_check_73127, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 recresid.fut:89:39-59\n #3 mroc.fut:27:25-38\n #4 mroc.fut:77:27-61\n #5 bfastfinal.fut:45:24-53\n #6 bfastfinal.fut:192:5-74\n #7 bfastfinal.fut:187:1-193:48\n" % ("Index [", + m_73103, + "] out of bounds for array of shape [", + n_73011, + "].")) + local_memory_capacity_126526 = self.max_local_memory + if (((sle64(((np.int32(1) + srem64((np.int64(8) - srem64(np.int32(1), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * segred_group_sizze_90323) + srem64((np.int64(8) - srem64((np.int32(8) * segred_group_sizze_90323), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126526)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126526))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126526))) and suff_outer_redomap_90308): + mem_120127 = opencl_alloc(self, 
bytes_120125, "mem_120127") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120127, np.int64(0), + images_mem_120108, + np.int64(0), np.int64(1), + N_73007, m_73008) + mem_120172 = opencl_alloc(self, np.int64(8), "mem_120172") + mem_120174 = opencl_alloc(self, bytes_120173, "mem_120174") + mem_120177 = opencl_alloc(self, bytes_120175, "mem_120177") + mem_120180 = opencl_alloc(self, bytes_120175, "mem_120180") + mem_120130 = opencl_alloc(self, total_sizze_125868, "mem_120130") + mem_120144 = opencl_alloc(self, total_sizze_125869, "mem_120144") + mem_120146 = opencl_alloc(self, total_sizze_125870, "mem_120146") + mainMagnitudezicounter_mem_126345 = self.mainMagnitudezicounter_mem_126345 + group_res_arr_mem_126347 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_90323 * num_groups_90324)), + "group_res_arr_mem_126347") + num_threads_126349 = (num_groups_90324 * segred_group_sizze_90323) + if ((1 * (np.int64(num_groups_90324) * np.int64(segred_group_sizze_90323))) != 0): + self.mainMagnitudezisegred_nonseg_90321_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_90323))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_73008), + np.int64(n_73011), + np.int64(m_73103), + np.int64(num_groups_90324), + np.int64(num_threads_125867), + np.int64(num_threads_126349), + mem_120127, + mem_120130, + mem_120144, + mem_120146, + mem_120172, + mem_120174, + mem_120177, + mem_120180, + mainMagnitudezicounter_mem_126345, + group_res_arr_mem_126347) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_nonseg_90321_var, + ((np.int64(num_groups_90324) * np.int64(segred_group_sizze_90323)),), + (np.int64(segred_group_sizze_90323),)) + if synchronous: + sync(self) + mem_120127 = None + mem_120130 = None + mem_120144 = None + mem_120146 = None + read_res_129926 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129926, mem_120172, + device_offset=(np.int64(np.int64(0)) * 8), + 
is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_90354 = read_res_129926[0] + mem_120172 = None + mem_120220 = opencl_alloc(self, bytes_120173, "mem_120220") + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120220, mem_120174, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) + if synchronous: + sync(self) + mem_120174 = None + mem_120224 = opencl_alloc(self, bytes_120175, "mem_120224") + group_sizze_126387 = self.sizes["mainMagnitude.group_size_126387"] + num_groups_126388 = sdiv_up64((m_73008 * n_73011), group_sizze_126387) + if ((1 * (np.int64(num_groups_126388) * np.int64(group_sizze_126387))) != 0): + self.mainMagnitudezicopy_126384_var.set_args(np.int64(m_73008), + np.int64(n_73011), + mem_120177, mem_120224) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126384_var, + ((np.int64(num_groups_126388) * np.int64(group_sizze_126387)),), + (np.int64(group_sizze_126387),)) + if synchronous: + sync(self) + mem_120177 = None + mem_120228 = opencl_alloc(self, bytes_120175, "mem_120228") + group_sizze_126392 = self.sizes["mainMagnitude.group_size_126392"] + num_groups_126393 = sdiv_up64((m_73008 * n_73011), group_sizze_126392) + if ((1 * (np.int64(num_groups_126393) * np.int64(group_sizze_126392))) != 0): + self.mainMagnitudezicopy_126389_var.set_args(np.int64(m_73008), + np.int64(n_73011), + mem_120180, mem_120228) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126389_var, + ((np.int64(num_groups_126393) * np.int64(group_sizze_126392)),), + (np.int64(group_sizze_126392),)) + if synchronous: + sync(self) + mem_120180 = None + defunc_3_map_res_mem_120230 = mem_120220 + defunc_3_map_res_mem_120231 = mem_120224 + defunc_3_map_res_mem_120232 = mem_120228 + defunc_2_reduce_res_73132 = defunc_2_reduce_res_90354 + else: + local_memory_capacity_126495 = self.max_local_memory + if (sle64((((bytes_120129 + srem64((np.int64(8) - 
srem64(bytes_120129, + np.int64(8))), + np.int64(8))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))) + (bytes_120129 + srem64((np.int64(8) - srem64(bytes_120129, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126495)) and intra_suff_and_fits_90565): + mem_120191 = opencl_alloc(self, bytes_120173, "mem_120191") + mem_120194 = opencl_alloc(self, bytes_120175, "mem_120194") + mem_120197 = opencl_alloc(self, bytes_120175, "mem_120197") + if ((1 * (np.int64(m_73008) * np.int64(n_73011))) != 0): + self.mainMagnitudezisegmap_intragroup_90368_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + cl.LocalMemory(np.int64(bytes_120129)), + np.int64(N_73007), + np.int64(n_73011), + np.int64(m_73103), + images_mem_120108, + mem_120191, + mem_120194, + mem_120197) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_90368_var, + ((np.int64(m_73008) * np.int64(n_73011)),), + (np.int64(n_73011),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = mem_120191 + defunc_3_map_res_mem_120212 = mem_120194 + defunc_3_map_res_mem_120213 = mem_120197 + else: + mem_120201 = opencl_alloc(self, bytes_120175, "mem_120201") + if slt64(np.int64(0), (m_73008 * n_73011)): + stage1_max_num_groups_126411 = self.max_group_size + stage1_num_groups_126412 = smin64(stage1_max_num_groups_126411, + num_groups_90597) + num_threads_126413 = sext_i64_i32((stage1_num_groups_126412 * segscan_group_sizze_90596)) + if ((1 * (np.int64(stage1_num_groups_126412) * np.int64(segscan_group_sizze_90596))) != 0): + self.mainMagnitudeziscan_stage1_90482_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_90596)))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(m_73103), + np.int32(num_threads_126413), + images_mem_120108, + mem_120201) + 
cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage1_90482_var, + ((np.int64(stage1_num_groups_126412) * np.int64(segscan_group_sizze_90596)),), + (np.int64(segscan_group_sizze_90596),)) + if synchronous: + sync(self) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_126412))) != 0): + self.mainMagnitudeziscan_stage2_90482_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_126412)))), + np.int64(m_73008), + np.int64(n_73011), + np.int64(stage1_num_groups_126412), + np.int32(num_threads_126413), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage2_90482_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_126412)),), + (np.int64(stage1_num_groups_126412),)) + if synchronous: + sync(self) + required_groups_126455 = sext_i64_i32(sdiv_up64((m_73008 * n_73011), + segscan_group_sizze_90596)) + if ((1 * (np.int64(num_groups_90597) * np.int64(segscan_group_sizze_90596))) != 0): + self.mainMagnitudeziscan_stage3_90482_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(n_73011), + np.int64(num_groups_90597), + np.int32(num_threads_126413), + np.int32(required_groups_126455), + mem_120201) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage3_90482_var, + ((np.int64(num_groups_90597) * np.int64(segscan_group_sizze_90596)),), + (np.int64(segscan_group_sizze_90596),)) + if synchronous: + sync(self) + mem_120203 = opencl_alloc(self, bytes_120173, "mem_120203") + group_sizze_126470 = self.sizes["mainMagnitude.group_size_126470"] + num_groups_126471 = sdiv_up64(m_73008, group_sizze_126470) + if ((1 * (np.int64(num_groups_126471) * np.int64(group_sizze_126470))) != 0): + self.mainMagnitudezicopy_126467_var.set_args(np.int64(m_73008), + np.int64(n_73011), + np.int64(m_73103), + mem_120201, mem_120203) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126467_var, + 
((np.int64(num_groups_126471) * np.int64(group_sizze_126470)),), + (np.int64(group_sizze_126470),)) + if synchronous: + sync(self) + mem_120206 = opencl_alloc(self, bytes_120175, "mem_120206") + self.futhark_builtinzhreplicate_f64(mem_120206, (m_73008 * n_73011), + np.nan) + mem_120209 = opencl_alloc(self, bytes_120175, "mem_120209") + self.futhark_builtinzhreplicate_i64(mem_120209, (m_73008 * n_73011), + np.int64(0)) + segmap_usable_groups_90639 = sdiv_up64(nest_sizze_90595, + segmap_group_sizze_90638) + if ((1 * (np.int64(segmap_usable_groups_90639) * np.int64(segmap_group_sizze_90638))) != 0): + self.mainMagnitudezisegmap_90404_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(m_73103), + images_mem_120108, + mem_120201, + mem_120206, + mem_120209) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_90404_var, + ((np.int64(segmap_usable_groups_90639) * np.int64(segmap_group_sizze_90638)),), + (np.int64(segmap_group_sizze_90638),)) + if synchronous: + sync(self) + mem_120201 = None + defunc_2_reduce_res_map_acc_mem_120211 = mem_120203 + defunc_3_map_res_mem_120212 = mem_120206 + defunc_3_map_res_mem_120213 = mem_120209 + mem_120215 = opencl_alloc(self, bytes_120173, "mem_120215") + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120215, + defunc_2_reduce_res_map_acc_mem_120211, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) + if synchronous: + sync(self) + mem_120218 = opencl_alloc(self, np.int64(8), "mem_120218") + mainMagnitudezicounter_mem_126496 = self.mainMagnitudezicounter_mem_126496 + group_res_arr_mem_126498 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_90654 * num_groups_90655)), + "group_res_arr_mem_126498") + num_threads_126500 = (num_groups_90655 * segred_group_sizze_90654) + if ((1 * (np.int64(num_groups_90655) * np.int64(segred_group_sizze_90654))) != 0): + 
self.mainMagnitudezisegred_nonseg_90558_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_90654))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_73008), + np.int64(num_groups_90655), + np.int64(num_threads_126500), + defunc_2_reduce_res_map_acc_mem_120211, + mem_120218, + mainMagnitudezicounter_mem_126496, + group_res_arr_mem_126498) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_nonseg_90558_var, + ((np.int64(num_groups_90655) * np.int64(segred_group_sizze_90654)),), + (np.int64(segred_group_sizze_90654),)) + if synchronous: + sync(self) + defunc_2_reduce_res_map_acc_mem_120211 = None + read_res_129928 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129928, mem_120218, + device_offset=(np.int64(np.int64(0)) * 8), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_res_90662 = read_res_129928[0] + mem_120218 = None + defunc_3_map_res_mem_120230 = mem_120215 + defunc_3_map_res_mem_120231 = defunc_3_map_res_mem_120212 + defunc_3_map_res_mem_120232 = defunc_3_map_res_mem_120213 + defunc_2_reduce_res_73132 = defunc_2_reduce_res_90662 + empty_slice_73161 = (defunc_2_reduce_res_73132 == np.int64(0)) + m_73162 = (defunc_2_reduce_res_73132 - np.int64(1)) + zzero_leq_i_p_m_t_s_73163 = sle64(np.int64(0), m_73162) + i_p_m_t_s_leq_w_73164 = slt64(m_73162, n_73011) + i_lte_j_73165 = sle64(np.int64(0), defunc_2_reduce_res_73132) + y_73166 = (zzero_leq_i_p_m_t_s_73163 and i_p_m_t_s_leq_w_73164) + y_73167 = (i_lte_j_73165 and y_73166) + ok_or_empty_73168 = (empty_slice_73161 or y_73167) + nest_sizze_90783 = (m_73008 * defunc_2_reduce_res_73132) + max_num_groups_126527 = self.sizes["mainMagnitude.segmap_num_groups_90687"] + num_groups_90785 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_90783, + segmap_group_sizze_90784), + sext_i32_i64(max_num_groups_126527)))) + mem_120235 = opencl_alloc(self, bytes_120110, "mem_120235") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_120235, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_73023, + N_73007) + binop_x_120245 = (defunc_2_reduce_res_73132 * binop_x_120244) + bytes_120243 = (np.int64(8) * binop_x_120245) + mem_120246 = opencl_alloc(self, bytes_120243, "mem_120246") + num_threads_125875 = (segmap_group_sizze_90784 * num_groups_90785) + total_sizze_125876 = (bytes_120247 * num_threads_125875) + mem_120238 = opencl_alloc(self, total_sizze_125876, "mem_120238") + total_sizze_125877 = (bytes_120247 * num_threads_125875) + mem_125145 = opencl_alloc(self, total_sizze_125877, "mem_125145") + if ((1 * (np.int64(num_groups_90785) * np.int64(segmap_group_sizze_90784))) != 0): + self.mainMagnitudezisegmap_90682_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(m_73103), + np.int64(defunc_2_reduce_res_73132), + np.int64(num_groups_90785), + np.int64(num_threads_125875), + defunc_3_map_res_mem_120232, + mem_120235, mem_120238, + mem_120246, mem_125145) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_30314_var, - ((np.long(num_groups_30339) * np.long(segmap_group_sizze_30338)),), - (np.long(segmap_group_sizze_30338),)) + self.mainMagnitudezisegmap_90682_var, + ((np.int64(num_groups_90785) * np.int64(segmap_group_sizze_90784)),), + (np.int64(segmap_group_sizze_90784),)) if synchronous: sync(self) - mem_44400 = None - mem_44404 = None - mem_44547 = opencl_alloc(self, bytes_44544, "mem_44547") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44547, np.int64(0), - mem_44446, np.int64(0), - np.int64(1), m_27772, - (i32_res_27787 * i32_res_27787)) - mem_44446 = None - defunc_3_map_res_mem_44549 = mem_44547 - else: - local_memory_capacity_45829 = self.max_local_memory - if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45829)) and suff_outer_par_30520): - mem_44465 = 
opencl_alloc(self, bytes_44544, "mem_44465") - mem_44449 = opencl_alloc(self, total_sizze_45530, "mem_44449") - if ((1 * (np.long(num_groups_30516) * np.long(segmap_group_sizze_30515))) != 0): - self.mainDetailedzisegmap_30360_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int64(num_groups_30516), - images_mem_44381, - mem_44393, mem_44397, - mem_44449, mem_44465) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_120232 = None + mem_120235 = None + mem_120238 = None + mem_125145 = None + index_ok_73197 = (ok_or_empty_73122 and ok_or_empty_73168) + index_certs_73198 = True + assert index_ok_73197, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:98:15-28\n #1 mroc.fut:27:25-38\n #2 mroc.fut:77:27-61\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n" % ("Index [", + np.int64(0), + ":, :", + defunc_2_reduce_res_73132, + "] out of bounds for array of shape [", + m_73008, + "][", + n_73011, + "].")) + i64_res_73200 = sitofp_i64_f64(k2p2zq_73023) + tol_73201 = (np.float64(1.4901161193847656e-8) / i64_res_73200) + i_p_m_t_s_leq_w_73202 = slt64(m_73095, defunc_2_reduce_res_73132) + y_73203 = (zzero_leq_i_p_m_t_s_73096 and i_p_m_t_s_leq_w_73202) + y_73204 = (i_lte_j_73098 and y_73203) + ok_or_empty_73205 = (empty_slice_73094 or y_73204) + index_certs_73206 = True + assert ok_or_empty_73205, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:56-63\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + k2p2zq_73023, + "] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "].")) + index_ok_73207 = (ok_or_empty_73101 and ok_or_empty_73205) + index_certs_73208 = 
True + assert index_ok_73207, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:23:43-53\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 recresid.fut:22:5-25:22\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + k2p2zq_73023, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "][", + k2p2zq_73023, + "].")) + replicate_arg_73209 = (np.int64(2) * k2p2zq_73023) + bounds_invalid_upwards_73210 = slt64(replicate_arg_73209, np.int64(0)) + valid_73211 = not(bounds_invalid_upwards_73210) + range_valid_c_73212 = True + assert valid_73211, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:23:25-63\n #5 /prelude/soacs.fut:67:19-23\n #6 /prelude/soacs.fut:67:3-37\n #7 recresid.fut:22:5-25:22\n #8 recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:192:5-74\n #13 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_73209, + " is invalid.")) + min_res_73213 = smin64(k2p2zq_73023, k2p2zq_73023) + k_73214 = (np.int64(1) + k2p2zq_73023) + mem_120248 = opencl_alloc(self, bytes_120247, "mem_120248") + self.futhark_builtinzhiota_i64(mem_120248, k2p2zq_73023, np.int64(0), + np.int64(1)) + segmap_group_sizze_90843 = self.sizes["mainMagnitude.segmap_group_size_90821"] + segmap_usable_groups_90844 = sdiv_up64(binop_x_120251, + segmap_group_sizze_90843) + mem_120252 = opencl_alloc(self, bytes_120250, "mem_120252") + if ((1 * (np.int64(segmap_usable_groups_90844) * np.int64(segmap_group_sizze_90843))) != 0): + self.mainMagnitudezisegmap_90818_var.set_args(self.global_failure, + 
np.int64(k2p2zq_73023), + mem_120252) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_90818_var, + ((np.int64(segmap_usable_groups_90844) * np.int64(segmap_group_sizze_90843)),), + (np.int64(segmap_group_sizze_90843),)) + if synchronous: + sync(self) + suff_outer_par_90947 = (self.sizes["mainMagnitude.suff_outer_par_2"] <= m_73008) + segmap_group_sizze_91404 = self.sizes["mainMagnitude.segmap_group_size_90951"] + max_num_groups_126555 = self.sizes["mainMagnitude.segmap_num_groups_90953"] + num_groups_91405 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_91404), + sext_i32_i64(max_num_groups_126555)))) + suff_outer_par_93472 = (self.sizes["mainMagnitude.suff_outer_par_8"] <= m_73008) + segmap_group_sizze_93476 = self.sizes["mainMagnitude.segmap_group_size_93215"] + max_num_groups_126556 = self.sizes["mainMagnitude.segmap_num_groups_93217"] + num_groups_93477 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_93476), + sext_i32_i64(max_num_groups_126556)))) + segred_group_sizze_93498 = self.sizes["mainMagnitude.segred_group_size_93292"] + max_num_groups_126557 = self.sizes["mainMagnitude.segred_num_groups_93294"] + num_groups_93499 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_93498), + sext_i32_i64(max_num_groups_126557)))) + segmap_group_sizze_93509 = self.sizes["mainMagnitude.segmap_group_size_93284"] + segmap_group_sizze_93516 = self.sizes["mainMagnitude.segmap_group_size_93276"] + segmap_group_sizze_93534 = self.sizes["mainMagnitude.segmap_group_size_92927"] + max_num_groups_126558 = self.sizes["mainMagnitude.segmap_num_groups_92929"] + num_groups_93535 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_93534), + sext_i32_i64(max_num_groups_126558)))) + segmap_group_sizze_93820 = self.sizes["mainMagnitude.segmap_group_size_92883"] + nest_sizze_93834 = (m_73008 * binop_x_120251) + 
segmap_group_sizze_93835 = self.sizes["mainMagnitude.segmap_group_size_92849"] + suff_outer_par_93859 = (self.sizes["mainMagnitude.suff_outer_par_5"] <= binop_x_120244) + suff_outer_par_93954 = (self.sizes["mainMagnitude.suff_outer_par_6"] <= m_73008) + segmap_group_sizze_93957 = self.sizes["mainMagnitude.segmap_group_size_92421"] + max_num_groups_126559 = self.sizes["mainMagnitude.segmap_num_groups_92423"] + num_groups_93958 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_93957), + sext_i32_i64(max_num_groups_126559)))) + suff_outer_par_93988 = (self.sizes["mainMagnitude.suff_outer_par_7"] <= binop_x_120244) + segred_group_sizze_94011 = self.sizes["mainMagnitude.segred_group_size_92535"] + segmap_group_sizze_94028 = self.sizes["mainMagnitude.segmap_group_size_92522"] + segmap_group_sizze_94039 = self.sizes["mainMagnitude.segmap_group_size_92511"] + segmap_group_sizze_94050 = self.sizes["mainMagnitude.segmap_group_size_92206"] + max_num_groups_126560 = self.sizes["mainMagnitude.segmap_num_groups_92208"] + num_groups_94051 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_94050), + sext_i32_i64(max_num_groups_126560)))) + suff_outer_par_94055 = (self.sizes["mainMagnitude.suff_outer_par_3"] <= binop_x_120244) + suff_outer_par_94077 = (self.sizes["mainMagnitude.suff_outer_par_4"] <= nest_sizze_93834) + nest_sizze_94092 = (k2p2zq_73023 * nest_sizze_93834) + segred_group_sizze_94093 = self.sizes["mainMagnitude.segred_group_size_92257"] + max_num_groups_126561 = self.sizes["mainMagnitude.segred_num_groups_92259"] + num_groups_94094 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_94092, + segred_group_sizze_94093), + sext_i32_i64(max_num_groups_126561)))) + segmap_group_sizze_94109 = self.sizes["mainMagnitude.segmap_group_size_92143"] + max_num_groups_126562 = self.sizes["mainMagnitude.segmap_num_groups_92145"] + num_groups_94110 = sext_i64_i32(smax64(np.int64(1), + 
smin64(sdiv_up64(m_73008, + segmap_group_sizze_94109), + sext_i32_i64(max_num_groups_126562)))) + segmap_group_sizze_94179 = self.sizes["mainMagnitude.segmap_group_size_92101"] + segmap_group_sizze_94250 = self.sizes["mainMagnitude.segmap_group_size_91944"] + segmap_group_sizze_94269 = self.sizes["mainMagnitude.segmap_group_size_91897"] + segmap_group_sizze_94278 = self.sizes["mainMagnitude.segmap_group_size_91874"] + segmap_usable_groups_93510 = sdiv_up_safe64(m_73008, + segmap_group_sizze_93509) + segmap_usable_groups_93517 = sdiv_up_safe64(m_73008, + segmap_group_sizze_93516) + segmap_usable_groups_94029 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_94028) + segmap_usable_groups_94040 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_94039) + tile_sizze_115656 = self.sizes["mainMagnitude.tile_size_115655"] + group_sizze_115657 = (tile_sizze_115656 * tile_sizze_115656) + tile_sizze_116018 = self.sizes["mainMagnitude.tile_size_116017"] + group_sizze_116019 = (tile_sizze_116018 * tile_sizze_116018) + Ty_116322 = self.sizes["mainMagnitude.Ty_116319"] + Ry_116323 = self.sizes["mainMagnitude.Ry_116321"] + Tx_116324 = self.sizes["mainMagnitude.Tx_116318"] + Rx_116325 = self.sizes["mainMagnitude.Rx_116320"] + Tk_116326 = self.sizes["mainMagnitude.Tk_116317"] + TxRx_116329 = (Tx_116324 * Rx_116325) + TyRy_116330 = (Ty_116322 * Ry_116323) + a_loc_szz_116332 = (Tk_116326 * TyRy_116330) + binop_x_116333 = (Tx_116324 * Tk_116326) + b_loc_szz_116334 = (Rx_116325 * binop_x_116333) + group_sizze_116339 = (Ty_116322 * Tx_116324) + num_groups_x_116020 = sdiv_up_safe64(m_73008, tile_sizze_116018) + num_groups_y_116021 = sdiv_up_safe64(k2p2zq_73023, tile_sizze_116018) + num_groups_top_116022 = (num_groups_x_116020 * num_groups_y_116021) + padded_sizze_115430 = (m_73008 + y_115429) + mem_120254 = opencl_alloc(self, bytes_120253, "mem_120254") + per_chunk_115432 = squot_safe64(padded_sizze_115430, num_threads_115425) + bytes_120258 = (np.int64(8) * nest_sizze_93834) + 
bytes_120269 = (np.int64(8) * replicate_arg_73209) + binop_x_120926 = (m_73008 * replicate_arg_73209) + bytes_120924 = (np.int64(8) * binop_x_120926) + bytes_120947 = (np.int64(8) * padded_sizze_115430) + binop_x_120950 = (num_threads_115425 * per_chunk_115432) + bytes_120949 = (np.int64(8) * binop_x_120950) + binop_x_121376 = (k2p2zq_73023 * group_sizze_115657) + bytes_121374 = (np.int64(8) * binop_x_121376) + ctx_val_121390 = (k2p2zq_73023 * tile_sizze_115656) + bytes_121393 = (np.int64(8) * group_sizze_115657) + binop_x_125185 = (np.int64(8) * tile_sizze_115656) + sizze_125186 = (tile_sizze_115656 * binop_x_125185) + bytes_121515 = (np.int64(8) * group_sizze_116019) + binop_x_125210 = (np.int64(8) * tile_sizze_116018) + sizze_125211 = (tile_sizze_116018 * binop_x_125210) + binop_x_121546 = (k2p2zq_73023 * group_sizze_116019) + bytes_121544 = (np.int64(8) * binop_x_121546) + binop_x_121648 = (Ry_116323 * group_sizze_116339) + binop_x_121649 = (Rx_116325 * binop_x_121648) + bytes_121646 = (np.int64(8) * binop_x_121649) + binop_x_121640 = (Ry_116323 * Rx_116325) + bytes_121639 = (np.int64(8) * binop_x_121640) + bytes_121651 = (np.int64(8) * a_loc_szz_116332) + bytes_121653 = (np.int64(8) * b_loc_szz_116334) + bytes_121722 = (np.int64(8) * binop_x_121648) + binop_x_121728 = (Rx_116325 * group_sizze_116339) + bytes_121726 = (np.int64(8) * binop_x_121728) + bytes_121714 = (np.int64(8) * Ry_116323) + bytes_121716 = (np.int64(8) * Rx_116325) + binop_x_125231 = (np.int64(8) * Ty_116322) + binop_x_125232 = (Tx_116324 * binop_x_125231) + binop_x_125233 = (Ry_116323 * binop_x_125232) + sizze_125234 = (Rx_116325 * binop_x_125233) + sizze_125149 = (np.int64(16) * k2p2zq_73023) + sizze_125420 = (k2p2zq_73023 * bytes_120247) + binop_x_125541 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125542 = (k2p2zq_73023 * binop_x_125541) + double_buffer_sizze_125543 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125544 = (np.int64(16) * k2p2zq_73023) + 
double_buffer_sizze_125550 = (np.int64(8) * k2p2zq_73023) + binop_x_125558 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125559 = (k2p2zq_73023 * binop_x_125558) + double_buffer_sizze_125560 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125561 = (np.int64(16) * k2p2zq_73023) + double_buffer_sizze_125567 = (np.int64(8) * k2p2zq_73023) + num_threads_125879 = (segmap_group_sizze_91404 * num_groups_91405) + total_sizze_125880 = (bytes_120247 * num_threads_125879) + total_sizze_125881 = (bytes_120269 * num_threads_125879) + total_sizze_125882 = (bytes_120247 * num_threads_125879) + total_sizze_125883 = (bytes_120250 * num_threads_125879) + total_sizze_125884 = (bytes_120250 * num_threads_125879) + total_sizze_125885 = (bytes_120247 * num_threads_125879) + total_sizze_125886 = (bytes_120250 * num_threads_125879) + total_sizze_125887 = (bytes_120247 * num_threads_125879) + total_sizze_125888 = (bytes_120250 * num_threads_125879) + total_sizze_125889 = (bytes_120247 * num_threads_125879) + total_sizze_125890 = (bytes_120250 * num_threads_125879) + total_sizze_125891 = (bytes_120247 * num_threads_125879) + total_sizze_125892 = (bytes_120250 * num_threads_125879) + total_sizze_125893 = (sizze_125149 * num_threads_125879) + total_sizze_125894 = (bytes_120247 * num_threads_125879) + total_sizze_125895 = (bytes_120247 * num_threads_125879) + total_sizze_125896 = (sizze_125420 * num_threads_125879) + total_sizze_125897 = (sizze_125149 * num_threads_125879) + total_sizze_125898 = (bytes_120247 * num_threads_125879) + total_sizze_125899 = (sizze_125420 * num_threads_125879) + total_sizze_125900 = (double_buffer_sizze_125542 * num_threads_125879) + total_sizze_125901 = (double_buffer_sizze_125543 * num_threads_125879) + total_sizze_125902 = (double_buffer_sizze_125544 * num_threads_125879) + total_sizze_125903 = (double_buffer_sizze_125550 * num_threads_125879) + num_threads_125909 = (segmap_group_sizze_93534 * num_groups_93535) + total_sizze_125910 = (bytes_120247 * 
num_threads_125909) + total_sizze_125911 = (sizze_125149 * num_threads_125909) + total_sizze_125912 = (bytes_120247 * num_threads_125909) + total_sizze_125913 = (sizze_125420 * num_threads_125909) + total_sizze_125914 = (sizze_125149 * num_threads_125909) + total_sizze_125915 = (bytes_120247 * num_threads_125909) + total_sizze_125916 = (sizze_125420 * num_threads_125909) + total_sizze_125917 = (double_buffer_sizze_125559 * num_threads_125909) + total_sizze_125918 = (double_buffer_sizze_125560 * num_threads_125909) + total_sizze_125919 = (double_buffer_sizze_125561 * num_threads_125909) + num_threads_125924 = (segmap_group_sizze_93957 * num_groups_93958) + total_sizze_125925 = (bytes_120250 * num_threads_125924) + total_sizze_125926 = (bytes_120247 * num_threads_125924) + num_threads_125927 = (group_sizze_116019 * num_groups_top_116022) + total_sizze_125928 = (bytes_120247 * num_threads_125927) + num_threads_125932 = (segmap_group_sizze_94050 * num_groups_94051) + total_sizze_125933 = (bytes_120247 * num_threads_125932) + num_threads_125936 = (segmap_group_sizze_94109 * num_groups_94110) + total_sizze_125937 = (bytes_120247 * num_threads_125936) + total_sizze_125938 = (double_buffer_sizze_125567 * num_threads_125936) + local_memory_capacity_127182 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127182)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127182))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127182))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127182))) and suff_outer_par_90947): + mem_120257 = opencl_alloc(self, bytes_121990, "mem_120257") + group_sizze_126566 = self.sizes["mainMagnitude.group_size_126566"] + num_groups_126567 = sdiv_up64((m_73008 * k2p2zq_73023), + group_sizze_126566) + if ((1 * (np.int64(num_groups_126567) * np.int64(group_sizze_126566))) != 0): + self.mainMagnitudezicopy_126563_var.set_args(np.int64(m_73008), + np.int64(n_73011), + 
np.int64(k2p2zq_73023), + defunc_3_map_res_mem_120231, + mem_120257) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126563_var, + ((np.int64(num_groups_126567) * np.int64(group_sizze_126566)),), + (np.int64(group_sizze_126566),)) + if synchronous: + sync(self) + mem_120261 = opencl_alloc(self, bytes_120258, "mem_120261") + group_sizze_126571 = self.sizes["mainMagnitude.group_size_126571"] + num_groups_126572 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_126571) + if ((1 * (np.int64(num_groups_126572) * np.int64(group_sizze_126571))) != 0): + self.mainMagnitudezicopy_126568_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + mem_120246, mem_120261) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126568_var, + ((np.int64(num_groups_126572) * np.int64(group_sizze_126571)),), + (np.int64(group_sizze_126571),)) + if synchronous: + sync(self) + mem_120265 = opencl_alloc(self, bytes_121997, "mem_120265") + group_sizze_126576 = self.sizes["mainMagnitude.group_size_126576"] + num_groups_126577 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_126576) + if ((1 * (np.int64(num_groups_126577) * np.int64(group_sizze_126576))) != 0): + self.mainMagnitudezicopy_126573_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + mem_120246, mem_120265) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126573_var, + ((np.int64(num_groups_126577) * np.int64(group_sizze_126576)),), + (np.int64(group_sizze_126576),)) + if synchronous: + sync(self) + mem_120878 = opencl_alloc(self, bytes_120258, "mem_120878") + mem_120881 = opencl_alloc(self, bytes_121990, "mem_120881") + mem_120883 = opencl_alloc(self, bytes_120173, "mem_120883") + mem_120268 = opencl_alloc(self, total_sizze_125880, "mem_120268") + mem_120271 = opencl_alloc(self, total_sizze_125881, "mem_120271") + mem_120273 = opencl_alloc(self, 
total_sizze_125882, "mem_120273") + mem_120608 = opencl_alloc(self, total_sizze_125883, "mem_120608") + mem_120649 = opencl_alloc(self, total_sizze_125884, "mem_120649") + mem_120661 = opencl_alloc(self, total_sizze_125885, "mem_120661") + mem_120690 = opencl_alloc(self, total_sizze_125886, "mem_120690") + mem_120763 = opencl_alloc(self, total_sizze_125887, "mem_120763") + mem_120778 = opencl_alloc(self, total_sizze_125888, "mem_120778") + mem_120790 = opencl_alloc(self, total_sizze_125889, "mem_120790") + mem_120801 = opencl_alloc(self, total_sizze_125890, "mem_120801") + mem_120821 = opencl_alloc(self, total_sizze_125891, "mem_120821") + mem_120824 = opencl_alloc(self, total_sizze_125892, "mem_120824") + mem_125150 = opencl_alloc(self, total_sizze_125893, "mem_125150") + mem_125152 = opencl_alloc(self, total_sizze_125894, "mem_125152") + mem_125160 = opencl_alloc(self, total_sizze_125895, "mem_125160") + mem_125421 = opencl_alloc(self, total_sizze_125896, "mem_125421") + mem_125429 = opencl_alloc(self, total_sizze_125897, "mem_125429") + mem_125431 = opencl_alloc(self, total_sizze_125898, "mem_125431") + mem_125491 = opencl_alloc(self, total_sizze_125899, "mem_125491") + double_buffer_mem_125535 = opencl_alloc(self, total_sizze_125900, + "double_buffer_mem_125535") + double_buffer_mem_125536 = opencl_alloc(self, total_sizze_125901, + "double_buffer_mem_125536") + double_buffer_mem_125537 = opencl_alloc(self, total_sizze_125902, + "double_buffer_mem_125537") + double_buffer_mem_125548 = opencl_alloc(self, total_sizze_125903, + "double_buffer_mem_125548") + if ((1 * (np.int64(num_groups_91405) * np.int64(segmap_group_sizze_91404))) != 0): + self.mainMagnitudezisegmap_90949_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_73095), + np.byte(y_73099), + np.byte(ok_or_empty_73101), + np.int64(min_res_73213), + np.int64(k_73214), + np.int64(num_groups_91405), + 
np.int64(binop_x_120251), + np.int64(num_threads_125879), + mem_120252, mem_120257, + mem_120261, mem_120265, + mem_120268, mem_120271, + mem_120273, mem_120608, + mem_120649, mem_120661, + mem_120690, mem_120763, + mem_120778, mem_120790, + mem_120801, mem_120821, + mem_120824, mem_120878, + mem_120881, mem_120883, + mem_125150, mem_125152, + mem_125160, mem_125421, + mem_125429, mem_125431, + mem_125491, + double_buffer_mem_125535, + double_buffer_mem_125536, + double_buffer_mem_125537, + double_buffer_mem_125548) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_90949_var, + ((np.int64(num_groups_91405) * np.int64(segmap_group_sizze_91404)),), + (np.int64(segmap_group_sizze_91404),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_120257 = None + mem_120261 = None + mem_120265 = None + mem_120268 = None + mem_120271 = None + mem_120273 = None + mem_120608 = None + mem_120649 = None + mem_120661 = None + mem_120690 = None + mem_120763 = None + mem_120778 = None + mem_120790 = None + mem_120801 = None + mem_120821 = None + mem_120824 = None + mem_125150 = None + mem_125152 = None + mem_125160 = None + mem_125421 = None + mem_125429 = None + mem_125431 = None + mem_125491 = None + double_buffer_mem_125535 = None + double_buffer_mem_125536 = None + double_buffer_mem_125537 = None + double_buffer_mem_125548 = None + mem_121923 = opencl_alloc(self, bytes_121997, "mem_121923") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121923, np.int64(0), + mem_120878, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_120878 = None + mem_121927 = opencl_alloc(self, bytes_121990, "mem_121927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121927, np.int64(0), + mem_120881, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_120881 = None + defunc_5_map_res_mem_121929 = mem_121923 + defunc_5_map_res_mem_121930 = mem_121927 + defunc_5_map_res_mem_121931 = mem_120883 + else: + mem_120886 = 
opencl_alloc(self, bytes_121990, "mem_120886") + self.futhark_builtinzhreplicate_f64(mem_120886, + (m_73008 * k2p2zq_73023), + np.float64(0.0)) + mem_120890 = opencl_alloc(self, bytes_121993, "mem_120890") + self.futhark_builtinzhreplicate_f64(mem_120890, + ((m_73008 * np.int64(2)) * k2p2zq_73023), + np.float64(0.0)) + mem_120894 = opencl_alloc(self, bytes_121997, "mem_120894") + group_sizze_126686 = self.sizes["mainMagnitude.group_size_126686"] + num_groups_126687 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_126686) + if ((1 * (np.int64(num_groups_126687) * np.int64(group_sizze_126686))) != 0): + self.mainMagnitudezicopy_126683_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + mem_120246, mem_120894) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126683_var, + ((np.int64(num_groups_126687) * np.int64(group_sizze_126686)),), + (np.int64(group_sizze_126686),)) + if synchronous: + sync(self) + mem_param_120902 = mem_120886 + mem_param_120913 = mem_120890 + j_93466 = np.int64(0) + one_129931 = np.int64(1) + for counter_129930 in range(k2p2zq_73023): + index_certs_93469 = True + assert ok_or_empty_73101, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:23:25-63\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 recresid.fut:22:5-25:22\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n" % ("Index [", + j_93466, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_73023, + "][", + k2p2zq_73023, + "].")) + local_memory_capacity_126786 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_126786)) and suff_outer_par_93472): + mem_120923 = opencl_alloc(self, bytes_121990, "mem_120923") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_120923, np.int64(0), + mem_param_120902, + np.int64(0), + np.int64(1), + k2p2zq_73023, m_73008) + mem_120927 = opencl_alloc(self, bytes_120924, "mem_120927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120927, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + mem_120931 = opencl_alloc(self, bytes_121990, "mem_120931") + mem_120935 = opencl_alloc(self, bytes_120924, "mem_120935") + if ((1 * (np.int64(num_groups_93477) * np.int64(segmap_group_sizze_93476))) != 0): + self.mainMagnitudezisegmap_93213_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(j_93466), + np.int64(num_groups_93477), + mem_120894, + mem_120923, + mem_120927, + mem_120931, + mem_120935) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_93213_var, + ((np.int64(num_groups_93477) * np.int64(segmap_group_sizze_93476)),), + (np.int64(segmap_group_sizze_93476),)) + if synchronous: + sync(self) + mem_120923 = None + mem_120927 = None + mem_120959 = opencl_alloc(self, bytes_121990, "mem_120959") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120959, np.int64(0), + mem_120931, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_120931 = None + dqrdc2_res_mem_120965 = mem_120959 + dqrdc2_res_mem_120966 = mem_120935 + else: + mem_120938 = opencl_alloc(self, bytes_120173, "mem_120938") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_93498): + segment_sizze_nonzzero_126704 = smax64(np.int64(1), k2p2zq_73023) + num_threads_126705 = (num_groups_93499 * segred_group_sizze_93498) + if ((1 * (np.int64(num_groups_93499) * np.int64(segred_group_sizze_93498))) != 0): + self.mainMagnitudezisegred_small_93298_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_93498))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(j_93466), + 
np.int64(num_groups_93499), + np.int64(segment_sizze_nonzzero_126704), + mem_120246, + mem_120938) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_93298_var, + ((np.int64(num_groups_93499) * np.int64(segred_group_sizze_93498)),), + (np.int64(segred_group_sizze_93498),)) + if synchronous: + sync(self) + else: + groups_per_segment_126725 = sdiv_up64(num_groups_93499, + smax64(np.int64(1), + m_73008)) + elements_per_thread_126726 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_93498 * groups_per_segment_126725)) + virt_num_groups_126727 = (groups_per_segment_126725 * m_73008) + num_threads_126728 = (num_groups_93499 * segred_group_sizze_93498) + threads_per_segment_126729 = (groups_per_segment_126725 * segred_group_sizze_93498) + group_res_arr_mem_126730 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_93498 * virt_num_groups_126727)), + "group_res_arr_mem_126730") + mainMagnitudezicounter_mem_126732 = self.mainMagnitudezicounter_mem_126732 + if ((1 * (np.int64(num_groups_93499) * np.int64(segred_group_sizze_93498))) != 0): + self.mainMagnitudezisegred_large_93298_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_93498))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(j_93466), + np.int64(num_groups_93499), + np.int64(groups_per_segment_126725), + np.int64(elements_per_thread_126726), + np.int64(virt_num_groups_126727), + np.int64(threads_per_segment_126729), + mem_120246, + mem_120938, + group_res_arr_mem_126730, + mainMagnitudezicounter_mem_126732) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_93298_var, + ((np.int64(num_groups_93499) * np.int64(segred_group_sizze_93498)),), + (np.int64(segred_group_sizze_93498),)) + if synchronous: + sync(self) + mem_120941 = opencl_alloc(self, bytes_120173, "mem_120941") + if ((1 * (np.int64(segmap_usable_groups_93510) * 
np.int64(segmap_group_sizze_93509))) != 0): + self.mainMagnitudezisegmap_93282_var.set_args(self.global_failure, + np.int64(m_73008), + mem_120938, + mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_93282_var, + ((np.int64(segmap_usable_groups_93510) * np.int64(segmap_group_sizze_93509)),), + (np.int64(segmap_group_sizze_93509),)) + if synchronous: + sync(self) + mem_120938 = None + if ((1 * (np.int64(segmap_usable_groups_93517) * np.int64(segmap_group_sizze_93516))) != 0): + self.mainMagnitudezisegmap_93273_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(j_93466), + mem_param_120902, + mem_120941) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_93273_var, + ((np.int64(segmap_usable_groups_93517) * np.int64(segmap_group_sizze_93516)),), + (np.int64(segmap_group_sizze_93516),)) + if synchronous: + sync(self) + mem_120946 = opencl_alloc(self, bytes_120924, "mem_120946") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120946, np.int64(0), + mem_param_120913, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + mem_120948 = opencl_alloc(self, bytes_120947, "mem_120948") + tmp_offs_126774 = np.int64(0) + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120941, + dest_offset=np.int64((tmp_offs_126774 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126774 = (tmp_offs_126774 + m_73008) + if ((y_115429 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120948, mem_120254, + dest_offset=np.int64((tmp_offs_126774 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115429 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_126774 = (tmp_offs_126774 + y_115429) + mem_120951 = opencl_alloc(self, bytes_120949, "mem_120951") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120951, np.int64(0), + 
mem_120948, np.int64(0), + np.int64(1), + per_chunk_115432, + num_threads_115425) + mem_120948 = None + mem_120956 = opencl_alloc(self, bytes_120924, "mem_120956") + if ((1 * (np.int64(num_groups_93522) * np.int64(segmap_group_sizze_93521))) != 0): + self.mainMagnitudezisegmap_93258_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(j_93466), + np.int64(num_groups_93522), + np.int64(num_threads_115425), + np.int64(per_chunk_115432), + mem_120941, + mem_120946, + mem_120951, + mem_120956) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_93258_var, + ((np.int64(num_groups_93522) * np.int64(segmap_group_sizze_93521)),), + (np.int64(segmap_group_sizze_93521),)) + if synchronous: + sync(self) + mem_120941 = None + mem_120946 = None + mem_120951 = None + mem_120963 = opencl_alloc(self, bytes_121990, "mem_120963") + if (((m_73008 * k2p2zq_73023) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_120963, mem_param_120902, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_73008 * k2p2zq_73023) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_120965 = mem_120963 + dqrdc2_res_mem_120966 = mem_120956 + mem_120972 = opencl_alloc(self, bytes_121993, "mem_120972") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120972, np.int64(0), + dqrdc2_res_mem_120966, + np.int64(0), np.int64(1), + m_73008, + (np.int64(2) * k2p2zq_73023)) + dqrdc2_res_mem_120966 = None + mem_param_tmp_126688 = dqrdc2_res_mem_120965 + mem_param_tmp_126689 = mem_120972 + mem_param_120902 = mem_param_tmp_126688 + mem_param_120913 = mem_param_tmp_126689 + j_93466 += one_129931 + dqrdc2_res_r_mem_120986 = mem_param_120902 + dqrdc2_res_r_mem_120997 = mem_param_120913 + mem_120886 = None + mem_120890 = None + mem_120894 = None + mem_121001 = opencl_alloc(self, bytes_120258, "mem_121001") + group_sizze_126790 = self.sizes["mainMagnitude.group_size_126790"] + num_groups_126791 = 
sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_126790) + if ((1 * (np.int64(num_groups_126791) * np.int64(group_sizze_126790))) != 0): + self.mainMagnitudezicopy_126787_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + mem_120246, mem_121001) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126787_var, + ((np.int64(num_groups_126791) * np.int64(group_sizze_126790)),), + (np.int64(group_sizze_126790),)) + if synchronous: + sync(self) + mem_121004 = opencl_alloc(self, bytes_121990, "mem_121004") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121004, np.int64(0), + dqrdc2_res_r_mem_120986, + np.int64(0), np.int64(1), + k2p2zq_73023, m_73008) + dqrdc2_res_r_mem_120986 = None + mem_121008 = opencl_alloc(self, bytes_120924, "mem_121008") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121008, np.int64(0), + dqrdc2_res_r_mem_120997, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + dqrdc2_res_r_mem_120997 = None + mem_121335 = opencl_alloc(self, bytes_120258, "mem_121335") + mem_121338 = opencl_alloc(self, bytes_121990, "mem_121338") + mem_121341 = opencl_alloc(self, bytes_121990, "mem_121341") + mem_121343 = opencl_alloc(self, bytes_120173, "mem_121343") + mem_121011 = opencl_alloc(self, total_sizze_125910, "mem_121011") + mem_125167 = opencl_alloc(self, total_sizze_125911, "mem_125167") + mem_125169 = opencl_alloc(self, total_sizze_125912, "mem_125169") + mem_125438 = opencl_alloc(self, total_sizze_125913, "mem_125438") + mem_125446 = opencl_alloc(self, total_sizze_125914, "mem_125446") + mem_125448 = opencl_alloc(self, total_sizze_125915, "mem_125448") + mem_125498 = opencl_alloc(self, total_sizze_125916, "mem_125498") + double_buffer_mem_125552 = opencl_alloc(self, total_sizze_125917, + "double_buffer_mem_125552") + double_buffer_mem_125553 = opencl_alloc(self, total_sizze_125918, + "double_buffer_mem_125553") + double_buffer_mem_125554 = 
opencl_alloc(self, total_sizze_125919, + "double_buffer_mem_125554") + if ((1 * (np.int64(num_groups_93535) * np.int64(segmap_group_sizze_93534))) != 0): + self.mainMagnitudezisegmap_92925_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_73095), + np.byte(y_73099), + np.int64(min_res_73213), + np.int64(k_73214), + np.int64(num_groups_93535), + np.int64(num_threads_125909), + mem_120248, mem_121001, + mem_121004, mem_121008, + mem_121011, mem_121335, + mem_121338, mem_121341, + mem_121343, mem_125167, + mem_125169, mem_125438, + mem_125446, mem_125448, + mem_125498, + double_buffer_mem_125552, + double_buffer_mem_125553, + double_buffer_mem_125554) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92925_var, + ((np.int64(num_groups_93535) * np.int64(segmap_group_sizze_93534)),), + (np.int64(segmap_group_sizze_93534),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121001 = None + mem_121004 = None + mem_121008 = None + mem_121011 = None + mem_125167 = None + mem_125169 = None + mem_125438 = None + mem_125446 = None + mem_125448 = None + mem_125498 = None + double_buffer_mem_125552 = None + double_buffer_mem_125553 = None + double_buffer_mem_125554 = None + segmap_usable_groups_93821 = sdiv_up64(binop_x_120244, + segmap_group_sizze_93820) + mem_121346 = opencl_alloc(self, binop_x_120244, "mem_121346") + if ((1 * (np.int64(segmap_usable_groups_93821) * np.int64(segmap_group_sizze_93820))) != 0): + self.mainMagnitudezisegmap_92880_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121343, mem_121346) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92880_var, + ((np.int64(segmap_usable_groups_93821) * np.int64(segmap_group_sizze_93820)),), + (np.int64(segmap_group_sizze_93820),)) + if synchronous: + sync(self) + segmap_usable_groups_93836 = 
sdiv_up64(nest_sizze_93834, + segmap_group_sizze_93835) + mem_121351 = opencl_alloc(self, bytes_121997, "mem_121351") + if ((1 * (np.int64(segmap_usable_groups_93836) * np.int64(segmap_group_sizze_93835))) != 0): + self.mainMagnitudezisegmap_92845_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121335, mem_121343, + mem_121346, mem_121351) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92845_var, + ((np.int64(segmap_usable_groups_93836) * np.int64(segmap_group_sizze_93835)),), + (np.int64(segmap_group_sizze_93835),)) + if synchronous: + sync(self) + mem_121346 = None + local_memory_capacity_127001 = self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127001)) and sle64((((((bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121393 + srem64((np.int64(8) - srem64(bytes_121393, + np.int64(8))), + np.int64(8)))) + (bytes_121374 + srem64((np.int64(8) - srem64(bytes_121374, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127001))) and suff_outer_par_93859): + mem_121355 = opencl_alloc(self, bytes_121997, "mem_121355") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121355, np.int64(0), + mem_121351, np.int64(0), + m_73008, k2p2zq_73023, + k2p2zq_73023) + mem_121359 = opencl_alloc(self, bytes_121997, "mem_121359") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121359, np.int64(0), + mem_121355, np.int64(0), + np.int64(1), k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_121355 = None + mem_121363 = opencl_alloc(self, bytes_121997, "mem_121363") + group_sizze_126874 = self.sizes["mainMagnitude.group_size_126874"] + num_groups_126875 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_126874) + if ((1 
* (np.int64(num_groups_126875) * np.int64(group_sizze_126874))) != 0): + self.mainMagnitudezicopy_126871_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121351, mem_121363) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_126871_var, + ((np.int64(num_groups_126875) * np.int64(group_sizze_126874)),), + (np.int64(group_sizze_126874),)) + if synchronous: + sync(self) + mem_121366 = opencl_alloc(self, bytes_120250, "mem_121366") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121366, np.int64(0), + mem_120252, np.int64(0), + np.int64(1), k2p2zq_73023, + k2p2zq_73023) + num_groups_x_115658 = sdiv_up64(m_73008, tile_sizze_115656) + num_groups_y_115659 = sdiv_up64(k2p2zq_73023, tile_sizze_115656) + num_groups_top_115660 = (num_groups_x_115658 * num_groups_y_115659) + mem_121368 = opencl_alloc(self, bytes_120247, "mem_121368") + self.futhark_builtinzhreplicate_f64(mem_121368, k2p2zq_73023, + np.float64(0.0)) + mem_121446 = opencl_alloc(self, bytes_121997, "mem_121446") + num_threads_125922 = (group_sizze_115657 * num_groups_top_115660) + total_sizze_125923 = (bytes_120247 * num_threads_125922) + mem_125177 = opencl_alloc(self, total_sizze_125923, "mem_125177") + if ((1 * (np.int64(num_groups_top_115660) * np.int64(group_sizze_115657))) != 0): + self.mainMagnitudezisegmap_intragroup_115661_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64(bytes_121374)), + cl.LocalMemory(np.int64(bytes_121393)), + cl.LocalMemory(np.int64(bytes_121393)), + cl.LocalMemory(np.int64(bytes_121374)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_y_115659), + np.int64(ctx_val_121390), + np.int64(num_threads_125922), + mem_121359, + mem_121363, + mem_121366, + mem_121368, + mem_121446, + mem_125177) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_115661_var, + ((np.int64(num_groups_top_115660) * 
np.int64(group_sizze_115657)),), + (np.int64(group_sizze_115657),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121359 = None + mem_121363 = None + mem_121366 = None + mem_121368 = None + mem_125177 = None + defunc_3_map_res_r_mem_121609 = mem_121446 + else: + mem_121450 = opencl_alloc(self, bytes_121997, "mem_121450") + self.futhark_builtinzhreplicate_f64(mem_121450, + ((m_73008 * k2p2zq_73023) * k2p2zq_73023), + np.float64(0.0)) + mem_121454 = opencl_alloc(self, bytes_121997, "mem_121454") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121454, np.int64(0), + mem_121351, np.int64(0), + m_73008, k2p2zq_73023, + k2p2zq_73023) + mem_121458 = opencl_alloc(self, bytes_121997, "mem_121458") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121458, np.int64(0), + mem_121454, np.int64(0), + np.int64(1), k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_121454 = None + mem_param_121469 = mem_121450 + i_93923 = np.int64(0) + one_129934 = np.int64(1) + for counter_129933 in range(k2p2zq_73023): + x_93925 = (k2p2zq_73023 - i_93923) + i_93926 = (x_93925 - np.int64(1)) + x_93927 = sle64(np.int64(0), i_93926) + y_93928 = slt64(i_93926, k2p2zq_73023) + bounds_check_93929 = (x_93927 and y_93928) + j_m_i_93930 = (k2p2zq_73023 - x_93925) + empty_slice_93931 = (j_m_i_93930 == np.int64(0)) + m_93932 = (j_m_i_93930 - np.int64(1)) + i_p_m_t_s_93933 = (x_93925 + m_93932) + zzero_leq_i_p_m_t_s_93934 = sle64(np.int64(0), i_p_m_t_s_93933) + i_p_m_t_s_leq_w_93935 = slt64(i_p_m_t_s_93933, k2p2zq_73023) + zzero_lte_i_93936 = sle64(np.int64(0), x_93925) + i_lte_j_93937 = sle64(x_93925, k2p2zq_73023) + y_93938 = (i_p_m_t_s_leq_w_93935 and zzero_lte_i_93936) + y_93939 = (zzero_leq_i_p_m_t_s_93934 and y_93938) + y_93940 = (i_lte_j_93937 and y_93939) + forwards_ok_93941 = (zzero_lte_i_93936 and y_93940) + ok_or_empty_93942 = (empty_slice_93931 or forwards_ok_93941) + index_ok_93943 = (bounds_check_93929 and ok_or_empty_93942) + index_certs_93944 = True + assert 
index_ok_93943, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_93926, + ", ", + x_93925, + ":", + k2p2zq_73023, + "] out of bounds for array of shape [", + k2p2zq_73023, + "][", + k2p2zq_73023, + "].")) + index_certs_93945 = True + assert ok_or_empty_93942, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + x_93925, + ":", + k2p2zq_73023, + "] out of bounds for array of shape [", + k2p2zq_73023, + "].")) + index_ok_93946 = (bounds_check_93929 and bounds_check_93929) + index_certs_93947 = True + assert index_ok_93946, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_93926, + ", ", + i_93926, + "] out of bounds for array of shape [", + k2p2zq_73023, + "][", + k2p2zq_73023, + "].")) + 
index_certs_93948 = True + assert bounds_check_93929, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:23:25-63\n #4 /prelude/soacs.fut:67:19-23\n #5 /prelude/soacs.fut:67:3-37\n #6 recresid.fut:22:5-25:22\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_93926, + "] out of bounds for array of shape [", + k2p2zq_73023, + "].")) + nest_sizze_94010 = (j_m_i_93930 * binop_x_120244) + max_num_groups_126898 = self.sizes["mainMagnitude.segred_num_groups_92537"] + num_groups_94012 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_94010, + segred_group_sizze_94011), + sext_i32_i64(max_num_groups_126898)))) + local_memory_capacity_127000 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127000)) and suff_outer_par_93954): + mem_121476 = opencl_alloc(self, bytes_120258, "mem_121476") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121476, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_121504 = opencl_alloc(self, bytes_120258, "mem_121504") + mem_121480 = opencl_alloc(self, total_sizze_125925, "mem_121480") + mem_121492 = opencl_alloc(self, total_sizze_125926, "mem_121492") + if ((1 * (np.int64(num_groups_93958) * np.int64(segmap_group_sizze_93957))) != 0): + self.mainMagnitudezisegmap_92419_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_93925), + np.int64(i_93926), + np.int64(j_m_i_93930), + np.int64(num_groups_93958), + np.int64(num_threads_125924), + mem_120252, + mem_121351, + mem_121458, + mem_121476, + mem_121480, + mem_121492, + mem_121504) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92419_var, + 
((np.int64(num_groups_93958) * np.int64(segmap_group_sizze_93957)),), + (np.int64(segmap_group_sizze_93957),)) + if synchronous: + sync(self) + mem_121476 = None + mem_121480 = None + mem_121492 = None + mem_121576 = opencl_alloc(self, bytes_121997, "mem_121576") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121576, + np.int64(0), + mem_121504, + np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_121504 = None + defunc_3_map_res_mem_121583 = mem_121576 + else: + local_memory_capacity_126999 = self.max_local_memory + if (sle64((((bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8))) + (bytes_121515 + srem64((np.int64(8) - srem64(bytes_121515, + np.int64(8))), + np.int64(8)))) + (bytes_121544 + srem64((np.int64(8) - srem64(bytes_121544, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_126999)) and suff_outer_par_93988): + mem_121508 = opencl_alloc(self, bytes_121997, "mem_121508") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121508, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_121512 = opencl_alloc(self, bytes_121997, "mem_121512") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121512, + np.int64(0), + mem_param_121469, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + num_whole_tiles_116039 = squot64(j_m_i_93930, tile_sizze_116018) + residual_input_116172 = srem64(j_m_i_93930, tile_sizze_116018) + cond_116173 = (residual_input_116172 == np.int64(0)) + mem_121551 = opencl_alloc(self, bytes_121997, "mem_121551") + mem_125219 = opencl_alloc(self, total_sizze_125928, + "mem_125219") + if ((1 * (np.int64(num_groups_top_116022) * np.int64(group_sizze_116019))) != 0): + self.mainMagnitudezisegmap_intragroup_116023_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121544)), + cl.LocalMemory(np.int64(bytes_121515)), + cl.LocalMemory(np.int64(bytes_121515)), + 
np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_93925), + np.int64(i_93926), + np.int64(j_m_i_93930), + np.int64(num_groups_y_116021), + np.int64(num_whole_tiles_116039), + np.int64(residual_input_116172), + np.byte(cond_116173), + np.int64(num_threads_125927), + mem_120252, + mem_121351, + mem_121458, + mem_121508, + mem_121512, + mem_121551, + mem_125219) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_116023_var, + ((np.int64(num_groups_top_116022) * np.int64(group_sizze_116019)),), + (np.int64(group_sizze_116019),)) + if synchronous: + sync(self) + mem_121508 = None + mem_121512 = None + mem_125219 = None + defunc_3_map_res_mem_121572 = mem_121551 + else: + mem_121555 = opencl_alloc(self, bytes_121990, "mem_121555") + if slt64((j_m_i_93930 * np.int64(2)), segred_group_sizze_94011): + segment_sizze_nonzzero_126929 = smax64(np.int64(1), + j_m_i_93930) + num_threads_126930 = (num_groups_94012 * segred_group_sizze_94011) + if ((1 * (np.int64(num_groups_94012) * np.int64(segred_group_sizze_94011))) != 0): + self.mainMagnitudezisegred_small_92541_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_94011))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_93925), + np.int64(i_93926), + np.int64(j_m_i_93930), + np.int64(num_groups_94012), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_126929), + mem_121351, + mem_param_121469, + mem_121555) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_92541_var, + ((np.int64(num_groups_94012) * np.int64(segred_group_sizze_94011)),), + (np.int64(segred_group_sizze_94011),)) + if synchronous: + sync(self) + else: + groups_per_segment_126950 = sdiv_up64(num_groups_94012, + smax64(np.int64(1), + (m_73008 * k2p2zq_73023))) + elements_per_thread_126951 = sdiv_up64(j_m_i_93930, + (segred_group_sizze_94011 * groups_per_segment_126950)) + virt_num_groups_126952 = (groups_per_segment_126950 * (m_73008 * 
k2p2zq_73023)) + num_threads_126953 = (num_groups_94012 * segred_group_sizze_94011) + threads_per_segment_126954 = (groups_per_segment_126950 * segred_group_sizze_94011) + group_res_arr_mem_126955 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_94011 * virt_num_groups_126952)), + "group_res_arr_mem_126955") + mainMagnitudezicounter_mem_126957 = self.mainMagnitudezicounter_mem_126957 + if ((1 * (np.int64(num_groups_94012) * np.int64(segred_group_sizze_94011))) != 0): + self.mainMagnitudezisegred_large_92541_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_94011))), + np.int64(k2p2zq_73023), + np.int64(x_93925), + np.int64(i_93926), + np.int64(j_m_i_93930), + np.int64(num_groups_94012), + np.int64(binop_x_120251), + np.int64(groups_per_segment_126950), + np.int64(elements_per_thread_126951), + np.int64(virt_num_groups_126952), + np.int64(threads_per_segment_126954), + mem_121351, + mem_param_121469, + mem_121555, + group_res_arr_mem_126955, + mainMagnitudezicounter_mem_126957) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_92541_var, + ((np.int64(num_groups_94012) * np.int64(segred_group_sizze_94011)),), + (np.int64(segred_group_sizze_94011),)) + if synchronous: + sync(self) + mem_121559 = opencl_alloc(self, bytes_121990, "mem_121559") + if ((1 * (np.int64(segmap_usable_groups_94029) * np.int64(segmap_group_sizze_94028))) != 0): + self.mainMagnitudezisegmap_92519_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(i_93926), + mem_120252, + mem_121351, + mem_121555, + mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92519_var, + ((np.int64(segmap_usable_groups_94029) * np.int64(segmap_group_sizze_94028)),), + (np.int64(segmap_group_sizze_94028),)) + if synchronous: + sync(self) + mem_121555 = None + if ((1 * (np.int64(segmap_usable_groups_94040) * 
np.int64(segmap_group_sizze_94039))) != 0): + self.mainMagnitudezisegmap_92507_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(i_93926), + np.int64(binop_x_120251), + mem_param_121469, + mem_121559) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92507_var, + ((np.int64(segmap_usable_groups_94040) * np.int64(segmap_group_sizze_94039)),), + (np.int64(segmap_group_sizze_94039),)) + if synchronous: + sync(self) + mem_121559 = None + defunc_3_map_res_mem_121572 = mem_param_121469 + mem_121581 = opencl_alloc(self, bytes_121997, "mem_121581") + if ((((m_73008 * k2p2zq_73023) * k2p2zq_73023) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121581, + defunc_3_map_res_mem_121572, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_73008 * k2p2zq_73023) * k2p2zq_73023) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_121572 = None + defunc_3_map_res_mem_121583 = mem_121581 + mem_param_tmp_126896 = defunc_3_map_res_mem_121583 + mem_param_121469 = mem_param_tmp_126896 + i_93923 += one_129934 + defunc_3_map_res_r_mem_121597 = mem_param_121469 + mem_121450 = None + mem_121458 = None + defunc_3_map_res_r_mem_121609 = defunc_3_map_res_r_mem_121597 + mem_121351 = None + local_memory_capacity_127134 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127134)) and suff_outer_par_94055): + mem_121613 = opencl_alloc(self, bytes_121997, "mem_121613") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121613, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + mem_121632 = opencl_alloc(self, bytes_121997, "mem_121632") + mem_121616 = opencl_alloc(self, total_sizze_125933, "mem_121616") + if ((1 * (np.int64(num_groups_94051) * np.int64(segmap_group_sizze_94050))) != 0): + self.mainMagnitudezisegmap_92203_var.set_args(self.global_failure, + np.int64(m_73008), + 
np.int64(k2p2zq_73023), + np.int64(num_groups_94051), + np.int64(binop_x_120251), + np.int64(num_threads_125932), + defunc_3_map_res_r_mem_121609, + mem_121613, + mem_121616, + mem_121632) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_92203_var, + ((np.int64(num_groups_94051) * np.int64(segmap_group_sizze_94050)),), + (np.int64(segmap_group_sizze_94050),)) + if synchronous: + sync(self) + mem_121613 = None + mem_121616 = None + mem_121845 = opencl_alloc(self, bytes_121997, "mem_121845") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121845, np.int64(0), + mem_121632, np.int64(0), + np.int64(1), + (m_73008 * k2p2zq_73023), + k2p2zq_73023) + mem_121632 = None + defunc_3_map_res_r_mem_121847 = mem_121845 + else: + local_memory_capacity_127133 = self.max_local_memory + if (sle64(((bytes_121651 + srem64((np.int64(8) - srem64(bytes_121651, + np.int64(8))), + np.int64(8))) + (bytes_121653 + srem64((np.int64(8) - srem64(bytes_121653, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127133)) and suff_outer_par_94077): + mem_121636 = opencl_alloc(self, bytes_121997, "mem_121636") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121636, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + tk_div_tx_116327 = sdiv_up64(Tk_116326, Tx_116324) + tk_div_ty_116328 = sdiv_up64(Tk_116326, Ty_116322) + gridDim_x_116335 = sdiv_up64(k2p2zq_73023, TxRx_116329) + gridDim_y_116336 = sdiv_up64(k2p2zq_73023, TyRy_116330) + binop_y_116337 = (gridDim_x_116335 * gridDim_y_116336) + grid_sizze_116338 = (m_73008 * binop_y_116337) + full_tiles_116367 = squot64(k2p2zq_73023, Tk_116326) + kk_116570 = (Tk_116326 * full_tiles_116367) + mem_121827 = opencl_alloc(self, bytes_121997, "mem_121827") + if ((1 * (np.int64(grid_sizze_116338) * np.int64(group_sizze_116339))) != 0): + self.mainMagnitudezisegmap_intragroup_116342_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_121653)), + 
cl.LocalMemory(np.int64(bytes_121651)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(gridDim_x_116335), + np.int64(gridDim_y_116336), + np.int64(full_tiles_116367), + np.int64(kk_116570), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121609, + mem_121636, + mem_121827) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_116342_var, + ((np.int64(grid_sizze_116338) * np.int64(group_sizze_116339)),), + (np.int64(group_sizze_116339),)) + if synchronous: + sync(self) + mem_121636 = None + defunc_3_map_res_r_mem_121841 = mem_121827 + else: + mem_121831 = opencl_alloc(self, bytes_121997, "mem_121831") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121831, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_121835 = opencl_alloc(self, bytes_121997, "mem_121835") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121835, np.int64(0), + defunc_3_map_res_r_mem_121609, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + mem_121840 = opencl_alloc(self, bytes_121997, "mem_121840") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_94093): + segment_sizze_nonzzero_127073 = smax64(np.int64(1), k2p2zq_73023) + num_threads_127074 = (num_groups_94094 * segred_group_sizze_94093) + if ((1 * (np.int64(num_groups_94094) * np.int64(segred_group_sizze_94093))) != 0): + self.mainMagnitudezisegred_small_92263_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_94093))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_94094), + np.int64(segment_sizze_nonzzero_127073), + mem_121831, + mem_121835, + mem_121840) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_92263_var, + ((np.int64(num_groups_94094) * np.int64(segred_group_sizze_94093)),), + (np.int64(segred_group_sizze_94093),)) + if synchronous: + sync(self) + else: + groups_per_segment_127094 = 
sdiv_up64(num_groups_94094, + smax64(np.int64(1), + ((m_73008 * k2p2zq_73023) * k2p2zq_73023))) + elements_per_thread_127095 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_94093 * groups_per_segment_127094)) + virt_num_groups_127096 = (groups_per_segment_127094 * ((m_73008 * k2p2zq_73023) * k2p2zq_73023)) + num_threads_127097 = (num_groups_94094 * segred_group_sizze_94093) + threads_per_segment_127098 = (groups_per_segment_127094 * segred_group_sizze_94093) + group_res_arr_mem_127099 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_94093 * virt_num_groups_127096)), + "group_res_arr_mem_127099") + mainMagnitudezicounter_mem_127101 = self.mainMagnitudezicounter_mem_127101 + if ((1 * (np.int64(num_groups_94094) * np.int64(segred_group_sizze_94093))) != 0): + self.mainMagnitudezisegred_large_92263_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_94093))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_94094), + np.int64(groups_per_segment_127094), + np.int64(elements_per_thread_127095), + np.int64(virt_num_groups_127096), + np.int64(threads_per_segment_127098), + mem_121831, + mem_121835, + mem_121840, + group_res_arr_mem_127099, + mainMagnitudezicounter_mem_127101) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_92263_var, + ((np.int64(num_groups_94094) * np.int64(segred_group_sizze_94093)),), + (np.int64(segred_group_sizze_94093),)) + if synchronous: + sync(self) + mem_121831 = None + mem_121835 = None + defunc_3_map_res_r_mem_121841 = mem_121840 + defunc_3_map_res_r_mem_121847 = defunc_3_map_res_r_mem_121841 + mem_121850 = opencl_alloc(self, bytes_121990, "mem_121850") + group_sizze_127138 = self.sizes["mainMagnitude.group_size_127138"] + num_groups_127139 = sdiv_up64((m_73008 * k2p2zq_73023), + group_sizze_127138) + if ((1 * (np.int64(num_groups_127139) * np.int64(group_sizze_127138))) != 0): + 
self.mainMagnitudezicopy_127135_var.set_args(np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + defunc_3_map_res_mem_120231, + mem_121850) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127135_var, + ((np.int64(num_groups_127139) * np.int64(group_sizze_127138)),), + (np.int64(group_sizze_127138),)) + if synchronous: + sync(self) + mem_121854 = opencl_alloc(self, bytes_121997, "mem_121854") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121854, np.int64(0), + mem_121335, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_121335 = None + mem_121858 = opencl_alloc(self, bytes_120258, "mem_121858") + group_sizze_127143 = self.sizes["mainMagnitude.group_size_127143"] + num_groups_127144 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_127143) + if ((1 * (np.int64(num_groups_127144) * np.int64(group_sizze_127143))) != 0): + self.mainMagnitudezicopy_127140_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121854, mem_121858) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127140_var, + ((np.int64(num_groups_127144) * np.int64(group_sizze_127143)),), + (np.int64(group_sizze_127143),)) + if synchronous: + sync(self) + mem_121854 = None + mem_121895 = opencl_alloc(self, bytes_121990, "mem_121895") + mem_125243 = opencl_alloc(self, total_sizze_125937, "mem_125243") + double_buffer_mem_125565 = opencl_alloc(self, total_sizze_125938, + "double_buffer_mem_125565") + if ((1 * (np.int64(num_groups_94110) * np.int64(segmap_group_sizze_94109))) != 0): + self.mainMagnitudezisegmap_92141_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_73095), + np.int64(num_groups_94110), + np.int64(num_threads_125936), + mem_121338, mem_121343, + mem_121850, mem_121858, + mem_121895, mem_125243, + double_buffer_mem_125565) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainMagnitudezisegmap_92141_var, + ((np.int64(num_groups_94110) * np.int64(segmap_group_sizze_94109)),), + (np.int64(segmap_group_sizze_94109),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_121338 = None + mem_121850 = None + mem_121858 = None + mem_125243 = None + double_buffer_mem_125565 = None + mem_121898 = opencl_alloc(self, bytes_121990, "mem_121898") + self.futhark_builtinzhreplicate_f64(mem_121898, + (m_73008 * k2p2zq_73023), + np.float64(0.0)) + segmap_usable_groups_94180 = sdiv_up64(binop_x_120244, + segmap_group_sizze_94179) + mem_121901 = opencl_alloc(self, bytes_121990, "mem_121901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121901, np.int64(0), + mem_121895, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_121895 = None + if ((1 * (np.int64(segmap_usable_groups_94180) * np.int64(segmap_group_sizze_94179))) != 0): + self.mainMagnitudezisegmap_92098_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(binop_x_120251), + mem_121341, + defunc_3_map_res_r_mem_121609, + mem_121898, mem_121901) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_30360_var, - ((np.long(num_groups_30516) * np.long(segmap_group_sizze_30515)),), - (np.long(segmap_group_sizze_30515),)) + self.mainMagnitudezisegmap_92098_var, + ((np.int64(segmap_usable_groups_94180) * np.int64(segmap_group_sizze_94179)),), + (np.int64(segmap_group_sizze_94179),)) if synchronous: sync(self) - mem_44449 = None - mem_44541 = opencl_alloc(self, bytes_44544, "mem_44541") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44541, np.int64(0), - mem_44465, np.int64(0), - np.int64(1), - (m_27772 * i32_res_27787), - i32_res_27787) - mem_44465 = None - defunc_3_map_res_mem_44543 = mem_44541 - else: - local_memory_capacity_45828 = self.max_local_memory - if (sle64(np.int64(120), - sext_i32_i64(local_memory_capacity_45828)) and suff_outer_par_30546): - mem_44468 = opencl_alloc(self, bytes_44398, 
"mem_44468") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44468, np.int64(0), - images_mem_44381, + defunc_3_map_res_r_mem_121609 = None + mem_121901 = None + mem_121906 = opencl_alloc(self, bytes_121997, "mem_121906") + self.futhark_builtinzhreplicate_f64(mem_121906, + ((m_73008 * k2p2zq_73023) * k2p2zq_73023), + np.float64(0.0)) + segmap_usable_groups_94251 = sdiv_up64(nest_sizze_93834, + segmap_group_sizze_94250) + mem_121909 = opencl_alloc(self, bytes_121990, "mem_121909") + self.futhark_builtinzhgpu_map_transpose_i64(mem_121909, np.int64(0), + mem_121341, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_121341 = None + if ((1 * (np.int64(segmap_usable_groups_94251) * np.int64(segmap_group_sizze_94250))) != 0): + self.mainMagnitudezisegmap_91941_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_121847, + mem_121906, mem_121909) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_91941_var, + ((np.int64(segmap_usable_groups_94251) * np.int64(segmap_group_sizze_94250)),), + (np.int64(segmap_group_sizze_94250),)) + if synchronous: + sync(self) + defunc_3_map_res_r_mem_121847 = None + mem_121909 = None + segmap_usable_groups_94270 = sdiv_up64(nest_sizze_93834, + segmap_group_sizze_94269) + mem_121915 = opencl_alloc(self, bytes_121997, "mem_121915") + if ((1 * (np.int64(segmap_usable_groups_94270) * np.int64(segmap_group_sizze_94269))) != 0): + self.mainMagnitudezisegmap_91893_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121906, mem_121915) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_91893_var, + ((np.int64(segmap_usable_groups_94270) * np.int64(segmap_group_sizze_94269)),), + (np.int64(segmap_group_sizze_94269),)) + if synchronous: + sync(self) + mem_121906 = None + segmap_usable_groups_94279 = sdiv_up64(binop_x_120244, + segmap_group_sizze_94278) + mem_121919 = opencl_alloc(self, 
bytes_121990, "mem_121919") + if ((1 * (np.int64(segmap_usable_groups_94279) * np.int64(segmap_group_sizze_94278))) != 0): + self.mainMagnitudezisegmap_91871_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_121898, mem_121919) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_91871_var, + ((np.int64(segmap_usable_groups_94279) * np.int64(segmap_group_sizze_94278)),), + (np.int64(segmap_group_sizze_94278),)) + if synchronous: + sync(self) + mem_121898 = None + defunc_5_map_res_mem_121929 = mem_121915 + defunc_5_map_res_mem_121930 = mem_121919 + defunc_5_map_res_mem_121931 = mem_121343 + mem_120252 = None + mem_120254 = None + num_recresids_padded_73681 = (defunc_2_reduce_res_73132 - k2p2zq_73023) + replicate_arg_73682 = (m_73008 * num_recresids_padded_73681) + bounds_invalid_upwards_73683 = slt64(replicate_arg_73682, np.int64(0)) + valid_73684 = not(bounds_invalid_upwards_73683) + range_valid_c_73685 = True + assert valid_73684, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 recresid.fut:28:14-49\n #3 recresid.fut:100:7-30\n #4 mroc.fut:27:25-38\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:192:5-74\n #8 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_73682, + " is invalid.")) + bytes_121932 = (np.int64(8) * replicate_arg_73682) + mem_121934 = opencl_alloc(self, bytes_121932, "mem_121934") + self.futhark_builtinzhreplicate_f64(mem_121934, + (num_recresids_padded_73681 * m_73008), + np.float64(0.0)) + loop_cond_t_res_73687 = slt64(k2p2zq_73023, m_73162) + loop_not_taken_73688 = not(loop_cond_t_res_73687) + protect_assert_disj_73689 = (valid_73211 or loop_not_taken_73688) + range_valid_c_73690 = True + assert protect_assert_disj_73689, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 /prelude/array.fut:108:18-23\n #2 
lib/github.com/nhey/lm/linpack.fut:39:16-40\n #3 lib/github.com/nhey/lm/lm.fut:74:36-64\n #4 recresid.fut:62:33-75\n #5 /prelude/soacs.fut:91:28-38\n #6 /prelude/soacs.fut:91:3-61\n #7 recresid.fut:51:11-73:44\n #8 recresid.fut:100:7-30\n #9 mroc.fut:27:25-38\n #10 mroc.fut:77:27-61\n #11 bfastfinal.fut:45:24-53\n #12 bfastfinal.fut:192:5-74\n #13 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + replicate_arg_73209, + " is invalid.")) + segmap_group_sizze_94324 = self.sizes["mainMagnitude.segmap_group_size_94302"] + segmap_usable_groups_94325 = sdiv_up_safe64(binop_x_120251, + segmap_group_sizze_94324) + mem_121938 = opencl_alloc(self, bytes_120250, "mem_121938") + if ((1 * (np.int64(segmap_usable_groups_94325) * np.int64(segmap_group_sizze_94324))) != 0): + self.mainMagnitudezisegmap_94299_var.set_args(self.global_failure, + np.int64(k2p2zq_73023), + mem_121938) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_94299_var, + ((np.int64(segmap_usable_groups_94325) * np.int64(segmap_group_sizze_94324)),), + (np.int64(segmap_group_sizze_94324),)) + if synchronous: + sync(self) + suff_outer_par_94436 = (self.sizes["mainMagnitude.suff_outer_par_9"] <= m_73008) + segmap_group_sizze_94973 = self.sizes["mainMagnitude.segmap_group_size_94440"] + max_num_groups_127188 = self.sizes["mainMagnitude.segmap_num_groups_94442"] + num_groups_94974 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_94973), + sext_i32_i64(max_num_groups_127188)))) + segred_group_sizze_97264 = self.sizes["mainMagnitude.segred_group_size_97240"] + max_num_groups_127189 = self.sizes["mainMagnitude.segred_num_groups_97242"] + num_groups_97265 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_97264), + sext_i32_i64(max_num_groups_127189)))) + segmap_group_sizze_97285 = self.sizes["mainMagnitude.segmap_group_size_97230"] + segred_group_sizze_97295 = 
self.sizes["mainMagnitude.segred_group_size_97211"] + max_num_groups_127190 = self.sizes["mainMagnitude.segred_num_groups_97213"] + num_groups_97296 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_97295), + sext_i32_i64(max_num_groups_127190)))) + segmap_group_sizze_97308 = self.sizes["mainMagnitude.segmap_group_size_97199"] + suff_outer_par_97362 = (self.sizes["mainMagnitude.suff_outer_par_15"] <= m_73008) + segmap_group_sizze_97366 = self.sizes["mainMagnitude.segmap_group_size_96981"] + max_num_groups_127191 = self.sizes["mainMagnitude.segmap_num_groups_96983"] + num_groups_97367 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_97366), + sext_i32_i64(max_num_groups_127191)))) + segred_group_sizze_97388 = self.sizes["mainMagnitude.segred_group_size_97058"] + segmap_group_sizze_97399 = self.sizes["mainMagnitude.segmap_group_size_97050"] + segmap_group_sizze_97406 = self.sizes["mainMagnitude.segmap_group_size_97042"] + segmap_group_sizze_97424 = self.sizes["mainMagnitude.segmap_group_size_96691"] + max_num_groups_127192 = self.sizes["mainMagnitude.segmap_num_groups_96693"] + num_groups_97425 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_97424), + sext_i32_i64(max_num_groups_127192)))) + segmap_group_sizze_97720 = self.sizes["mainMagnitude.segmap_group_size_96634"] + segmap_group_sizze_97735 = self.sizes["mainMagnitude.segmap_group_size_96600"] + suff_outer_par_97759 = (self.sizes["mainMagnitude.suff_outer_par_12"] <= binop_x_120244) + suff_outer_par_97854 = (self.sizes["mainMagnitude.suff_outer_par_13"] <= m_73008) + segmap_group_sizze_97857 = self.sizes["mainMagnitude.segmap_group_size_96171"] + max_num_groups_127193 = self.sizes["mainMagnitude.segmap_num_groups_96173"] + num_groups_97858 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_97857), + sext_i32_i64(max_num_groups_127193)))) + suff_outer_par_97888 = 
(self.sizes["mainMagnitude.suff_outer_par_14"] <= binop_x_120244) + segred_group_sizze_97911 = self.sizes["mainMagnitude.segred_group_size_96285"] + segmap_group_sizze_97928 = self.sizes["mainMagnitude.segmap_group_size_96272"] + segmap_group_sizze_97939 = self.sizes["mainMagnitude.segmap_group_size_96261"] + segmap_group_sizze_97950 = self.sizes["mainMagnitude.segmap_group_size_95956"] + max_num_groups_127194 = self.sizes["mainMagnitude.segmap_num_groups_95958"] + num_groups_97951 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_97950), + sext_i32_i64(max_num_groups_127194)))) + suff_outer_par_97955 = (self.sizes["mainMagnitude.suff_outer_par_10"] <= binop_x_120244) + suff_outer_par_97977 = (self.sizes["mainMagnitude.suff_outer_par_11"] <= nest_sizze_93834) + segred_group_sizze_97993 = self.sizes["mainMagnitude.segred_group_size_96007"] + max_num_groups_127195 = self.sizes["mainMagnitude.segred_num_groups_96009"] + num_groups_97994 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_94092, + segred_group_sizze_97993), + sext_i32_i64(max_num_groups_127195)))) + segmap_group_sizze_98009 = self.sizes["mainMagnitude.segmap_group_size_95891"] + max_num_groups_127196 = self.sizes["mainMagnitude.segmap_num_groups_95893"] + num_groups_98010 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_98009), + sext_i32_i64(max_num_groups_127196)))) + segmap_group_sizze_98088 = self.sizes["mainMagnitude.segmap_group_size_95837"] + segmap_group_sizze_98159 = self.sizes["mainMagnitude.segmap_group_size_95680"] + segmap_group_sizze_98178 = self.sizes["mainMagnitude.segmap_group_size_95633"] + segmap_group_sizze_98187 = self.sizes["mainMagnitude.segmap_group_size_95610"] + segmap_group_sizze_98196 = self.sizes["mainMagnitude.segmap_group_size_95537"] + segred_group_sizze_98259 = self.sizes["mainMagnitude.segred_group_size_98258"] + max_num_groups_127197 = 
self.sizes["mainMagnitude.segred_num_groups_98260"] + num_groups_98261 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segred_group_sizze_98259), + sext_i32_i64(max_num_groups_127197)))) + segmap_usable_groups_97286 = sdiv_up_safe64(m_73008, + segmap_group_sizze_97285) + segmap_usable_groups_97309 = sdiv_up_safe64(m_73008, + segmap_group_sizze_97308) + segmap_usable_groups_97400 = sdiv_up_safe64(m_73008, + segmap_group_sizze_97399) + segmap_usable_groups_97407 = sdiv_up_safe64(m_73008, + segmap_group_sizze_97406) + segmap_usable_groups_97721 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_97720) + segmap_usable_groups_97736 = sdiv_up_safe64(nest_sizze_93834, + segmap_group_sizze_97735) + segmap_usable_groups_97929 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_97928) + segmap_usable_groups_97940 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_97939) + segmap_usable_groups_98089 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_98088) + segmap_usable_groups_98160 = sdiv_up_safe64(nest_sizze_93834, + segmap_group_sizze_98159) + segmap_usable_groups_98179 = sdiv_up_safe64(nest_sizze_93834, + segmap_group_sizze_98178) + segmap_usable_groups_98188 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_98187) + segmap_usable_groups_98197 = sdiv_up_safe64(m_73008, + segmap_group_sizze_98196) + mem_121941 = opencl_alloc(self, bytes_120175, "mem_121941") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121941, np.int64(0), + defunc_3_map_res_mem_120231, + np.int64(0), np.int64(1), + n_73011, m_73008) + mem_121944 = opencl_alloc(self, bytes_120250, "mem_121944") + self.futhark_builtinzhgpu_map_transpose_f64(mem_121944, np.int64(0), + mem_121938, np.int64(0), + np.int64(1), k2p2zq_73023, + k2p2zq_73023) + tile_sizze_116779 = self.sizes["mainMagnitude.tile_size_116778"] + group_sizze_116780 = (tile_sizze_116779 * tile_sizze_116779) + mem_121946 = opencl_alloc(self, bytes_120247, "mem_121946") + 
self.futhark_builtinzhreplicate_f64(mem_121946, k2p2zq_73023, + np.float64(0.0)) + tile_sizze_117141 = self.sizes["mainMagnitude.tile_size_117140"] + group_sizze_117142 = (tile_sizze_117141 * tile_sizze_117141) + Ty_117445 = self.sizes["mainMagnitude.Ty_117442"] + Ry_117446 = self.sizes["mainMagnitude.Ry_117444"] + Tx_117447 = self.sizes["mainMagnitude.Tx_117441"] + Rx_117448 = self.sizes["mainMagnitude.Rx_117443"] + Tk_117449 = self.sizes["mainMagnitude.Tk_117440"] + TxRx_117452 = (Tx_117447 * Rx_117448) + TyRy_117453 = (Ty_117445 * Ry_117446) + a_loc_szz_117455 = (Tk_117449 * TyRy_117453) + binop_x_117456 = (Tx_117447 * Tk_117449) + b_loc_szz_117457 = (Rx_117448 * binop_x_117456) + group_sizze_117462 = (Ty_117445 * Tx_117447) + num_groups_x_116781 = sdiv_up_safe64(m_73008, tile_sizze_116779) + num_groups_y_116782 = sdiv_up_safe64(k2p2zq_73023, tile_sizze_116779) + num_groups_top_116783 = (num_groups_x_116781 * num_groups_y_116782) + num_groups_x_117143 = sdiv_up_safe64(m_73008, tile_sizze_117141) + num_groups_y_117144 = sdiv_up_safe64(k2p2zq_73023, tile_sizze_117141) + num_groups_top_117145 = (num_groups_x_117143 * num_groups_y_117144) + tk_div_tx_117450 = sdiv_up_safe64(Tk_117449, Tx_117447) + tk_div_ty_117451 = sdiv_up_safe64(Tk_117449, Ty_117445) + gridDim_x_117458 = sdiv_up_safe64(k2p2zq_73023, TxRx_117452) + gridDim_y_117459 = sdiv_up_safe64(k2p2zq_73023, TyRy_117453) + binop_y_117460 = (gridDim_x_117458 * gridDim_y_117459) + grid_sizze_117461 = (m_73008 * binop_y_117460) + full_tiles_117490 = squot_safe64(k2p2zq_73023, Tk_117449) + kk_117693 = (Tk_117449 * full_tiles_117490) + padded_sizze_115508 = (m_73008 + y_115507) + mem_121948 = opencl_alloc(self, bytes_121947, "mem_121948") + per_chunk_115510 = squot_safe64(padded_sizze_115508, num_threads_115503) + mem_121992 = opencl_alloc(self, bytes_121990, "mem_121992") + mem_121996 = opencl_alloc(self, bytes_121993, "mem_121996") + mem_122000 = opencl_alloc(self, bytes_121997, "mem_122000") + mem_122003 = 
opencl_alloc(self, bytes_121990, "mem_122003") + mem_122007 = opencl_alloc(self, bytes_121997, "mem_122007") + bytes_122739 = (np.int64(8) * padded_sizze_115508) + binop_x_122742 = (num_threads_115503 * per_chunk_115510) + bytes_122741 = (np.int64(8) * binop_x_122742) + binop_x_123163 = (k2p2zq_73023 * group_sizze_116780) + bytes_123161 = (np.int64(8) * binop_x_123163) + ctx_val_123177 = (k2p2zq_73023 * tile_sizze_116779) + bytes_123180 = (np.int64(8) * group_sizze_116780) + binop_x_125283 = (np.int64(8) * tile_sizze_116779) + sizze_125284 = (tile_sizze_116779 * binop_x_125283) + bytes_123298 = (np.int64(8) * group_sizze_117142) + binop_x_125308 = (np.int64(8) * tile_sizze_117141) + sizze_125309 = (tile_sizze_117141 * binop_x_125308) + binop_x_123329 = (k2p2zq_73023 * group_sizze_117142) + bytes_123327 = (np.int64(8) * binop_x_123329) + binop_x_123431 = (Ry_117446 * group_sizze_117462) + binop_x_123432 = (Rx_117448 * binop_x_123431) + bytes_123429 = (np.int64(8) * binop_x_123432) + binop_x_123423 = (Ry_117446 * Rx_117448) + bytes_123422 = (np.int64(8) * binop_x_123423) + bytes_123434 = (np.int64(8) * a_loc_szz_117455) + bytes_123436 = (np.int64(8) * b_loc_szz_117457) + bytes_123505 = (np.int64(8) * binop_x_123431) + binop_x_123511 = (Rx_117448 * group_sizze_117462) + bytes_123509 = (np.int64(8) * binop_x_123511) + bytes_123497 = (np.int64(8) * Ry_117446) + bytes_123499 = (np.int64(8) * Rx_117448) + binop_x_125329 = (np.int64(8) * Ty_117445) + binop_x_125330 = (Tx_117447 * binop_x_125329) + binop_x_125331 = (Ry_117446 * binop_x_125330) + sizze_125332 = (Rx_117448 * binop_x_125331) + mem_123728 = opencl_alloc(self, np.int64(1), "mem_123728") + binop_x_125575 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125577 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125578 = (np.int64(16) * k2p2zq_73023) + binop_x_125592 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125594 = (np.int64(8) * k2p2zq_73023) + double_buffer_sizze_125595 = (np.int64(16) * 
k2p2zq_73023) + num_threads_125944 = (segmap_group_sizze_94973 * num_groups_94974) + total_sizze_125945 = (bytes_120247 * num_threads_125944) + total_sizze_125946 = (bytes_120247 * num_threads_125944) + total_sizze_125947 = (bytes_120269 * num_threads_125944) + total_sizze_125948 = (bytes_120247 * num_threads_125944) + total_sizze_125949 = (bytes_120250 * num_threads_125944) + total_sizze_125950 = (bytes_120250 * num_threads_125944) + total_sizze_125951 = (bytes_120247 * num_threads_125944) + total_sizze_125952 = (bytes_120250 * num_threads_125944) + total_sizze_125953 = (bytes_120247 * num_threads_125944) + total_sizze_125954 = (bytes_120250 * num_threads_125944) + total_sizze_125955 = (bytes_120247 * num_threads_125944) + total_sizze_125956 = (bytes_120250 * num_threads_125944) + total_sizze_125957 = (bytes_120247 * num_threads_125944) + total_sizze_125958 = (bytes_120250 * num_threads_125944) + total_sizze_125959 = (sizze_125149 * num_threads_125944) + total_sizze_125960 = (bytes_120247 * num_threads_125944) + total_sizze_125963 = (sizze_125149 * num_threads_125944) + total_sizze_125964 = (bytes_120247 * num_threads_125944) + total_sizze_125967 = (double_buffer_sizze_125577 * num_threads_125944) + total_sizze_125968 = (double_buffer_sizze_125578 * num_threads_125944) + num_threads_125979 = (segmap_group_sizze_97424 * num_groups_97425) + total_sizze_125980 = (bytes_120247 * num_threads_125979) + total_sizze_125981 = (sizze_125149 * num_threads_125979) + total_sizze_125982 = (bytes_120247 * num_threads_125979) + total_sizze_125984 = (sizze_125149 * num_threads_125979) + total_sizze_125985 = (bytes_120247 * num_threads_125979) + total_sizze_125988 = (double_buffer_sizze_125594 * num_threads_125979) + total_sizze_125989 = (double_buffer_sizze_125595 * num_threads_125979) + num_threads_125992 = (group_sizze_116780 * num_groups_top_116783) + total_sizze_125993 = (bytes_120247 * num_threads_125992) + num_threads_125994 = (segmap_group_sizze_97857 * num_groups_97858) + 
total_sizze_125995 = (bytes_120250 * num_threads_125994) + total_sizze_125996 = (bytes_120247 * num_threads_125994) + num_threads_125997 = (group_sizze_117142 * num_groups_top_117145) + total_sizze_125998 = (bytes_120247 * num_threads_125997) + num_threads_126002 = (segmap_group_sizze_97950 * num_groups_97951) + total_sizze_126003 = (bytes_120247 * num_threads_126002) + num_threads_126006 = (segmap_group_sizze_98009 * num_groups_98010) + mem_param_121959 = defunc_5_map_res_mem_121929 + mem_param_121967 = defunc_5_map_res_mem_121930 + mem_param_121972 = defunc_5_map_res_mem_121931 + loop_while_73697 = loop_cond_t_res_73687 + r_73698 = k2p2zq_73023 + while loop_while_73697: + x_73703 = sle64(np.int64(0), r_73698) + y_73704 = slt64(r_73698, defunc_2_reduce_res_73132) + bounds_check_73705 = (x_73703 and y_73704) + index_ok_73706 = (ok_or_empty_73101 and bounds_check_73705) + index_certs_73707 = True + assert index_ok_73706, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:192:5-74\n #10 bfastfinal.fut:187:1-193:48\n" % ("Index [", + r_73698, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "][", + k2p2zq_73023, + "].")) + index_certs_73708 = True + assert bounds_check_73705, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 recresid.fut:52:47-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:192:5-74\n #10 bfastfinal.fut:187:1-193:48\n" % ("Index [", + r_73698, + "] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "].")) + rp1_73709 = (np.int64(1) + r_73698) 
+ empty_slice_73710 = (rp1_73709 == np.int64(0)) + i_lte_j_73711 = sle64(np.int64(0), rp1_73709) + y_73712 = (bounds_check_73705 and i_lte_j_73711) + ok_or_empty_73713 = (empty_slice_73710 or y_73712) + index_certs_73714 = True + assert ok_or_empty_73713, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:66-75\n #1 /prelude/soacs.fut:91:28-38\n #2 /prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + rp1_73709, + "] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "].")) + index_ok_73715 = (ok_or_empty_73101 and ok_or_empty_73713) + index_certs_73716 = True + assert index_ok_73715, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:62:51-63\n #1 /prelude/soacs.fut:91:28-38\n #2 /prelude/soacs.fut:91:3-61\n #3 recresid.fut:51:11-73:44\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + rp1_73709, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "][", + k2p2zq_73023, + "].")) + i_p_m_t_s_leq_w_73717 = slt64(r_73698, rp1_73709) + y_73718 = (x_73703 and i_p_m_t_s_leq_w_73717) + y_73719 = (i_lte_j_73711 and y_73718) + ok_or_empty_73720 = (empty_slice_73710 or y_73719) + min_res_73721 = smin64(k2p2zq_73023, rp1_73709) + i_p_m_t_s_leq_w_73722 = slt64(m_73095, rp1_73709) + y_73723 = (zzero_leq_i_p_m_t_s_73096 and i_p_m_t_s_leq_w_73722) + y_73724 = (i_lte_j_73098 and y_73723) + ok_or_empty_73725 = (empty_slice_73094 or y_73724) + index_ok_73726 = (ok_or_empty_73101 and ok_or_empty_73725) + index_certs_73727 = True + assert index_ok_73726, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:80:50-58\n #1 recresid.fut:62:33-75\n #2 
/prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:192:5-74\n #10 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + k2p2zq_73023, + ", :", + k2p2zq_73023, + "] out of bounds for array of shape [", + k2p2zq_73023, + "][", + rp1_73709, + "].")) + index_certs_73734 = True + assert ok_or_empty_73725, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:92:15-21\n #1 recresid.fut:62:33-75\n #2 /prelude/soacs.fut:91:28-38\n #3 /prelude/soacs.fut:91:3-61\n #4 recresid.fut:51:11-73:44\n #5 recresid.fut:100:7-30\n #6 mroc.fut:27:25-38\n #7 mroc.fut:77:27-61\n #8 bfastfinal.fut:45:24-53\n #9 bfastfinal.fut:192:5-74\n #10 bfastfinal.fut:187:1-193:48\n" % ("Index [:", + k2p2zq_73023, + "] out of bounds for array of shape [", + rp1_73709, + "].")) + nest_sizze_97387 = (m_73008 * rp1_73709) + max_num_groups_127207 = self.sizes["mainMagnitude.segred_num_groups_97060"] + num_groups_97389 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_97387, + segred_group_sizze_97388), + sext_i32_i64(max_num_groups_127207)))) + self.futhark_builtinzhreplicate_f64(mem_121992, + (m_73008 * k2p2zq_73023), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_121996, + ((m_73008 * np.int64(2)) * k2p2zq_73023), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122000, + ((m_73008 * k2p2zq_73023) * k2p2zq_73023), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122003, + (m_73008 * k2p2zq_73023), + np.float64(0.0)) + self.futhark_builtinzhreplicate_f64(mem_122007, + ((m_73008 * k2p2zq_73023) * k2p2zq_73023), + np.float64(0.0)) + bytes_122015 = (np.int64(8) * nest_sizze_97387) + binop_x_122019 = (k2p2zq_73023 * rp1_73709) + binop_x_122020 = (m_73008 * binop_x_122019) + bytes_122018 = (np.int64(8) * binop_x_122020) + binop_x_122024 = (k2p2zq_73023 * nest_sizze_97387) + bytes_122022 = 
(np.int64(8) * binop_x_122024) + bytes_122511 = (np.int64(8) * rp1_73709) + binop_x_123636 = (rp1_73709 * binop_x_120244) + bytes_123634 = (np.int64(8) * binop_x_123636) + sizze_125454 = (rp1_73709 * bytes_120247) + double_buffer_sizze_125576 = (rp1_73709 * binop_x_125575) + double_buffer_sizze_125584 = (np.int64(8) * rp1_73709) + double_buffer_sizze_125593 = (rp1_73709 * binop_x_125592) + double_buffer_sizze_125601 = (np.int64(8) * rp1_73709) + total_sizze_125961 = (bytes_122511 * num_threads_125944) + total_sizze_125962 = (sizze_125454 * num_threads_125944) + total_sizze_125965 = (sizze_125454 * num_threads_125944) + total_sizze_125966 = (double_buffer_sizze_125576 * num_threads_125944) + total_sizze_125969 = (double_buffer_sizze_125584 * num_threads_125944) + total_sizze_125983 = (sizze_125454 * num_threads_125979) + total_sizze_125986 = (sizze_125454 * num_threads_125979) + total_sizze_125987 = (double_buffer_sizze_125593 * num_threads_125979) + total_sizze_126007 = (bytes_122511 * num_threads_126006) + total_sizze_126008 = (double_buffer_sizze_125601 * num_threads_126006) + local_memory_capacity_127974 = self.max_local_memory + if ((((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127974)) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127974))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127974))) and sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127974))) and suff_outer_par_94436): + mem_122011 = opencl_alloc(self, bytes_120258, "mem_122011") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122011, np.int64(0), + mem_param_121959, np.int64(0), np.int64(1), - N_27771, m_27772) - gridDim_x_42531 = sdiv_up64(i32_res_27787, Tx_42530) - gridDim_y_42532 = sdiv_up64(i32_res_27787, Ty_42529) - binop_x_42534 = (gridDim_y_42532 * gridDim_zz_42533) - grid_sizze_tile3d_42535 = (gridDim_x_42531 * binop_x_42534) - count_shmem_42537 = sdiv_up64(np.int64(30), group_sizze_tile3d_42536) - mem_44528 = opencl_alloc(self, 
bytes_44544, "mem_44528") - if ((1 * (np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536))) != 0): - self.mainDetailedzisegmap_intragroup_42541_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int64(120))), - np.int64(m_27772), - np.int32(n_27775), - np.int64(i32_res_27787), - np.int64(Ty_42529), - np.int64(Tx_42530), - np.int64(gridDim_x_42531), - np.int64(gridDim_y_42532), - np.int64(group_sizze_tile3d_42536), - np.int64(count_shmem_42537), - mem_44393, - mem_44397, - mem_44468, - mem_44528) + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_122014 = opencl_alloc(self, bytes_121990, "mem_122014") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122014, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_73023, m_73008) + mem_122017 = opencl_alloc(self, bytes_122015, "mem_122017") + group_sizze_127211 = self.sizes["mainMagnitude.group_size_127211"] + num_groups_127212 = sdiv_up64((m_73008 * rp1_73709), + group_sizze_127211) + if ((1 * (np.int64(num_groups_127212) * np.int64(group_sizze_127211))) != 0): + self.mainMagnitudezicopy_127208_var.set_args(np.int64(m_73008), + np.int64(n_73011), + np.int64(rp1_73709), + defunc_3_map_res_mem_120231, + mem_122017) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127208_var, + ((np.int64(num_groups_127212) * np.int64(group_sizze_127211)),), + (np.int64(group_sizze_127211),)) + if synchronous: + sync(self) + mem_122021 = opencl_alloc(self, bytes_122018, "mem_122021") + group_sizze_127216 = self.sizes["mainMagnitude.group_size_127216"] + num_groups_127217 = sdiv_up64(((m_73008 * k2p2zq_73023) * rp1_73709), + group_sizze_127216) + if ((1 * (np.int64(num_groups_127217) * np.int64(group_sizze_127216))) != 0): + self.mainMagnitudezicopy_127213_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + mem_120246, mem_122021) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127213_var, 
+ ((np.int64(num_groups_127217) * np.int64(group_sizze_127216)),), + (np.int64(group_sizze_127216),)) + if synchronous: + sync(self) + mem_122025 = opencl_alloc(self, bytes_122022, "mem_122025") + group_sizze_127221 = self.sizes["mainMagnitude.group_size_127221"] + num_groups_127222 = sdiv_up64(((m_73008 * k2p2zq_73023) * rp1_73709), + group_sizze_127221) + if ((1 * (np.int64(num_groups_127222) * np.int64(group_sizze_127221))) != 0): + self.mainMagnitudezicopy_127218_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + mem_120246, mem_122025) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127218_var, + ((np.int64(num_groups_127222) * np.int64(group_sizze_127221)),), + (np.int64(group_sizze_127221),)) + if synchronous: + sync(self) + mem_122650 = opencl_alloc(self, m_73008, "mem_122650") + mem_122654 = opencl_alloc(self, bytes_120258, "mem_122654") + mem_122657 = opencl_alloc(self, bytes_121990, "mem_122657") + mem_122659 = opencl_alloc(self, bytes_120173, "mem_122659") + mem_122661 = opencl_alloc(self, bytes_120173, "mem_122661") + mem_122028 = opencl_alloc(self, total_sizze_125945, "mem_122028") + mem_122042 = opencl_alloc(self, total_sizze_125946, "mem_122042") + mem_122045 = opencl_alloc(self, total_sizze_125947, "mem_122045") + mem_122047 = opencl_alloc(self, total_sizze_125948, "mem_122047") + mem_122382 = opencl_alloc(self, total_sizze_125949, "mem_122382") + mem_122423 = opencl_alloc(self, total_sizze_125950, "mem_122423") + mem_122435 = opencl_alloc(self, total_sizze_125951, "mem_122435") + mem_122464 = opencl_alloc(self, total_sizze_125952, "mem_122464") + mem_122537 = opencl_alloc(self, total_sizze_125953, "mem_122537") + mem_122552 = opencl_alloc(self, total_sizze_125954, "mem_122552") + mem_122564 = opencl_alloc(self, total_sizze_125955, "mem_122564") + mem_122575 = opencl_alloc(self, total_sizze_125956, "mem_122575") + mem_122595 = opencl_alloc(self, 
total_sizze_125957, "mem_122595") + mem_122598 = opencl_alloc(self, total_sizze_125958, "mem_122598") + mem_125248 = opencl_alloc(self, total_sizze_125959, "mem_125248") + mem_125250 = opencl_alloc(self, total_sizze_125960, "mem_125250") + mem_125258 = opencl_alloc(self, total_sizze_125961, "mem_125258") + mem_125455 = opencl_alloc(self, total_sizze_125962, "mem_125455") + mem_125463 = opencl_alloc(self, total_sizze_125963, "mem_125463") + mem_125465 = opencl_alloc(self, total_sizze_125964, "mem_125465") + mem_125505 = opencl_alloc(self, total_sizze_125965, "mem_125505") + double_buffer_mem_125569 = opencl_alloc(self, total_sizze_125966, + "double_buffer_mem_125569") + double_buffer_mem_125570 = opencl_alloc(self, total_sizze_125967, + "double_buffer_mem_125570") + double_buffer_mem_125571 = opencl_alloc(self, total_sizze_125968, + "double_buffer_mem_125571") + double_buffer_mem_125582 = opencl_alloc(self, total_sizze_125969, + "double_buffer_mem_125582") + if ((1 * (np.int64(num_groups_94974) * np.int64(segmap_group_sizze_94973))) != 0): + self.mainMagnitudezisegmap_94438_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(m_73095), + np.byte(y_73099), + np.int64(defunc_2_reduce_res_73132), + np.float64(tol_73201), + np.int64(k_73214), + np.int64(r_73698), + np.int64(rp1_73709), + np.byte(ok_or_empty_73720), + np.int64(min_res_73721), + np.int64(num_groups_94974), + np.int64(binop_x_120251), + np.int64(num_threads_125944), + defunc_3_map_res_mem_120231, + mem_120246, + mem_121938, + mem_121941, + mem_param_121972, + mem_122011, + mem_122014, + mem_122017, + mem_122021, + mem_122025, + mem_122028, + mem_122042, + mem_122045, + mem_122047, + mem_122382, + mem_122423, + mem_122435, + mem_122464, + mem_122537, + mem_122552, + mem_122564, + mem_122575, + mem_122595, + mem_122598, + mem_122650, + mem_122654, + mem_122657, + mem_122659, + mem_122661, + 
mem_125248, + mem_125250, + mem_125258, + mem_125455, + mem_125463, + mem_125465, + mem_125505, + double_buffer_mem_125569, + double_buffer_mem_125570, + double_buffer_mem_125571, + double_buffer_mem_125582) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_42541_var, - ((np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536)),), - (np.long(group_sizze_tile3d_42536),)) + self.mainMagnitudezisegmap_94438_var, + ((np.int64(num_groups_94974) * np.int64(segmap_group_sizze_94973)),), + (np.int64(segmap_group_sizze_94973),)) if synchronous: sync(self) - mem_44468 = None - defunc_3_map_res_mem_44537 = mem_44528 + self.failure_is_an_option = np.int32(1) + mem_122011 = None + mem_122014 = None + mem_122017 = None + mem_122021 = None + mem_122025 = None + mem_122028 = None + mem_122042 = None + mem_122045 = None + mem_122047 = None + mem_122382 = None + mem_122423 = None + mem_122435 = None + mem_122464 = None + mem_122537 = None + mem_122552 = None + mem_122564 = None + mem_122575 = None + mem_122595 = None + mem_122598 = None + mem_125248 = None + mem_125250 = None + mem_125258 = None + mem_125455 = None + mem_125463 = None + mem_125465 = None + mem_125505 = None + double_buffer_mem_125569 = None + double_buffer_mem_125570 = None + double_buffer_mem_125571 = None + double_buffer_mem_125582 = None + mem_123715 = opencl_alloc(self, bytes_121997, "mem_123715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123715, np.int64(0), + mem_122654, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_122654 = None + mem_123719 = opencl_alloc(self, bytes_121990, "mem_123719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123719, np.int64(0), + mem_122657, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_122657 = None + defunc_7_map_res_mem_123721 = mem_122650 + defunc_7_map_res_mem_123722 = mem_123715 + defunc_7_map_res_mem_123723 = mem_123719 + defunc_7_map_res_mem_123724 = mem_122659 + 
defunc_7_map_res_mem_123725 = mem_122661 else: - mem_44531 = opencl_alloc(self, bytes_44383, "mem_44531") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44531, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), - i32_res_27787, N_27771) - mem_44536 = opencl_alloc(self, bytes_44544, "mem_44536") - if slt64((i32_res_27781 * np.int64(2)), segred_group_sizze_30567): - segment_sizze_nonzzero_45768 = smax64(np.int64(1), i32_res_27781) - num_threads_45769 = (num_groups_30568 * segred_group_sizze_30567) - if ((1 * (np.long(num_groups_30568) * np.long(segred_group_sizze_30567))) != 0): - self.mainDetailedzisegred_small_30429_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_30567))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27781), - np.int64(i32_res_27787), - np.int64(num_groups_30568), - np.int64(segment_sizze_nonzzero_45768), - images_mem_44381, - binop_p_mem_44390, - mem_44531, - mem_44536) + mem_122665 = opencl_alloc(self, bytes_121997, "mem_122665") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122665, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_122668 = opencl_alloc(self, bytes_120173, "mem_122668") + mem_122671 = opencl_alloc(self, bytes_121990, "mem_122671") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_97264): + segment_sizze_nonzzero_127335 = smax64(np.int64(1), k2p2zq_73023) + num_threads_127336 = (num_groups_97265 * segred_group_sizze_97264) + if ((1 * (np.int64(num_groups_97265) * np.int64(segred_group_sizze_97264))) != 0): + self.mainMagnitudezisegred_small_97246_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97264))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(r_73698), + np.int64(num_groups_97265), + np.int64(segment_sizze_nonzzero_127335), + mem_120246, + mem_122665, + mem_122668, + mem_122671) 
cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_30429_var, - ((np.long(num_groups_30568) * np.long(segred_group_sizze_30567)),), - (np.long(segred_group_sizze_30567),)) + self.mainMagnitudezisegred_small_97246_var, + ((np.int64(num_groups_97265) * np.int64(segred_group_sizze_97264)),), + (np.int64(segred_group_sizze_97264),)) if synchronous: sync(self) else: - groups_per_segment_45789 = sdiv_up64(num_groups_30568, - smax64(np.int64(1), - ((m_27772 * i32_res_27787) * i32_res_27787))) - elements_per_thread_45790 = sdiv_up64(i32_res_27781, - (segred_group_sizze_30567 * groups_per_segment_45789)) - virt_num_groups_45791 = (groups_per_segment_45789 * ((m_27772 * i32_res_27787) * i32_res_27787)) - num_threads_45792 = (num_groups_30568 * segred_group_sizze_30567) - threads_per_segment_45793 = (groups_per_segment_45789 * segred_group_sizze_30567) - group_res_arr_mem_45794 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_30567 * virt_num_groups_45791)), - "group_res_arr_mem_45794") - mainDetailedzicounter_mem_45796 = self.mainDetailedzicounter_mem_45796 - if ((1 * (np.long(num_groups_30568) * np.long(segred_group_sizze_30567))) != 0): - self.mainDetailedzisegred_large_30429_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_30567))), - np.int64(N_27771), - np.int64(i32_res_27781), - np.int64(i32_res_27787), - np.int64(num_groups_30568), - np.int64(groups_per_segment_45789), - np.int64(elements_per_thread_45790), - np.int64(virt_num_groups_45791), - np.int64(threads_per_segment_45793), - images_mem_44381, - binop_p_mem_44390, - mem_44531, - mem_44536, - group_res_arr_mem_45794, - mainDetailedzicounter_mem_45796) + groups_per_segment_127357 = sdiv_up64(num_groups_97265, + smax64(np.int64(1), m_73008)) + elements_per_thread_127358 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_97264 * groups_per_segment_127357)) + virt_num_groups_127359 = 
(groups_per_segment_127357 * m_73008) + num_threads_127360 = (num_groups_97265 * segred_group_sizze_97264) + threads_per_segment_127361 = (groups_per_segment_127357 * segred_group_sizze_97264) + group_res_arr_mem_127362 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_97264 * virt_num_groups_127359)), + "group_res_arr_mem_127362") + mainMagnitudezicounter_mem_127364 = self.mainMagnitudezicounter_mem_127364 + if ((1 * (np.int64(num_groups_97265) * np.int64(segred_group_sizze_97264))) != 0): + self.mainMagnitudezisegred_large_97246_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97264))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(r_73698), + np.int64(num_groups_97265), + np.int64(groups_per_segment_127357), + np.int64(elements_per_thread_127358), + np.int64(virt_num_groups_127359), + np.int64(threads_per_segment_127361), + mem_120246, + mem_122665, + mem_122668, + mem_122671, + group_res_arr_mem_127362, + mainMagnitudezicounter_mem_127364) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_30429_var, - ((np.long(num_groups_30568) * np.long(segred_group_sizze_30567)),), - (np.long(segred_group_sizze_30567),)) + self.mainMagnitudezisegred_large_97246_var, + ((np.int64(num_groups_97265) * np.int64(segred_group_sizze_97264)),), + (np.int64(segred_group_sizze_97264),)) if synchronous: sync(self) - mem_44531 = None - defunc_3_map_res_mem_44537 = mem_44536 - defunc_3_map_res_mem_44543 = defunc_3_map_res_mem_44537 - defunc_3_map_res_mem_44549 = defunc_3_map_res_mem_44543 - m_27918 = (np.int32(2) * k2p2zq_27785) - x_27919 = (np.int64(2) * i32_res_27787) - nm_27920 = (i32_res_27787 * x_27919) - bounds_invalid_upwards_27921 = slt64(nm_27920, np.int64(0)) - valid_27922 = not(bounds_invalid_upwards_27921) - range_valid_c_27923 = True - assert valid_27922, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 
/prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:79:21-29\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - nm_27920, - " is invalid.")) - zzero_27928 = (m_27918 == np.int32(0)) - nonzzero_27929 = not(zzero_27928) - nonzzero_cert_27930 = True - assert nonzzero_27929, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:74:41-47\n #1 helpers.fut:74:14-79:30\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("division by zero",)) - loop_nonempty_27931 = slt32(np.int32(0), k2p2zq_27785) - loop_not_taken_27932 = not(loop_nonempty_27931) - protect_assert_disj_27933 = (nonzzero_27929 or loop_not_taken_27932) - nonzzero_cert_27934 = True - assert protect_assert_disj_27933, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:60:43-49\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n" % ("division by zero",)) - i32_res_27935 = sext_i32_i64(m_27918) - x_27936 = (i32_res_27787 * i32_res_27935) - dim_ok_27937 = (x_27936 == nm_27920) - dim_ok_cert_27938 = True - assert dim_ok_27937, ("Error: %s\n\nBacktrace:\n-> #0 /prelude/array.fut:141:3-33\n #1 helpers.fut:81:16-43\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("new shape has different number of elements than old shape",)) - j_m_i_27939 = (x_27919 - i32_res_27787) - empty_slice_27940 = (j_m_i_27939 == np.int64(0)) - m_27941 = (j_m_i_27939 - np.int64(1)) - i_p_m_t_s_27942 = (i32_res_27787 + m_27941) - zzero_leq_i_p_m_t_s_27943 = sle64(np.int64(0), i_p_m_t_s_27942) - i_p_m_t_s_leq_w_27944 = slt64(i_p_m_t_s_27942, i32_res_27935) - i_lte_j_27945 = sle64(i32_res_27787, x_27919) - y_27946 = (i_lte_j_27868 and i_p_m_t_s_leq_w_27944) - y_27947 = (zzero_leq_i_p_m_t_s_27943 and y_27946) - y_27948 = (i_lte_j_27945 and y_27947) - 
forwards_ok_27949 = (i_lte_j_27868 and y_27948) - ok_or_empty_27950 = (empty_slice_27940 or forwards_ok_27949) - index_ok_27951 = (ok_or_empty_27884 and ok_or_empty_27950) - index_certs_27952 = True - assert index_ok_27951, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-30\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:174:3-56\n #3 bfastfinal.fut:170:1-174:56\n" % ("Index [", - np.int64(0), - ":", - i32_res_27787, - ", ", - i32_res_27787, - ":", - x_27919, - "] out of bounds for array of shape [", - i32_res_27787, - "][", - i32_res_27935, - "].")) - dim_match_27953 = (i32_res_27787 == j_m_i_27939) - empty_or_match_cert_27954 = True - assert dim_match_27953, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-45\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:174:3-56\n #3 bfastfinal.fut:170:1-174:56\n" % ("Value of (core language) shape (", - i32_res_27787, - ", ", - j_m_i_27939, - ") cannot match shape of type `[", - i32_res_27787, - "][", - i32_res_27787, - "]f32`.")) - max_group_sizze_30862 = self.max_group_size - fits_30863 = sle64(nm_27920, max_group_sizze_30862) - suff_intra_par_30861 = (self.sizes["mainDetailed.suff_intra_par_11"] <= nm_27920) - intra_suff_and_fits_30864 = (suff_intra_par_30861 and fits_30863) - nest_sizze_31528 = (m_27772 * nm_27920) - segmap_group_sizze_31529 = self.sizes["mainDetailed.segmap_group_size_31472"] - suff_intra_par_31568 = (self.sizes["mainDetailed.suff_intra_par_13"] <= nm_27920) - intra_suff_and_fits_31569 = (fits_30863 and suff_intra_par_31568) - segmap_group_sizze_31621 = self.sizes["mainDetailed.segmap_group_size_31354"] - segmap_group_sizze_31637 = self.sizes["mainDetailed.segmap_group_size_31255"] - segmap_group_sizze_31687 = self.sizes["mainDetailed.segmap_group_size_31185"] - y_31697 = (i32_res_27787 * j_m_i_27939) - nest_sizze_31698 = (m_27772 * y_31697) - segmap_group_sizze_31699 = self.sizes["mainDetailed.segmap_group_size_30956"] - segmap_usable_groups_31622 = 
sdiv_up_safe64(m_27772, - segmap_group_sizze_31621) - segmap_usable_groups_31638 = sdiv_up_safe64(nest_sizze_31528, - segmap_group_sizze_31637) - segmap_usable_groups_31688 = sdiv_up_safe64(nest_sizze_31528, - segmap_group_sizze_31687) - bytes_44552 = (np.int64(4) * nm_27920) - bytes_44575 = (np.int64(4) * nest_sizze_31528) - binop_x_44626 = (j_m_i_27939 * nest_sizze_30514) - bytes_44624 = (np.int64(4) * binop_x_44626) - local_memory_capacity_45831 = self.max_local_memory - if intra_suff_and_fits_30864: - defunc_3_map_res_ixfn_44628 = i32_res_27787 - else: - defunc_3_map_res_ixfn_44628 = j_m_i_27939 - local_memory_capacity_45894 = self.max_local_memory - if (sle64(((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))) + (bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_45894)) and intra_suff_and_fits_30864): - mem_44573 = opencl_alloc(self, bytes_44544, "mem_44573") - if ((1 * (np.long(m_27772) * np.long(nm_27920))) != 0): - self.mainDetailedzisegmap_intragroup_30688_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - cl.LocalMemory(np.long(bytes_44552)), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int32(m_27918), - np.int64(nm_27920), - np.int64(i32_res_27935), - defunc_3_map_res_mem_44549, - mem_44573) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_30688_var, - ((np.long(m_27772) * np.long(nm_27920)),), - (np.long(nm_27920),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_44629 = mem_44573 - else: - segmap_usable_groups_31530 = sdiv_up64(nest_sizze_31528, - segmap_group_sizze_31529) - mem_44577 = opencl_alloc(self, bytes_44575, "mem_44577") - if ((1 * (np.long(segmap_usable_groups_31530) * np.long(segmap_group_sizze_31529))) != 0): - 
self.mainDetailedzisegmap_31469_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_27772), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int32(m_27918), - np.int64(nm_27920), - defunc_3_map_res_mem_44549, - mem_44577) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31469_var, - ((np.long(segmap_usable_groups_31530) * np.long(segmap_group_sizze_31529)),), - (np.long(segmap_group_sizze_31529),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - ctx_param_ext_44578 = m_27772 - ctx_param_ext_44579 = nm_27920 - ctx_param_ext_44580 = np.int64(0) - ctx_param_ext_44581 = nm_27920 - ctx_param_ext_44582 = m_27772 - ctx_param_ext_44583 = np.int64(1) - ctx_param_ext_44584 = nm_27920 - mem_param_44585 = mem_44577 - i_31554 = np.int32(0) - one_46875 = np.int32(1) - for counter_46874 in range(k2p2zq_27785): - i32_res_31556 = sext_i32_i64(i_31554) - x_31557 = sle64(np.int64(0), i32_res_31556) - y_31558 = slt64(i32_res_31556, nm_27920) - bounds_check_31559 = (x_31557 and y_31558) - index_certs_31560 = True - assert bounds_check_31559, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("Index [", - i32_res_31556, - "] out of bounds for array of shape [", - nm_27920, - "].")) - local_memory_capacity_45854 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44607 = m_27772 - else: - gauss_jordan_res_ixfn_44607 = ctx_param_ext_44582 - local_memory_capacity_45855 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44608 = nm_27920 - else: - gauss_jordan_res_ixfn_44608 = ctx_param_ext_44584 - local_memory_capacity_45856 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44609 = m_27772 - else: - gauss_jordan_res_ixfn_44609 = ctx_param_ext_44578 - 
local_memory_capacity_45857 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44610 = nm_27920 - else: - gauss_jordan_res_ixfn_44610 = ctx_param_ext_44579 - local_memory_capacity_45858 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44611 = nm_27920 - else: - gauss_jordan_res_ixfn_44611 = ctx_param_ext_44581 - local_memory_capacity_45859 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44612 = np.int64(1) - else: - gauss_jordan_res_ixfn_44612 = ctx_param_ext_44583 - local_memory_capacity_45860 = self.max_local_memory - if intra_suff_and_fits_31569: - gauss_jordan_res_ixfn_44613 = np.int64(0) - else: - gauss_jordan_res_ixfn_44613 = ctx_param_ext_44580 - local_memory_capacity_45888 = self.max_local_memory - if ((sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45888)) and sle64((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))), - sext_i32_i64(local_memory_capacity_45888))) and intra_suff_and_fits_31569): - mem_44590 = opencl_alloc(self, bytes_44575, "mem_44590") - group_sizze_45864 = self.sizes["mainDetailed.group_size_45864"] - num_groups_45865 = sdiv_up64((m_27772 * nm_27920), group_sizze_45864) - if ((1 * (np.long(num_groups_45865) * np.long(group_sizze_45864))) != 0): - self.mainDetailedzicopy_45861_var.set_args(np.int64(m_27772), - np.int64(nm_27920), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44590) + mem_122665 = None + mem_122674 = opencl_alloc(self, bytes_120173, "mem_122674") + if ((1 * (np.int64(segmap_usable_groups_97286) * np.int64(segmap_group_sizze_97285))) != 0): + self.mainMagnitudezisegmap_97228_var.set_args(self.global_failure, + np.int64(m_73008), + mem_122668, + mem_122674) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzicopy_45861_var, - ((np.long(num_groups_45865) * np.long(group_sizze_45864)),), - 
(np.long(group_sizze_45864),)) + self.mainMagnitudezisegmap_97228_var, + ((np.int64(segmap_usable_groups_97286) * np.int64(segmap_group_sizze_97285)),), + (np.int64(segmap_group_sizze_97285),)) if synchronous: sync(self) - mem_44598 = opencl_alloc(self, bytes_44575, "mem_44598") - if ((1 * (np.long(m_27772) * np.long(nm_27920))) != 0): - self.mainDetailedzisegmap_intragroup_31015_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - np.int64(m_27772), - np.int32(k2p2zq_27785), - np.int32(m_27918), - np.int64(nm_27920), - np.int32(i_31554), - np.int64(i32_res_31556), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44590, - mem_44598) + mem_122668 = None + mem_122677 = opencl_alloc(self, bytes_120173, "mem_122677") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_97295): + segment_sizze_nonzzero_127402 = smax64(np.int64(1), k2p2zq_73023) + num_threads_127403 = (num_groups_97296 * segred_group_sizze_97295) + if ((1 * (np.int64(num_groups_97296) * np.int64(segred_group_sizze_97295))) != 0): + self.mainMagnitudezisegred_small_97217_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97295))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(r_73698), + np.int64(num_groups_97296), + np.int64(segment_sizze_nonzzero_127402), + mem_120246, + mem_param_121967, + mem_122677) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_97217_var, + ((np.int64(num_groups_97296) * np.int64(segred_group_sizze_97295)),), + (np.int64(segred_group_sizze_97295),)) + if synchronous: + sync(self) + else: + groups_per_segment_127423 = sdiv_up64(num_groups_97296, + smax64(np.int64(1), m_73008)) + elements_per_thread_127424 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_97295 * groups_per_segment_127423)) + 
virt_num_groups_127425 = (groups_per_segment_127423 * m_73008) + num_threads_127426 = (num_groups_97296 * segred_group_sizze_97295) + threads_per_segment_127427 = (groups_per_segment_127423 * segred_group_sizze_97295) + group_res_arr_mem_127428 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_97295 * virt_num_groups_127425)), + "group_res_arr_mem_127428") + mainMagnitudezicounter_mem_127430 = self.mainMagnitudezicounter_mem_127430 + if ((1 * (np.int64(num_groups_97296) * np.int64(segred_group_sizze_97295))) != 0): + self.mainMagnitudezisegred_large_97217_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97295))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(r_73698), + np.int64(num_groups_97296), + np.int64(groups_per_segment_127423), + np.int64(elements_per_thread_127424), + np.int64(virt_num_groups_127425), + np.int64(threads_per_segment_127427), + mem_120246, + mem_param_121967, + mem_122677, + group_res_arr_mem_127428, + mainMagnitudezicounter_mem_127430) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_97217_var, + ((np.int64(num_groups_97296) * np.int64(segred_group_sizze_97295)),), + (np.int64(segred_group_sizze_97295),)) + if synchronous: + sync(self) + mem_122680 = opencl_alloc(self, bytes_120173, "mem_122680") + mem_122682 = opencl_alloc(self, bytes_120173, "mem_122682") + if ((1 * (np.int64(segmap_usable_groups_97309) * np.int64(segmap_group_sizze_97308))) != 0): + self.mainMagnitudezisegmap_97197_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(n_73011), + np.int64(r_73698), + defunc_3_map_res_mem_120231, + mem_122674, + mem_122677, + mem_122680, + mem_122682) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_31015_var, - ((np.long(m_27772) * np.long(nm_27920)),), - (np.long(nm_27920),)) + self.mainMagnitudezisegmap_97197_var, + 
((np.int64(segmap_usable_groups_97309) * np.int64(segmap_group_sizze_97308)),), + (np.int64(segmap_group_sizze_97308),)) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - mem_44590 = None - gauss_jordan_res_mem_44614 = mem_44598 - else: - mem_44601 = opencl_alloc(self, m_27772, "mem_44601") - if ((1 * (np.long(segmap_usable_groups_31622) * np.long(segmap_group_sizze_31621))) != 0): - self.mainDetailedzisegmap_31352_var.set_args(self.global_failure, - np.int64(m_27772), - np.int64(i32_res_31556), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44601) + mem_122677 = None + mem_122686 = opencl_alloc(self, bytes_122022, "mem_122686") + group_sizze_127470 = self.sizes["mainMagnitude.group_size_127470"] + num_groups_127471 = sdiv_up64(((m_73008 * k2p2zq_73023) * rp1_73709), + group_sizze_127470) + if ((1 * (np.int64(num_groups_127471) * np.int64(group_sizze_127470))) != 0): + self.mainMagnitudezicopy_127467_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + mem_120246, mem_122686) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31352_var, - ((np.long(segmap_usable_groups_31622) * np.long(segmap_group_sizze_31621)),), - (np.long(segmap_group_sizze_31621),)) + self.mainMagnitudezicopy_127467_var, + ((np.int64(num_groups_127471) * np.int64(group_sizze_127470)),), + (np.int64(group_sizze_127470),)) if synchronous: sync(self) - mem_44605 = opencl_alloc(self, bytes_44575, "mem_44605") - if ((1 * (np.long(segmap_usable_groups_31638) * np.long(segmap_group_sizze_31637))) != 0): - self.mainDetailedzisegmap_31252_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_27772), - np.int32(k2p2zq_27785), - np.int32(m_27918), - np.int64(nm_27920), - np.int32(i_31554), - np.int64(i32_res_31556), - np.int64(ctx_param_ext_44580), - 
np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44601, mem_44605) + mem_param_122694 = mem_121992 + mem_param_122705 = mem_121996 + j_97356 = np.int64(0) + one_129940 = np.int64(1) + for counter_129939 in range(k2p2zq_73023): + index_certs_97359 = True + assert ok_or_empty_73720, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/linpack.fut:44:25-30\n #1 lib/github.com/nhey/lm/lm.fut:74:36-64\n #2 recresid.fut:62:33-75\n #3 /prelude/soacs.fut:91:28-38\n #4 /prelude/soacs.fut:91:3-61\n #5 recresid.fut:51:11-73:44\n #6 recresid.fut:100:7-30\n #7 mroc.fut:27:25-38\n #8 mroc.fut:77:27-61\n #9 bfastfinal.fut:45:24-53\n #10 bfastfinal.fut:192:5-74\n #11 bfastfinal.fut:187:1-193:48\n" % ("Index [", + j_97356, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + k2p2zq_73023, + "][", + rp1_73709, + "].")) + local_memory_capacity_127570 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127570)) and suff_outer_par_97362): + mem_122715 = opencl_alloc(self, bytes_121990, "mem_122715") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122715, + np.int64(0), + mem_param_122694, + np.int64(0), + np.int64(1), + k2p2zq_73023, m_73008) + mem_122719 = opencl_alloc(self, bytes_120924, "mem_122719") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122719, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + mem_122723 = opencl_alloc(self, bytes_121990, "mem_122723") + mem_122727 = opencl_alloc(self, bytes_120924, "mem_122727") + if ((1 * (np.int64(num_groups_97367) * np.int64(segmap_group_sizze_97366))) != 0): + self.mainMagnitudezisegmap_96979_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(rp1_73709), + np.int64(j_97356), + np.int64(num_groups_97367), + mem_122686, + mem_122715, + mem_122719, + mem_122723, + mem_122727) + cl.enqueue_nd_range_kernel(self.queue, + 
self.mainMagnitudezisegmap_96979_var, + ((np.int64(num_groups_97367) * np.int64(segmap_group_sizze_97366)),), + (np.int64(segmap_group_sizze_97366),)) + if synchronous: + sync(self) + mem_122715 = None + mem_122719 = None + mem_122751 = opencl_alloc(self, bytes_121990, "mem_122751") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122751, + np.int64(0), + mem_122723, + np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_122723 = None + dqrdc2_res_mem_122757 = mem_122751 + dqrdc2_res_mem_122758 = mem_122727 + else: + mem_122730 = opencl_alloc(self, bytes_120173, "mem_122730") + if slt64((rp1_73709 * np.int64(2)), segred_group_sizze_97388): + segment_sizze_nonzzero_127488 = smax64(np.int64(1), rp1_73709) + num_threads_127489 = (num_groups_97389 * segred_group_sizze_97388) + if ((1 * (np.int64(num_groups_97389) * np.int64(segred_group_sizze_97388))) != 0): + self.mainMagnitudezisegred_small_97064_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97388))), + np.int64(m_73008), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + np.int64(j_97356), + np.int64(num_groups_97389), + np.int64(segment_sizze_nonzzero_127488), + mem_120246, + mem_122730) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_97064_var, + ((np.int64(num_groups_97389) * np.int64(segred_group_sizze_97388)),), + (np.int64(segred_group_sizze_97388),)) + if synchronous: + sync(self) + else: + groups_per_segment_127509 = sdiv_up64(num_groups_97389, + smax64(np.int64(1), + m_73008)) + elements_per_thread_127510 = sdiv_up64(rp1_73709, + (segred_group_sizze_97388 * groups_per_segment_127509)) + virt_num_groups_127511 = (groups_per_segment_127509 * m_73008) + num_threads_127512 = (num_groups_97389 * segred_group_sizze_97388) + threads_per_segment_127513 = (groups_per_segment_127509 * segred_group_sizze_97388) + group_res_arr_mem_127514 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_97388 * 
virt_num_groups_127511)), + "group_res_arr_mem_127514") + mainMagnitudezicounter_mem_127516 = self.mainMagnitudezicounter_mem_127516 + if ((1 * (np.int64(num_groups_97389) * np.int64(segred_group_sizze_97388))) != 0): + self.mainMagnitudezisegred_large_97064_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97388))), + np.int64(m_73008), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + np.int64(j_97356), + np.int64(num_groups_97389), + np.int64(groups_per_segment_127509), + np.int64(elements_per_thread_127510), + np.int64(virt_num_groups_127511), + np.int64(threads_per_segment_127513), + mem_120246, + mem_122730, + group_res_arr_mem_127514, + mainMagnitudezicounter_mem_127516) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_97064_var, + ((np.int64(num_groups_97389) * np.int64(segred_group_sizze_97388)),), + (np.int64(segred_group_sizze_97388),)) + if synchronous: + sync(self) + mem_122733 = opencl_alloc(self, bytes_120173, "mem_122733") + if ((1 * (np.int64(segmap_usable_groups_97400) * np.int64(segmap_group_sizze_97399))) != 0): + self.mainMagnitudezisegmap_97048_var.set_args(self.global_failure, + np.int64(m_73008), + mem_122730, + mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_97048_var, + ((np.int64(segmap_usable_groups_97400) * np.int64(segmap_group_sizze_97399)),), + (np.int64(segmap_group_sizze_97399),)) + if synchronous: + sync(self) + mem_122730 = None + if ((1 * (np.int64(segmap_usable_groups_97407) * np.int64(segmap_group_sizze_97406))) != 0): + self.mainMagnitudezisegmap_97039_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(j_97356), + mem_param_122694, + mem_122733) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_97039_var, + ((np.int64(segmap_usable_groups_97407) * np.int64(segmap_group_sizze_97406)),), + 
(np.int64(segmap_group_sizze_97406),)) + if synchronous: + sync(self) + mem_122738 = opencl_alloc(self, bytes_120924, "mem_122738") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122738, + np.int64(0), + mem_param_122705, + np.int64(0), + np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + mem_122740 = opencl_alloc(self, bytes_122739, "mem_122740") + tmp_offs_127558 = np.int64(0) + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_122733, + dest_offset=np.int64((tmp_offs_127558 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127558 = (tmp_offs_127558 + m_73008) + if ((y_115507 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122740, mem_121948, + dest_offset=np.int64((tmp_offs_127558 * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((y_115507 * np.int32(8)))) + if synchronous: + sync(self) + tmp_offs_127558 = (tmp_offs_127558 + y_115507) + mem_122743 = opencl_alloc(self, bytes_122741, "mem_122743") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122743, + np.int64(0), + mem_122740, + np.int64(0), + np.int64(1), + per_chunk_115510, + num_threads_115503) + mem_122740 = None + mem_122748 = opencl_alloc(self, bytes_120924, "mem_122748") + if ((1 * (np.int64(num_groups_97412) * np.int64(segmap_group_sizze_97411))) != 0): + self.mainMagnitudezisegmap_97024_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(j_97356), + np.int64(num_groups_97412), + np.int64(num_threads_115503), + np.int64(per_chunk_115510), + mem_122733, + mem_122738, + mem_122743, + mem_122748) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_97024_var, + ((np.int64(num_groups_97412) * np.int64(segmap_group_sizze_97411)),), + (np.int64(segmap_group_sizze_97411),)) + if synchronous: + sync(self) + mem_122733 = None + mem_122738 = None + mem_122743 = None + mem_122755 = 
opencl_alloc(self, bytes_121990, "mem_122755") + if (((m_73008 * k2p2zq_73023) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_122755, mem_param_122694, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64(((m_73008 * k2p2zq_73023) * np.int32(8)))) + if synchronous: + sync(self) + dqrdc2_res_mem_122757 = mem_122755 + dqrdc2_res_mem_122758 = mem_122748 + mem_122764 = opencl_alloc(self, bytes_121993, "mem_122764") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122764, np.int64(0), + dqrdc2_res_mem_122758, + np.int64(0), + np.int64(1), m_73008, + (np.int64(2) * k2p2zq_73023)) + dqrdc2_res_mem_122758 = None + mem_param_tmp_127472 = dqrdc2_res_mem_122757 + mem_param_tmp_127473 = mem_122764 + mem_param_122694 = mem_param_tmp_127472 + mem_param_122705 = mem_param_tmp_127473 + j_97356 += one_129940 + dqrdc2_res_r_mem_122778 = mem_param_122694 + dqrdc2_res_r_mem_122789 = mem_param_122705 + mem_122686 = None + mem_122793 = opencl_alloc(self, bytes_122018, "mem_122793") + group_sizze_127574 = self.sizes["mainMagnitude.group_size_127574"] + num_groups_127575 = sdiv_up64(((m_73008 * k2p2zq_73023) * rp1_73709), + group_sizze_127574) + if ((1 * (np.int64(num_groups_127575) * np.int64(group_sizze_127574))) != 0): + self.mainMagnitudezicopy_127571_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(rp1_73709), + mem_120246, mem_122793) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127571_var, + ((np.int64(num_groups_127575) * np.int64(group_sizze_127574)),), + (np.int64(group_sizze_127574),)) + if synchronous: + sync(self) + mem_122796 = opencl_alloc(self, bytes_121990, "mem_122796") + self.futhark_builtinzhgpu_map_transpose_f64(mem_122796, np.int64(0), + dqrdc2_res_r_mem_122778, + np.int64(0), np.int64(1), + k2p2zq_73023, m_73008) + dqrdc2_res_r_mem_122778 = None + mem_122800 = opencl_alloc(self, bytes_120924, "mem_122800") + 
self.futhark_builtinzhgpu_map_transpose_f64(mem_122800, np.int64(0), + dqrdc2_res_r_mem_122789, + np.int64(0), np.int64(1), + (np.int64(2) * k2p2zq_73023), + m_73008) + dqrdc2_res_r_mem_122789 = None + mem_123127 = opencl_alloc(self, bytes_122018, "mem_123127") + mem_123130 = opencl_alloc(self, bytes_121990, "mem_123130") + mem_123133 = opencl_alloc(self, bytes_121990, "mem_123133") + mem_123135 = opencl_alloc(self, bytes_120173, "mem_123135") + mem_122803 = opencl_alloc(self, total_sizze_125980, "mem_122803") + mem_125265 = opencl_alloc(self, total_sizze_125981, "mem_125265") + mem_125267 = opencl_alloc(self, total_sizze_125982, "mem_125267") + mem_125472 = opencl_alloc(self, total_sizze_125983, "mem_125472") + mem_125480 = opencl_alloc(self, total_sizze_125984, "mem_125480") + mem_125482 = opencl_alloc(self, total_sizze_125985, "mem_125482") + mem_125512 = opencl_alloc(self, total_sizze_125986, "mem_125512") + double_buffer_mem_125586 = opencl_alloc(self, total_sizze_125987, + "double_buffer_mem_125586") + double_buffer_mem_125587 = opencl_alloc(self, total_sizze_125988, + "double_buffer_mem_125587") + double_buffer_mem_125588 = opencl_alloc(self, total_sizze_125989, + "double_buffer_mem_125588") + if ((1 * (np.int64(num_groups_97425) * np.int64(segmap_group_sizze_97424))) != 0): + self.mainMagnitudezisegmap_96689_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_73095), + np.byte(y_73099), + np.int64(k_73214), + np.int64(rp1_73709), + np.int64(min_res_73721), + np.int64(num_groups_97425), + np.int64(num_threads_125979), + mem_120248, + mem_122793, + mem_122796, + mem_122800, + mem_122803, + mem_123127, + mem_123130, + mem_123133, + mem_123135, + mem_125265, + mem_125267, + mem_125472, + mem_125480, + mem_125482, + mem_125512, + double_buffer_mem_125586, + double_buffer_mem_125587, + double_buffer_mem_125588) cl.enqueue_nd_range_kernel(self.queue, - 
self.mainDetailedzisegmap_31252_var, - ((np.long(segmap_usable_groups_31638) * np.long(segmap_group_sizze_31637)),), - (np.long(segmap_group_sizze_31637),)) + self.mainMagnitudezisegmap_96689_var, + ((np.int64(num_groups_97425) * np.int64(segmap_group_sizze_97424)),), + (np.int64(segmap_group_sizze_97424),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_44601 = None - if ((1 * (np.long(segmap_usable_groups_31688) * np.long(segmap_group_sizze_31687))) != 0): - self.mainDetailedzisegmap_31182_var.set_args(self.global_failure, - np.int64(m_27772), - np.int64(nm_27920), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44605) + mem_122793 = None + mem_122796 = None + mem_122800 = None + mem_122803 = None + mem_125265 = None + mem_125267 = None + mem_125472 = None + mem_125480 = None + mem_125482 = None + mem_125512 = None + double_buffer_mem_125586 = None + double_buffer_mem_125587 = None + double_buffer_mem_125588 = None + mem_123138 = opencl_alloc(self, binop_x_120244, "mem_123138") + if ((1 * (np.int64(segmap_usable_groups_97721) * np.int64(segmap_group_sizze_97720))) != 0): + self.mainMagnitudezisegmap_96631_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_123135, + mem_123138) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31182_var, - ((np.long(segmap_usable_groups_31688) * np.long(segmap_group_sizze_31687)),), - (np.long(segmap_group_sizze_31687),)) + self.mainMagnitudezisegmap_96631_var, + ((np.int64(segmap_usable_groups_97721) * np.int64(segmap_group_sizze_97720)),), + (np.int64(segmap_group_sizze_97720),)) if synchronous: sync(self) - mem_44605 = None - gauss_jordan_res_mem_44614 = mem_param_44585 - ctx_param_ext_tmp_45845 = gauss_jordan_res_ixfn_44609 - ctx_param_ext_tmp_45846 = gauss_jordan_res_ixfn_44610 - ctx_param_ext_tmp_45847 = gauss_jordan_res_ixfn_44613 - ctx_param_ext_tmp_45848 = 
gauss_jordan_res_ixfn_44611 - ctx_param_ext_tmp_45849 = gauss_jordan_res_ixfn_44607 - ctx_param_ext_tmp_45850 = gauss_jordan_res_ixfn_44612 - ctx_param_ext_tmp_45851 = gauss_jordan_res_ixfn_44608 - mem_param_tmp_45852 = gauss_jordan_res_mem_44614 - ctx_param_ext_44578 = ctx_param_ext_tmp_45845 - ctx_param_ext_44579 = ctx_param_ext_tmp_45846 - ctx_param_ext_44580 = ctx_param_ext_tmp_45847 - ctx_param_ext_44581 = ctx_param_ext_tmp_45848 - ctx_param_ext_44582 = ctx_param_ext_tmp_45849 - ctx_param_ext_44583 = ctx_param_ext_tmp_45850 - ctx_param_ext_44584 = ctx_param_ext_tmp_45851 - mem_param_44585 = mem_param_tmp_45852 - i_31554 += one_46875 - gauss_jordan_res_r_ixfn_44615 = ctx_param_ext_44578 - gauss_jordan_res_r_ixfn_44616 = ctx_param_ext_44579 - gauss_jordan_res_r_ixfn_44617 = ctx_param_ext_44580 - gauss_jordan_res_r_ixfn_44618 = ctx_param_ext_44581 - gauss_jordan_res_r_ixfn_44619 = ctx_param_ext_44582 - gauss_jordan_res_r_ixfn_44620 = ctx_param_ext_44583 - gauss_jordan_res_r_ixfn_44621 = ctx_param_ext_44584 - gauss_jordan_res_r_mem_44622 = mem_param_44585 - mem_44577 = None - segmap_usable_groups_31700 = sdiv_up64(nest_sizze_31698, - segmap_group_sizze_31699) - mem_44627 = opencl_alloc(self, bytes_44624, "mem_44627") - if ((1 * (np.long(segmap_usable_groups_31700) * np.long(segmap_group_sizze_31699))) != 0): - self.mainDetailedzisegmap_30952_var.set_args(self.global_failure, - np.int64(m_27772), - np.int64(i32_res_27787), - np.int64(nm_27920), - np.int64(i32_res_27935), - np.int64(x_27936), - np.int64(j_m_i_27939), - np.int64(gauss_jordan_res_r_ixfn_44617), - np.int64(gauss_jordan_res_r_ixfn_44618), - np.int64(gauss_jordan_res_r_ixfn_44620), - gauss_jordan_res_r_mem_44622, - mem_44627) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_30952_var, - ((np.long(segmap_usable_groups_31700) * np.long(segmap_group_sizze_31699)),), - (np.long(segmap_group_sizze_31699),)) - if synchronous: - sync(self) - gauss_jordan_res_r_mem_44622 = None - 
defunc_3_map_res_mem_44629 = mem_44627 - defunc_3_map_res_mem_44549 = None - suff_outer_par_31708 = (self.sizes["mainDetailed.suff_outer_par_16"] <= m_27772) - segmap_group_sizze_31730 = self.sizes["mainDetailed.segmap_group_size_31712"] - max_num_groups_45895 = self.sizes["mainDetailed.segmap_num_groups_31714"] - num_groups_31731 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segmap_group_sizze_31730), - sext_i32_i64(max_num_groups_45895)))) - suff_outer_par_31815 = (self.sizes["mainDetailed.suff_outer_par_17"] <= nest_sizze_30514) - nest_sizze_31831 = (i32_res_27781 * nest_sizze_30514) - segred_group_sizze_31832 = self.sizes["mainDetailed.segred_group_size_31774"] - max_num_groups_45896 = self.sizes["mainDetailed.segred_num_groups_31776"] - num_groups_31833 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_31831, - segred_group_sizze_31832), - sext_i32_i64(max_num_groups_45896)))) - Ty_42675 = self.sizes["mainDetailed.Ty_42672"] - Ry_42676 = self.sizes["mainDetailed.Ry_42674"] - Tx_42677 = self.sizes["mainDetailed.Tx_42671"] - Rx_42678 = self.sizes["mainDetailed.Rx_42673"] - Tk_42679 = self.sizes["mainDetailed.Tk_42670"] - TxRx_42682 = (Tx_42677 * Rx_42678) - TyRy_42683 = (Ty_42675 * Ry_42676) - a_loc_szz_42685 = (Tk_42679 * TyRy_42683) - binop_x_42686 = (Tx_42677 * Tk_42679) - b_loc_szz_42687 = (Rx_42678 * binop_x_42686) - group_sizze_42691 = (Ty_42675 * Tx_42677) - bytes_44648 = (np.int64(4) * nest_sizze_30514) - binop_x_44662 = (Ry_42676 * group_sizze_42691) - binop_x_44663 = (Rx_42678 * binop_x_44662) - bytes_44660 = (np.int64(4) * binop_x_44663) - binop_x_44654 = (Ry_42676 * Rx_42678) - bytes_44653 = (np.int64(4) * binop_x_44654) - bytes_44665 = (np.int64(4) * a_loc_szz_42685) - bytes_44667 = (np.int64(4) * b_loc_szz_42687) - bytes_44736 = (np.int64(4) * binop_x_44662) - binop_x_44742 = (Rx_42678 * group_sizze_42691) - bytes_44740 = (np.int64(4) * binop_x_44742) - bytes_44728 = (np.int64(4) * Ry_42676) - bytes_44730 = 
(np.int64(4) * Rx_42678) - binop_x_45464 = (np.int64(4) * Ty_42675) - binop_x_45465 = (Tx_42677 * binop_x_45464) - binop_x_45466 = (Ry_42676 * binop_x_45465) - sizze_45467 = (Rx_42678 * binop_x_45466) - num_threads_45540 = (segmap_group_sizze_31730 * num_groups_31731) - total_sizze_45541 = (bytes_44448 * num_threads_45540) - local_memory_capacity_46026 = self.max_local_memory - if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46026)) and suff_outer_par_31708): - mem_44632 = opencl_alloc(self, bytes_44398, "mem_44632") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44632, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_27771, m_27772) - mem_44650 = opencl_alloc(self, bytes_44648, "mem_44650") - mem_44635 = opencl_alloc(self, total_sizze_45541, "mem_44635") - if ((1 * (np.long(num_groups_31731) * np.long(segmap_group_sizze_31730))) != 0): - self.mainDetailedzisegmap_31710_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int64(num_groups_31731), - binop_p_mem_44390, - mem_44632, mem_44635, - mem_44650) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31710_var, - ((np.long(num_groups_31731) * np.long(segmap_group_sizze_31730)),), - (np.long(segmap_group_sizze_31730),)) - if synchronous: - sync(self) - mem_44632 = None - mem_44635 = None - mem_44848 = opencl_alloc(self, bytes_44648, "mem_44848") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44848, np.int64(0), - mem_44650, np.int64(0), - np.int64(1), m_27772, - i32_res_27787) - mem_44650 = None - defunc_3_map_res_mem_44850 = mem_44848 - else: - local_memory_capacity_46025 = self.max_local_memory - if (sle64(((bytes_44665 + srem64((np.int64(8) - srem64(bytes_44665, - np.int64(8))), - np.int64(8))) + (bytes_44667 + srem64((np.int64(8) - srem64(bytes_44667, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46025)) and suff_outer_par_31815): - 
tk_div_tx_42680 = sdiv_up64(Tk_42679, Tx_42677) - tk_div_ty_42681 = sdiv_up64(Tk_42679, Ty_42675) - gridDim_x_42688 = sdiv_up64(i32_res_27787, TxRx_42682) - gridDim_y_42689 = sdiv_up64(m_27772, TyRy_42683) - grid_sizze_42690 = (gridDim_x_42688 * gridDim_y_42689) - full_tiles_42719 = squot64(i32_res_27781, Tk_42679) - kk_42926 = (Tk_42679 * full_tiles_42719) - mem_44840 = opencl_alloc(self, bytes_44648, "mem_44840") - if ((1 * (np.long(grid_sizze_42690) * np.long(group_sizze_42691))) != 0): - self.mainDetailedzisegmap_intragroup_42694_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44667)), - cl.LocalMemory(np.long(bytes_44665)), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27781), - np.int64(i32_res_27787), - np.int64(gridDim_x_42688), - np.int64(full_tiles_42719), - np.int64(kk_42926), - images_mem_44381, - mem_44393, - mem_44840) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_42694_var, - ((np.long(grid_sizze_42690) * np.long(group_sizze_42691)),), - (np.long(group_sizze_42691),)) - if synchronous: - sync(self) - defunc_3_map_res_mem_44845 = mem_44840 - else: - mem_44844 = opencl_alloc(self, bytes_44648, "mem_44844") - if slt64((i32_res_27781 * np.int64(2)), segred_group_sizze_31832): - segment_sizze_nonzzero_45965 = smax64(np.int64(1), i32_res_27781) - num_threads_45966 = (num_groups_31833 * segred_group_sizze_31832) - if ((1 * (np.long(num_groups_31833) * np.long(segred_group_sizze_31832))) != 0): - self.mainDetailedzisegred_small_31780_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_31832))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27781), - np.int64(i32_res_27787), - np.int64(num_groups_31833), - np.int64(segment_sizze_nonzzero_45965), - images_mem_44381, - binop_p_mem_44390, - mem_44844) + mem_123143 = opencl_alloc(self, bytes_121997, "mem_123143") + if ((1 * (np.int64(segmap_usable_groups_97736) * 
np.int64(segmap_group_sizze_97735))) != 0): + self.mainMagnitudezisegmap_96596_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(rp1_73709), + mem_123127, + mem_123135, + mem_123138, + mem_123143) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_31780_var, - ((np.long(num_groups_31833) * np.long(segred_group_sizze_31832)),), - (np.long(segred_group_sizze_31832),)) + self.mainMagnitudezisegmap_96596_var, + ((np.int64(segmap_usable_groups_97736) * np.int64(segmap_group_sizze_97735)),), + (np.int64(segmap_group_sizze_97735),)) if synchronous: sync(self) - else: - groups_per_segment_45986 = sdiv_up64(num_groups_31833, - smax64(np.int64(1), - (m_27772 * i32_res_27787))) - elements_per_thread_45987 = sdiv_up64(i32_res_27781, - (segred_group_sizze_31832 * groups_per_segment_45986)) - virt_num_groups_45988 = (groups_per_segment_45986 * (m_27772 * i32_res_27787)) - num_threads_45989 = (num_groups_31833 * segred_group_sizze_31832) - threads_per_segment_45990 = (groups_per_segment_45986 * segred_group_sizze_31832) - group_res_arr_mem_45991 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_31832 * virt_num_groups_45988)), - "group_res_arr_mem_45991") - mainDetailedzicounter_mem_45993 = self.mainDetailedzicounter_mem_45993 - if ((1 * (np.long(num_groups_31833) * np.long(segred_group_sizze_31832))) != 0): - self.mainDetailedzisegred_large_31780_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_31832))), - np.int64(N_27771), - np.int64(i32_res_27781), - np.int64(i32_res_27787), - np.int64(num_groups_31833), - np.int64(groups_per_segment_45986), - np.int64(elements_per_thread_45987), - np.int64(virt_num_groups_45988), - np.int64(threads_per_segment_45990), - images_mem_44381, - binop_p_mem_44390, - mem_44844, - group_res_arr_mem_45991, - mainDetailedzicounter_mem_45993) + mem_123138 = None + local_memory_capacity_127785 = 
self.max_local_memory + if ((sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127785)) and sle64((((((bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123180 + srem64((np.int64(8) - srem64(bytes_123180, + np.int64(8))), + np.int64(8)))) + (bytes_123161 + srem64((np.int64(8) - srem64(bytes_123161, + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127785))) and suff_outer_par_97759): + mem_123147 = opencl_alloc(self, bytes_121997, "mem_123147") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123147, np.int64(0), + mem_123143, np.int64(0), + m_73008, k2p2zq_73023, + k2p2zq_73023) + mem_123151 = opencl_alloc(self, bytes_121997, "mem_123151") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123151, np.int64(0), + mem_123147, np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_123147 = None + mem_123155 = opencl_alloc(self, bytes_121997, "mem_123155") + group_sizze_127658 = self.sizes["mainMagnitude.group_size_127658"] + num_groups_127659 = sdiv_up64(((m_73008 * k2p2zq_73023) * k2p2zq_73023), + group_sizze_127658) + if ((1 * (np.int64(num_groups_127659) * np.int64(group_sizze_127658))) != 0): + self.mainMagnitudezicopy_127655_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_123143, + mem_123155) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezicopy_127655_var, + ((np.int64(num_groups_127659) * np.int64(group_sizze_127658)),), + (np.int64(group_sizze_127658),)) + if synchronous: + sync(self) + mem_123233 = opencl_alloc(self, bytes_121997, "mem_123233") + mem_125275 = opencl_alloc(self, total_sizze_125993, "mem_125275") + if ((1 * (np.int64(num_groups_top_116783) * np.int64(group_sizze_116780))) != 0): + 
self.mainMagnitudezisegmap_intragroup_116784_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64(bytes_123161)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123180)), + cl.LocalMemory(np.int64(bytes_123161)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_y_116782), + np.int64(ctx_val_123177), + np.int64(num_threads_125992), + mem_121944, + mem_121946, + mem_123151, + mem_123155, + mem_123233, + mem_125275) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_116784_var, + ((np.int64(num_groups_top_116783) * np.int64(group_sizze_116780)),), + (np.int64(group_sizze_116780),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + mem_123151 = None + mem_123155 = None + mem_125275 = None + defunc_3_map_res_r_mem_123392 = mem_123233 + else: + mem_123237 = opencl_alloc(self, bytes_121997, "mem_123237") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123237, np.int64(0), + mem_123143, np.int64(0), + m_73008, k2p2zq_73023, + k2p2zq_73023) + mem_123241 = opencl_alloc(self, bytes_121997, "mem_123241") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123241, np.int64(0), + mem_123237, np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_123237 = None + mem_param_123252 = mem_122000 + i_97823 = np.int64(0) + one_129943 = np.int64(1) + for counter_129942 in range(k2p2zq_73023): + x_97825 = (k2p2zq_73023 - i_97823) + i_97826 = (x_97825 - np.int64(1)) + x_97827 = sle64(np.int64(0), i_97826) + y_97828 = slt64(i_97826, k2p2zq_73023) + bounds_check_97829 = (x_97827 and y_97828) + j_m_i_97830 = (k2p2zq_73023 - x_97825) + empty_slice_97831 = (j_m_i_97830 == np.int64(0)) + m_97832 = (j_m_i_97830 - np.int64(1)) + i_p_m_t_s_97833 = (x_97825 + m_97832) + zzero_leq_i_p_m_t_s_97834 = sle64(np.int64(0), i_p_m_t_s_97833) + i_p_m_t_s_leq_w_97835 = 
slt64(i_p_m_t_s_97833, k2p2zq_73023) + zzero_lte_i_97836 = sle64(np.int64(0), x_97825) + i_lte_j_97837 = sle64(x_97825, k2p2zq_73023) + y_97838 = (i_p_m_t_s_leq_w_97835 and zzero_lte_i_97836) + y_97839 = (zzero_leq_i_p_m_t_s_97834 and y_97838) + y_97840 = (i_lte_j_97837 and y_97839) + forwards_ok_97841 = (zzero_lte_i_97836 and y_97840) + ok_or_empty_97842 = (empty_slice_97831 or forwards_ok_97841) + index_ok_97843 = (bounds_check_97829 and ok_or_empty_97842) + index_certs_97844 = True + assert index_ok_97843, ("Error: %s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:39-48\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_97826, + ", ", + x_97825, + ":", + k2p2zq_73023, + "] out of bounds for array of shape [", + k2p2zq_73023, + "][", + k2p2zq_73023, + "].")) + index_certs_97845 = True + assert ok_or_empty_97842, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:28:30-37\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + x_97825, + ":", + k2p2zq_73023, + "] out of bounds for array of shape [", + k2p2zq_73023, + "].")) + index_ok_97846 = (bounds_check_97829 and bounds_check_97829) + index_certs_97847 = True + assert index_ok_97846, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:38-43\n #1 
lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_97826, + ", ", + i_97826, + "] out of bounds for array of shape [", + k2p2zq_73023, + "][", + k2p2zq_73023, + "].")) + index_certs_97848 = True + assert bounds_check_97829, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/nhey/lm/lm.fut:29:19-22\n #1 lib/github.com/nhey/lm/lm.fut:43:17-54\n #2 lib/github.com/nhey/lm/lm.fut:81:30-39\n #3 recresid.fut:62:33-75\n #4 /prelude/soacs.fut:91:28-38\n #5 /prelude/soacs.fut:91:3-61\n #6 recresid.fut:51:11-73:44\n #7 recresid.fut:100:7-30\n #8 mroc.fut:27:25-38\n #9 mroc.fut:77:27-61\n #10 bfastfinal.fut:45:24-53\n #11 bfastfinal.fut:192:5-74\n #12 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_97826, + "] out of bounds for array of shape [", + k2p2zq_73023, + "].")) + nest_sizze_97910 = (j_m_i_97830 * binop_x_120244) + max_num_groups_127682 = self.sizes["mainMagnitude.segred_num_groups_96287"] + num_groups_97912 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_97910, + segred_group_sizze_97911), + sext_i32_i64(max_num_groups_127682)))) + local_memory_capacity_127784 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127784)) and suff_outer_par_97854): + mem_123259 = opencl_alloc(self, bytes_120258, "mem_123259") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123259, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_123287 = opencl_alloc(self, bytes_120258, "mem_123287") + mem_123263 = opencl_alloc(self, total_sizze_125995, + "mem_123263") + mem_123275 = opencl_alloc(self, total_sizze_125996, + "mem_123275") + if 
((1 * (np.int64(num_groups_97858) * np.int64(segmap_group_sizze_97857))) != 0): + self.mainMagnitudezisegmap_96169_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_97825), + np.int64(i_97826), + np.int64(j_m_i_97830), + np.int64(num_groups_97858), + np.int64(num_threads_125994), + mem_121938, + mem_123143, + mem_123241, + mem_123259, + mem_123263, + mem_123275, + mem_123287) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_96169_var, + ((np.int64(num_groups_97858) * np.int64(segmap_group_sizze_97857)),), + (np.int64(segmap_group_sizze_97857),)) + if synchronous: + sync(self) + mem_123259 = None + mem_123263 = None + mem_123275 = None + mem_123359 = opencl_alloc(self, bytes_121997, "mem_123359") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123359, + np.int64(0), + mem_123287, + np.int64(0), + np.int64(1), + m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_123287 = None + defunc_3_map_res_mem_123366 = mem_123359 + else: + local_memory_capacity_127783 = self.max_local_memory + if (sle64((((bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8))) + (bytes_123298 + srem64((np.int64(8) - srem64(bytes_123298, + np.int64(8))), + np.int64(8)))) + (bytes_123327 + srem64((np.int64(8) - srem64(bytes_123327, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127783)) and suff_outer_par_97888): + mem_123291 = opencl_alloc(self, bytes_121997, "mem_123291") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123291, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_123295 = opencl_alloc(self, bytes_121997, "mem_123295") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123295, + np.int64(0), + mem_param_123252, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + num_whole_tiles_117162 = squot64(j_m_i_97830, + tile_sizze_117141) + residual_input_117295 = srem64(j_m_i_97830, 
tile_sizze_117141) + cond_117296 = (residual_input_117295 == np.int64(0)) + mem_123334 = opencl_alloc(self, bytes_121997, "mem_123334") + mem_125317 = opencl_alloc(self, total_sizze_125998, + "mem_125317") + if ((1 * (np.int64(num_groups_top_117145) * np.int64(group_sizze_117142))) != 0): + self.mainMagnitudezisegmap_intragroup_117146_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123327)), + cl.LocalMemory(np.int64(bytes_123298)), + cl.LocalMemory(np.int64(bytes_123298)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_97825), + np.int64(i_97826), + np.int64(j_m_i_97830), + np.int64(num_groups_y_117144), + np.int64(num_whole_tiles_117162), + np.int64(residual_input_117295), + np.byte(cond_117296), + np.int64(num_threads_125997), + mem_121938, + mem_123143, + mem_123241, + mem_123291, + mem_123295, + mem_123334, + mem_125317) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_117146_var, + ((np.int64(num_groups_top_117145) * np.int64(group_sizze_117142)),), + (np.int64(group_sizze_117142),)) + if synchronous: + sync(self) + mem_123291 = None + mem_123295 = None + mem_125317 = None + defunc_3_map_res_mem_123355 = mem_123334 + else: + mem_123338 = opencl_alloc(self, bytes_121990, "mem_123338") + if slt64((j_m_i_97830 * np.int64(2)), + segred_group_sizze_97911): + segment_sizze_nonzzero_127713 = smax64(np.int64(1), + j_m_i_97830) + num_threads_127714 = (num_groups_97912 * segred_group_sizze_97911) + if ((1 * (np.int64(num_groups_97912) * np.int64(segred_group_sizze_97911))) != 0): + self.mainMagnitudezisegred_small_96291_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97911))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(x_97825), + np.int64(i_97826), + np.int64(j_m_i_97830), + np.int64(num_groups_97912), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_127713), + mem_123143, + mem_param_123252, + mem_123338) + 
cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_96291_var, + ((np.int64(num_groups_97912) * np.int64(segred_group_sizze_97911)),), + (np.int64(segred_group_sizze_97911),)) + if synchronous: + sync(self) + else: + groups_per_segment_127734 = sdiv_up64(num_groups_97912, + smax64(np.int64(1), + (m_73008 * k2p2zq_73023))) + elements_per_thread_127735 = sdiv_up64(j_m_i_97830, + (segred_group_sizze_97911 * groups_per_segment_127734)) + virt_num_groups_127736 = (groups_per_segment_127734 * (m_73008 * k2p2zq_73023)) + num_threads_127737 = (num_groups_97912 * segred_group_sizze_97911) + threads_per_segment_127738 = (groups_per_segment_127734 * segred_group_sizze_97911) + group_res_arr_mem_127739 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_97911 * virt_num_groups_127736)), + "group_res_arr_mem_127739") + mainMagnitudezicounter_mem_127741 = self.mainMagnitudezicounter_mem_127741 + if ((1 * (np.int64(num_groups_97912) * np.int64(segred_group_sizze_97911))) != 0): + self.mainMagnitudezisegred_large_96291_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97911))), + np.int64(k2p2zq_73023), + np.int64(x_97825), + np.int64(i_97826), + np.int64(j_m_i_97830), + np.int64(num_groups_97912), + np.int64(binop_x_120251), + np.int64(groups_per_segment_127734), + np.int64(elements_per_thread_127735), + np.int64(virt_num_groups_127736), + np.int64(threads_per_segment_127738), + mem_123143, + mem_param_123252, + mem_123338, + group_res_arr_mem_127739, + mainMagnitudezicounter_mem_127741) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_96291_var, + ((np.int64(num_groups_97912) * np.int64(segred_group_sizze_97911)),), + (np.int64(segred_group_sizze_97911),)) + if synchronous: + sync(self) + mem_123342 = opencl_alloc(self, bytes_121990, "mem_123342") + if ((1 * (np.int64(segmap_usable_groups_97929) * np.int64(segmap_group_sizze_97928))) != 0): 
+ self.mainMagnitudezisegmap_96269_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(i_97826), + mem_121938, + mem_123143, + mem_123338, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_96269_var, + ((np.int64(segmap_usable_groups_97929) * np.int64(segmap_group_sizze_97928)),), + (np.int64(segmap_group_sizze_97928),)) + if synchronous: + sync(self) + mem_123338 = None + if ((1 * (np.int64(segmap_usable_groups_97940) * np.int64(segmap_group_sizze_97939))) != 0): + self.mainMagnitudezisegmap_96257_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(i_97826), + np.int64(binop_x_120251), + mem_param_123252, + mem_123342) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_96257_var, + ((np.int64(segmap_usable_groups_97940) * np.int64(segmap_group_sizze_97939)),), + (np.int64(segmap_group_sizze_97939),)) + if synchronous: + sync(self) + mem_123342 = None + defunc_3_map_res_mem_123355 = mem_param_123252 + mem_123364 = opencl_alloc(self, bytes_121997, "mem_123364") + if ((((m_73008 * k2p2zq_73023) * k2p2zq_73023) * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_123364, + defunc_3_map_res_mem_123355, + dest_offset=np.int64(np.int64(0)), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((((m_73008 * k2p2zq_73023) * k2p2zq_73023) * np.int32(8)))) + if synchronous: + sync(self) + defunc_3_map_res_mem_123355 = None + defunc_3_map_res_mem_123366 = mem_123364 + mem_param_tmp_127680 = defunc_3_map_res_mem_123366 + mem_param_123252 = mem_param_tmp_127680 + i_97823 += one_129943 + defunc_3_map_res_r_mem_123380 = mem_param_123252 + mem_123241 = None + defunc_3_map_res_r_mem_123392 = defunc_3_map_res_r_mem_123380 + mem_123143 = None + local_memory_capacity_127918 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_127918)) and suff_outer_par_97955): + mem_123396 = opencl_alloc(self, bytes_121997, 
"mem_123396") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123396, np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + mem_123415 = opencl_alloc(self, bytes_121997, "mem_123415") + mem_123399 = opencl_alloc(self, total_sizze_126003, "mem_123399") + if ((1 * (np.int64(num_groups_97951) * np.int64(segmap_group_sizze_97950))) != 0): + self.mainMagnitudezisegmap_95953_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_97951), + np.int64(binop_x_120251), + np.int64(num_threads_126002), + defunc_3_map_res_r_mem_123392, + mem_123396, + mem_123399, + mem_123415) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_95953_var, + ((np.int64(num_groups_97951) * np.int64(segmap_group_sizze_97950)),), + (np.int64(segmap_group_sizze_97950),)) + if synchronous: + sync(self) + mem_123396 = None + mem_123399 = None + mem_123628 = opencl_alloc(self, bytes_121997, "mem_123628") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123628, np.int64(0), + mem_123415, np.int64(0), + np.int64(1), + (m_73008 * k2p2zq_73023), + k2p2zq_73023) + mem_123415 = None + defunc_3_map_res_r_mem_123630 = mem_123628 + else: + local_memory_capacity_127917 = self.max_local_memory + if (sle64(((bytes_123434 + srem64((np.int64(8) - srem64(bytes_123434, + np.int64(8))), + np.int64(8))) + (bytes_123436 + srem64((np.int64(8) - srem64(bytes_123436, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_127917)) and suff_outer_par_97977): + mem_123419 = opencl_alloc(self, bytes_121997, "mem_123419") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123419, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + mem_123610 = opencl_alloc(self, bytes_121997, "mem_123610") + if ((1 * (np.int64(grid_sizze_117461) * np.int64(group_sizze_117462))) != 0): + 
self.mainMagnitudezisegmap_intragroup_117465_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_123436)), + cl.LocalMemory(np.int64(bytes_123434)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(gridDim_x_117458), + np.int64(gridDim_y_117459), + np.int64(full_tiles_117490), + np.int64(kk_117693), + np.int64(binop_x_120251), + defunc_3_map_res_r_mem_123392, + mem_123419, + mem_123610) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_117465_var, + ((np.int64(grid_sizze_117461) * np.int64(group_sizze_117462)),), + (np.int64(group_sizze_117462),)) + if synchronous: + sync(self) + mem_123419 = None + defunc_3_map_res_r_mem_123624 = mem_123610 + else: + mem_123614 = opencl_alloc(self, bytes_121997, "mem_123614") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123614, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_123618 = opencl_alloc(self, bytes_121997, "mem_123618") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123618, + np.int64(0), + defunc_3_map_res_r_mem_123392, + np.int64(0), m_73008, + k2p2zq_73023, + k2p2zq_73023) + mem_123623 = opencl_alloc(self, bytes_121997, "mem_123623") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_97993): + segment_sizze_nonzzero_127857 = smax64(np.int64(1), + k2p2zq_73023) + num_threads_127858 = (num_groups_97994 * segred_group_sizze_97993) + if ((1 * (np.int64(num_groups_97994) * np.int64(segred_group_sizze_97993))) != 0): + self.mainMagnitudezisegred_small_96013_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97993))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_97994), + np.int64(segment_sizze_nonzzero_127857), + mem_123614, + mem_123618, + mem_123623) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_96013_var, + ((np.int64(num_groups_97994) * np.int64(segred_group_sizze_97993)),), + 
(np.int64(segred_group_sizze_97993),)) + if synchronous: + sync(self) + else: + groups_per_segment_127878 = sdiv_up64(num_groups_97994, + smax64(np.int64(1), + ((m_73008 * k2p2zq_73023) * k2p2zq_73023))) + elements_per_thread_127879 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_97993 * groups_per_segment_127878)) + virt_num_groups_127880 = (groups_per_segment_127878 * ((m_73008 * k2p2zq_73023) * k2p2zq_73023)) + num_threads_127881 = (num_groups_97994 * segred_group_sizze_97993) + threads_per_segment_127882 = (groups_per_segment_127878 * segred_group_sizze_97993) + group_res_arr_mem_127883 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_97993 * virt_num_groups_127880)), + "group_res_arr_mem_127883") + mainMagnitudezicounter_mem_127885 = self.mainMagnitudezicounter_mem_127885 + if ((1 * (np.int64(num_groups_97994) * np.int64(segred_group_sizze_97993))) != 0): + self.mainMagnitudezisegred_large_96013_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_97993))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_97994), + np.int64(groups_per_segment_127878), + np.int64(elements_per_thread_127879), + np.int64(virt_num_groups_127880), + np.int64(threads_per_segment_127882), + mem_123614, + mem_123618, + mem_123623, + group_res_arr_mem_127883, + mainMagnitudezicounter_mem_127885) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_96013_var, + ((np.int64(num_groups_97994) * np.int64(segred_group_sizze_97993)),), + (np.int64(segred_group_sizze_97993),)) + if synchronous: + sync(self) + mem_123614 = None + mem_123618 = None + defunc_3_map_res_r_mem_123624 = mem_123623 + defunc_3_map_res_r_mem_123630 = defunc_3_map_res_r_mem_123624 + mem_123633 = opencl_alloc(self, bytes_122015, "mem_123633") + group_sizze_127922 = self.sizes["mainMagnitude.group_size_127922"] + num_groups_127923 = sdiv_up64((m_73008 * rp1_73709), + group_sizze_127922) + if 
((1 * (np.int64(num_groups_127923) * np.int64(group_sizze_127922))) != 0): + self.mainMagnitudezicopy_127919_var.set_args(np.int64(m_73008), + np.int64(n_73011), + np.int64(rp1_73709), + defunc_3_map_res_mem_120231, + mem_123633) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_31780_var, - ((np.long(num_groups_31833) * np.long(segred_group_sizze_31832)),), - (np.long(segred_group_sizze_31832),)) + self.mainMagnitudezicopy_127919_var, + ((np.int64(num_groups_127923) * np.int64(group_sizze_127922)),), + (np.int64(group_sizze_127922),)) if synchronous: sync(self) - defunc_3_map_res_mem_44845 = mem_44844 - defunc_3_map_res_mem_44850 = defunc_3_map_res_mem_44845 - binop_p_mem_44390 = None - mem_44393 = None - suff_outer_par_31849 = (self.sizes["mainDetailed.suff_outer_par_18"] <= m_27772) - segmap_group_sizze_31870 = self.sizes["mainDetailed.segmap_group_size_31853"] - max_num_groups_46027 = self.sizes["mainDetailed.segmap_num_groups_31855"] - num_groups_31871 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segmap_group_sizze_31870), - sext_i32_i64(max_num_groups_46027)))) - suff_outer_par_31950 = (self.sizes["mainDetailed.suff_outer_par_19"] <= nest_sizze_30514) - segred_group_sizze_31965 = self.sizes["mainDetailed.segred_group_size_31911"] - max_num_groups_46028 = self.sizes["mainDetailed.segred_num_groups_31913"] - num_groups_31966 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44546, - segred_group_sizze_31965), - sext_i32_i64(max_num_groups_46028)))) - tile_sizze_43138 = self.sizes["mainDetailed.tile_size_43137"] - group_sizze_43139 = (tile_sizze_43138 * tile_sizze_43138) - bytes_44882 = (np.int64(4) * group_sizze_43139) - binop_x_45480 = (np.int64(4) * tile_sizze_43138) - sizze_45481 = (tile_sizze_43138 * binop_x_45480) - num_threads_45544 = (segmap_group_sizze_31870 * num_groups_31871) - total_sizze_45545 = (bytes_44448 * num_threads_45544) - local_memory_capacity_46114 = self.max_local_memory - if 
(sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46114)) and suff_outer_par_31849): - mem_44854 = opencl_alloc(self, bytes_44443, "mem_44854") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44854, np.int64(0), - defunc_3_map_res_mem_44629, - np.int64(0), np.int64(1), - (i32_res_27787 * i32_res_27787), - m_27772) - mem_44857 = opencl_alloc(self, bytes_44648, "mem_44857") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44857, np.int64(0), - defunc_3_map_res_mem_44850, - np.int64(0), np.int64(1), - i32_res_27787, m_27772) - mem_44875 = opencl_alloc(self, bytes_44648, "mem_44875") - mem_44860 = opencl_alloc(self, total_sizze_45545, "mem_44860") - if ((1 * (np.long(num_groups_31871) * np.long(segmap_group_sizze_31870))) != 0): - self.mainDetailedzisegmap_31851_var.set_args(self.global_failure, - np.int64(m_27772), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int64(num_groups_31871), - mem_44854, mem_44857, - mem_44860, mem_44875) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31851_var, - ((np.long(num_groups_31871) * np.long(segmap_group_sizze_31870)),), - (np.long(segmap_group_sizze_31870),)) - if synchronous: - sync(self) - mem_44854 = None - mem_44857 = None - mem_44860 = None - mem_44914 = opencl_alloc(self, bytes_44648, "mem_44914") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44914, np.int64(0), - mem_44875, np.int64(0), - np.int64(1), m_27772, - i32_res_27787) - mem_44875 = None - defunc_4_map_res_mem_44916 = mem_44914 - else: - local_memory_capacity_46113 = self.max_local_memory - if (sle64(((bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8))) + (bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46113)) and suff_outer_par_31950): - mem_44879 = opencl_alloc(self, bytes_44544, "mem_44879") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44879, np.int64(0), - defunc_3_map_res_mem_44629, - 
np.int64(0), np.int64(1), - i32_res_27787, - (m_27772 * i32_res_27787)) - num_groups_x_43140 = sdiv_up64(m_27772, tile_sizze_43138) - num_groups_y_43141 = sdiv_up64(i32_res_27787, tile_sizze_43138) - num_groups_top_43142 = (num_groups_x_43140 * num_groups_y_43141) - num_whole_tiles_43159 = squot64(i32_res_27787, tile_sizze_43138) - residual_input_43286 = srem64(i32_res_27787, tile_sizze_43138) - cond_43287 = (residual_input_43286 == np.int64(0)) - mem_44906 = opencl_alloc(self, bytes_44648, "mem_44906") - if ((1 * (np.long(num_groups_top_43142) * np.long(group_sizze_43139))) != 0): - self.mainDetailedzisegmap_intragroup_43143_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44882)), - cl.LocalMemory(np.long(bytes_44882)), - np.int64(m_27772), - np.int64(i32_res_27787), - np.int64(num_groups_y_43141), - np.int64(num_whole_tiles_43159), - np.int64(residual_input_43286), - np.byte(cond_43287), - defunc_3_map_res_mem_44850, - mem_44879, - mem_44906) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_43143_var, - ((np.long(num_groups_top_43142) * np.long(group_sizze_43139)),), - (np.long(group_sizze_43139),)) - if synchronous: - sync(self) - mem_44879 = None - defunc_4_map_res_mem_44911 = mem_44906 - else: - mem_44910 = opencl_alloc(self, bytes_44648, "mem_44910") - if slt64((i32_res_27787 * np.int64(2)), segred_group_sizze_31965): - segment_sizze_nonzzero_46053 = smax64(np.int64(1), i32_res_27787) - num_threads_46054 = (num_groups_31966 * segred_group_sizze_31965) - if ((1 * (np.long(num_groups_31966) * np.long(segred_group_sizze_31965))) != 0): - self.mainDetailedzisegred_small_31917_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_31965))), - np.int64(m_27772), - np.int64(i32_res_27787), - np.int64(num_groups_31966), - np.int64(segment_sizze_nonzzero_46053), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910) + mem_123637 = opencl_alloc(self, bytes_123634, 
"mem_123637") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123637, np.int64(0), + mem_123127, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * rp1_73709)) + mem_123127 = None + mem_123641 = opencl_alloc(self, bytes_122018, "mem_123641") + group_sizze_127927 = self.sizes["mainMagnitude.group_size_127927"] + num_groups_127928 = sdiv_up64(((m_73008 * k2p2zq_73023) * rp1_73709), + group_sizze_127927) + if ((1 * (np.int64(num_groups_127928) * np.int64(group_sizze_127927))) != 0): + self.mainMagnitudezicopy_127924_var.set_args(np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(rp1_73709), + mem_123637, mem_123641) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_31917_var, - ((np.long(num_groups_31966) * np.long(segred_group_sizze_31965)),), - (np.long(segred_group_sizze_31965),)) + self.mainMagnitudezicopy_127924_var, + ((np.int64(num_groups_127928) * np.int64(group_sizze_127927)),), + (np.int64(group_sizze_127927),)) if synchronous: sync(self) - else: - groups_per_segment_46074 = sdiv_up64(num_groups_31966, - smax64(np.int64(1), - (m_27772 * i32_res_27787))) - elements_per_thread_46075 = sdiv_up64(i32_res_27787, - (segred_group_sizze_31965 * groups_per_segment_46074)) - virt_num_groups_46076 = (groups_per_segment_46074 * (m_27772 * i32_res_27787)) - num_threads_46077 = (num_groups_31966 * segred_group_sizze_31965) - threads_per_segment_46078 = (groups_per_segment_46074 * segred_group_sizze_31965) - group_res_arr_mem_46079 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_31965 * virt_num_groups_46076)), - "group_res_arr_mem_46079") - mainDetailedzicounter_mem_46081 = self.mainDetailedzicounter_mem_46081 - if ((1 * (np.long(num_groups_31966) * np.long(segred_group_sizze_31965))) != 0): - self.mainDetailedzisegred_large_31917_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_31965))), - np.int64(i32_res_27787), - np.int64(num_groups_31966), - 
np.int64(groups_per_segment_46074), - np.int64(elements_per_thread_46075), - np.int64(virt_num_groups_46076), - np.int64(threads_per_segment_46078), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910, - group_res_arr_mem_46079, - mainDetailedzicounter_mem_46081) + mem_123637 = None + mem_123678 = opencl_alloc(self, bytes_122015, "mem_123678") + mem_125341 = opencl_alloc(self, total_sizze_126007, "mem_125341") + double_buffer_mem_125599 = opencl_alloc(self, total_sizze_126008, + "double_buffer_mem_125599") + if ((1 * (np.int64(num_groups_98010) * np.int64(segmap_group_sizze_98009))) != 0): + self.mainMagnitudezisegmap_95889_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(r_73698), + np.int64(rp1_73709), + np.int64(num_groups_98010), + np.int64(num_threads_126006), + mem_123130, + mem_123135, + mem_123633, + mem_123641, + mem_123678, + mem_125341, + double_buffer_mem_125599) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_31917_var, - ((np.long(num_groups_31966) * np.long(segred_group_sizze_31965)),), - (np.long(segred_group_sizze_31965),)) + self.mainMagnitudezisegmap_95889_var, + ((np.int64(num_groups_98010) * np.int64(segmap_group_sizze_98009)),), + (np.int64(segmap_group_sizze_98009),)) if synchronous: sync(self) - defunc_4_map_res_mem_44911 = mem_44910 - defunc_4_map_res_mem_44916 = defunc_4_map_res_mem_44911 - defunc_3_map_res_mem_44629 = None - defunc_3_map_res_mem_44850 = None - suff_outer_par_31981 = (self.sizes["mainDetailed.suff_outer_par_20"] <= m_27772) - segmap_group_sizze_32001 = self.sizes["mainDetailed.segmap_group_size_31985"] - max_num_groups_46115 = self.sizes["mainDetailed.segmap_num_groups_31987"] - num_groups_32002 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segmap_group_sizze_32001), - sext_i32_i64(max_num_groups_46115)))) - suff_outer_par_32078 = 
(self.sizes["mainDetailed.suff_outer_par_21"] <= binop_x_44399) - nest_sizze_32092 = (i32_res_27787 * binop_x_44399) - segred_group_sizze_32093 = self.sizes["mainDetailed.segred_group_size_32041"] - max_num_groups_46116 = self.sizes["mainDetailed.segred_num_groups_32043"] - num_groups_32094 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_32092, - segred_group_sizze_32093), - sext_i32_i64(max_num_groups_46116)))) - Ty_43416 = self.sizes["mainDetailed.Ty_43413"] - Ry_43417 = self.sizes["mainDetailed.Ry_43415"] - Tx_43418 = self.sizes["mainDetailed.Tx_43412"] - Rx_43419 = self.sizes["mainDetailed.Rx_43414"] - Tk_43420 = self.sizes["mainDetailed.Tk_43411"] - TxRx_43423 = (Tx_43418 * Rx_43419) - TyRy_43424 = (Ty_43416 * Ry_43417) - a_loc_szz_43426 = (Tk_43420 * TyRy_43424) - binop_x_43427 = (Tx_43418 * Tk_43420) - b_loc_szz_43428 = (Rx_43419 * binop_x_43427) - group_sizze_43432 = (Ty_43416 * Tx_43418) - bytes_44921 = (np.int64(4) * N_27771) - binop_x_44952 = (Ry_43417 * group_sizze_43432) - binop_x_44953 = (Rx_43419 * binop_x_44952) - bytes_44950 = (np.int64(4) * binop_x_44953) - binop_x_44944 = (Ry_43417 * Rx_43419) - bytes_44943 = (np.int64(4) * binop_x_44944) - bytes_44955 = (np.int64(4) * a_loc_szz_43426) - bytes_44957 = (np.int64(4) * b_loc_szz_43428) - bytes_45026 = (np.int64(4) * binop_x_44952) - binop_x_45032 = (Rx_43419 * group_sizze_43432) - bytes_45030 = (np.int64(4) * binop_x_45032) - bytes_45018 = (np.int64(4) * Ry_43417) - bytes_45020 = (np.int64(4) * Rx_43419) - binop_x_45494 = (np.int64(4) * Ty_43416) - binop_x_45495 = (Tx_43418 * binop_x_45494) - binop_x_45496 = (Ry_43417 * binop_x_45495) - sizze_45497 = (Rx_43419 * binop_x_45496) - num_threads_45548 = (segmap_group_sizze_32001 * num_groups_32002) - total_sizze_45549 = (bytes_44921 * num_threads_45548) - local_memory_capacity_46246 = self.max_local_memory - if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46246)) and suff_outer_par_31981): - mem_44919 = opencl_alloc(self, 
bytes_44648, "mem_44919") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44919, np.int64(0), - defunc_4_map_res_mem_44916, - np.int64(0), np.int64(1), - i32_res_27787, m_27772) - mem_44937 = opencl_alloc(self, bytes_44398, "mem_44937") - mem_44922 = opencl_alloc(self, total_sizze_45549, "mem_44922") - if ((1 * (np.long(num_groups_32002) * np.long(segmap_group_sizze_32001))) != 0): - self.mainDetailedzisegmap_31983_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int32(k2p2zq_27785), - np.int64(i32_res_27787), - np.int64(num_groups_32002), - mem_44397, mem_44919, - mem_44922, mem_44937) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_31983_var, - ((np.long(num_groups_32002) * np.long(segmap_group_sizze_32001)),), - (np.long(segmap_group_sizze_32001),)) - if synchronous: - sync(self) - mem_44919 = None - mem_44922 = None - mem_45138 = opencl_alloc(self, bytes_44398, "mem_45138") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45138, np.int64(0), - mem_44937, np.int64(0), - np.int64(1), m_27772, N_27771) - mem_44937 = None - defunc_3_map_res_mem_45140 = mem_45138 - else: - local_memory_capacity_46245 = self.max_local_memory - if (sle64(((bytes_44955 + srem64((np.int64(8) - srem64(bytes_44955, - np.int64(8))), - np.int64(8))) + (bytes_44957 + srem64((np.int64(8) - srem64(bytes_44957, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46245)) and suff_outer_par_32078): - mem_44940 = opencl_alloc(self, bytes_44383, "mem_44940") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44940, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), i32_res_27787, - N_27771) - tk_div_tx_43421 = sdiv_up64(Tk_43420, Tx_43418) - tk_div_ty_43422 = sdiv_up64(Tk_43420, Ty_43416) - gridDim_x_43429 = sdiv_up64(N_27771, TxRx_43423) - gridDim_y_43430 = sdiv_up64(m_27772, TyRy_43424) - grid_sizze_43431 = (gridDim_x_43429 * gridDim_y_43430) - full_tiles_43460 = squot64(i32_res_27787, Tk_43420) - kk_43663 = (Tk_43420 * 
full_tiles_43460) - mem_45130 = opencl_alloc(self, bytes_44398, "mem_45130") - if ((1 * (np.long(grid_sizze_43431) * np.long(group_sizze_43432))) != 0): - self.mainDetailedzisegmap_intragroup_43435_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44957)), - cl.LocalMemory(np.long(bytes_44955)), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27787), - np.int64(gridDim_x_43429), - np.int64(full_tiles_43460), - np.int64(kk_43663), - defunc_4_map_res_mem_44916, - mem_44940, - mem_45130) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_43435_var, - ((np.long(grid_sizze_43431) * np.long(group_sizze_43432)),), - (np.long(group_sizze_43432),)) - if synchronous: - sync(self) - mem_44940 = None - defunc_3_map_res_mem_45135 = mem_45130 - else: - mem_45134 = opencl_alloc(self, bytes_44398, "mem_45134") - if slt64((i32_res_27787 * np.int64(2)), segred_group_sizze_32093): - segment_sizze_nonzzero_46185 = smax64(np.int64(1), i32_res_27787) - num_threads_46186 = (num_groups_32094 * segred_group_sizze_32093) - if ((1 * (np.long(num_groups_32094) * np.long(segred_group_sizze_32093))) != 0): - self.mainDetailedzisegred_small_32047_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32093))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27787), - np.int64(num_groups_32094), - np.int64(segment_sizze_nonzzero_46185), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134) + self.failure_is_an_option = np.int32(1) + mem_123130 = None + mem_123633 = None + mem_123641 = None + mem_125341 = None + double_buffer_mem_125599 = None + mem_123681 = opencl_alloc(self, bytes_122015, "mem_123681") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123681, np.int64(0), + mem_123678, np.int64(0), + np.int64(1), m_73008, + rp1_73709) + mem_123678 = None + if ((1 * (np.int64(segmap_usable_groups_98089) * np.int64(segmap_group_sizze_98088))) != 0): + 
self.mainMagnitudezisegmap_95834_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(rp1_73709), + np.int64(binop_x_120251), + mem_122003, + mem_123133, + defunc_3_map_res_r_mem_123392, + mem_123681) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_32047_var, - ((np.long(num_groups_32094) * np.long(segred_group_sizze_32093)),), - (np.long(segred_group_sizze_32093),)) + self.mainMagnitudezisegmap_95834_var, + ((np.int64(segmap_usable_groups_98089) * np.int64(segmap_group_sizze_98088)),), + (np.int64(segmap_group_sizze_98088),)) if synchronous: sync(self) - else: - groups_per_segment_46206 = sdiv_up64(num_groups_32094, - smax64(np.int64(1), - (m_27772 * N_27771))) - elements_per_thread_46207 = sdiv_up64(i32_res_27787, - (segred_group_sizze_32093 * groups_per_segment_46206)) - virt_num_groups_46208 = (groups_per_segment_46206 * (m_27772 * N_27771)) - num_threads_46209 = (num_groups_32094 * segred_group_sizze_32093) - threads_per_segment_46210 = (groups_per_segment_46206 * segred_group_sizze_32093) - group_res_arr_mem_46211 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_32093 * virt_num_groups_46208)), - "group_res_arr_mem_46211") - mainDetailedzicounter_mem_46213 = self.mainDetailedzicounter_mem_46213 - if ((1 * (np.long(num_groups_32094) * np.long(segred_group_sizze_32093))) != 0): - self.mainDetailedzisegred_large_32047_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32093))), - np.int64(N_27771), - np.int64(i32_res_27787), - np.int64(num_groups_32094), - np.int64(groups_per_segment_46206), - np.int64(elements_per_thread_46207), - np.int64(virt_num_groups_46208), - np.int64(threads_per_segment_46210), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134, - group_res_arr_mem_46211, - mainDetailedzicounter_mem_46213) + defunc_3_map_res_r_mem_123392 = None + mem_123681 = None + mem_123685 = opencl_alloc(self, 
bytes_121990, "mem_123685") + self.futhark_builtinzhgpu_map_transpose_i64(mem_123685, np.int64(0), + mem_123133, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_123133 = None + if ((1 * (np.int64(segmap_usable_groups_98160) * np.int64(segmap_group_sizze_98159))) != 0): + self.mainMagnitudezisegmap_95677_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(binop_x_120251), + mem_122007, + defunc_3_map_res_r_mem_123630, + mem_123685) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_32047_var, - ((np.long(num_groups_32094) * np.long(segred_group_sizze_32093)),), - (np.long(segred_group_sizze_32093),)) + self.mainMagnitudezisegmap_95677_var, + ((np.int64(segmap_usable_groups_98160) * np.int64(segmap_group_sizze_98159)),), + (np.int64(segmap_group_sizze_98159),)) if synchronous: sync(self) - defunc_3_map_res_mem_45135 = mem_45134 - defunc_3_map_res_mem_45140 = defunc_3_map_res_mem_45135 - mem_44397 = None - defunc_4_map_res_mem_44916 = None - i_28075 = (N_27771 - np.int64(1)) - x_28076 = sle64(np.int64(0), i_28075) - y_28077 = slt64(i_28075, N_27771) - bounds_check_28078 = (x_28076 and y_28077) - index_certs_28079 = True - assert bounds_check_28078, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:20:13-20\n #2 bfastfinal.fut:76:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:71:5-77:25\n #6 bfastfinal.fut:174:3-56\n #7 bfastfinal.fut:170:1-174:56\n" % ("Index [", - i_28075, - "] out of bounds for array of shape [", - N_27771, - "].")) - fits_32223 = sle64(N_27771, max_group_sizze_30862) - suff_intra_par_32221 = (self.sizes["mainDetailed.suff_intra_par_24"] <= N_27771) - intra_suff_and_fits_32224 = (suff_intra_par_32221 and fits_32223) - segscan_group_sizze_32373 = self.sizes["mainDetailed.segscan_group_size_32350"] - max_num_groups_46247 = self.sizes["mainDetailed.segscan_num_groups_32352"] - num_groups_32374 = 
sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44399, - segscan_group_sizze_32373), - sext_i32_i64(max_num_groups_46247)))) - segmap_group_sizze_32393 = self.sizes["mainDetailed.segmap_group_size_32341"] - segmap_group_sizze_32429 = self.sizes["mainDetailed.segmap_group_size_32266"] - bytes_45152 = (np.int64(4) * m_27772) - bytes_45143 = (np.int64(8) * N_27771) - bytes_45161 = (np.int64(8) * binop_x_44399) - local_memory_capacity_46349 = self.max_local_memory - if (sle64(((((bytes_45143 + srem64((np.int64(8) - srem64(bytes_45143, - np.int64(8))), - np.int64(8))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46349)) and intra_suff_and_fits_32224): - mem_45153 = opencl_alloc(self, bytes_45152, "mem_45153") - mem_45156 = opencl_alloc(self, bytes_44398, "mem_45156") - mem_45159 = opencl_alloc(self, bytes_44398, "mem_45159") - if ((1 * (np.long(m_27772) * np.long(N_27771))) != 0): - self.mainDetailedzisegmap_intragroup_32146_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_45143)), - np.int64(N_27771), - np.int64(i_28075), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45153, - mem_45156, - mem_45159) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_32146_var, - ((np.long(m_27772) * np.long(N_27771)),), - (np.long(N_27771),)) + defunc_3_map_res_r_mem_123630 = None + mem_123685 = None + mem_123691 = opencl_alloc(self, bytes_121997, "mem_123691") + if ((1 * (np.int64(segmap_usable_groups_98179) * np.int64(segmap_group_sizze_98178))) != 0): + self.mainMagnitudezisegmap_95629_var.set_args(self.global_failure, + 
np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_122007, + mem_123691) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_95629_var, + ((np.int64(segmap_usable_groups_98179) * np.int64(segmap_group_sizze_98178)),), + (np.int64(segmap_group_sizze_98178),)) + if synchronous: + sync(self) + mem_123695 = opencl_alloc(self, bytes_121990, "mem_123695") + if ((1 * (np.int64(segmap_usable_groups_98188) * np.int64(segmap_group_sizze_98187))) != 0): + self.mainMagnitudezisegmap_95607_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_122003, + mem_123695) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_95607_var, + ((np.int64(segmap_usable_groups_98188) * np.int64(segmap_group_sizze_98187)),), + (np.int64(segmap_group_sizze_98187),)) + if synchronous: + sync(self) + mem_123699 = opencl_alloc(self, bytes_120258, "mem_123699") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123699, np.int64(0), + mem_param_121959, + np.int64(0), np.int64(1), + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_123702 = opencl_alloc(self, bytes_121990, "mem_123702") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123702, np.int64(0), + mem_param_121967, + np.int64(0), np.int64(1), + k2p2zq_73023, m_73008) + mem_123705 = opencl_alloc(self, bytes_121990, "mem_123705") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123705, np.int64(0), + mem_122671, np.int64(0), + np.int64(1), k2p2zq_73023, + m_73008) + mem_122671 = None + mem_123708 = opencl_alloc(self, bytes_121990, "mem_123708") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123708, np.int64(0), + mem_123695, np.int64(0), + np.int64(1), k2p2zq_73023, + m_73008) + mem_123711 = opencl_alloc(self, m_73008, "mem_123711") + if ((1 * (np.int64(segmap_usable_groups_98197) * np.int64(segmap_group_sizze_98196))) != 0): + self.mainMagnitudezisegmap_95535_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + 
np.int64(defunc_2_reduce_res_73132), + np.float64(tol_73201), + np.int64(r_73698), + mem_120246, + mem_121941, + mem_param_121972, + mem_122674, + mem_122680, + mem_122682, + mem_123135, + mem_123699, + mem_123702, + mem_123705, + mem_123708, + mem_123711) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_95535_var, + ((np.int64(segmap_usable_groups_98197) * np.int64(segmap_group_sizze_98196)),), + (np.int64(segmap_group_sizze_98196),)) + if synchronous: + sync(self) + mem_122674 = None + mem_122680 = None + mem_123699 = None + mem_123702 = None + mem_123705 = None + mem_123708 = None + defunc_7_map_res_mem_123721 = mem_123711 + defunc_7_map_res_mem_123722 = mem_123691 + defunc_7_map_res_mem_123723 = mem_123695 + defunc_7_map_res_mem_123724 = mem_123135 + defunc_7_map_res_mem_123725 = mem_122682 + i_74284 = (r_73698 - k2p2zq_73023) + x_74285 = sle64(np.int64(0), i_74284) + y_74286 = slt64(i_74284, num_recresids_padded_73681) + bounds_check_74287 = (x_74285 and y_74286) + index_ok_74288 = (ok_or_empty_73122 and bounds_check_74287) + index_certs_74289 = True + assert index_ok_74288, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:74:9-75:74\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_74284, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_73681, + "][", + m_73008, + "].")) + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123725, + dest_offset=np.int64(((i_74284 * m_73008) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) if synchronous: sync(self) - defunc_4_map_res_mem_45177 = mem_45153 - defunc_4_map_res_mem_45178 = mem_45156 - defunc_4_map_res_mem_45179 = mem_45159 - else: - mem_45163 = opencl_alloc(self, bytes_45161, "mem_45163") - mem_45166 = 
opencl_alloc(self, bytes_44398, "mem_45166") - if slt64(np.int64(0), (m_27772 * N_27771)): - stage1_max_num_groups_46265 = self.max_group_size - stage1_num_groups_46266 = smin64(stage1_max_num_groups_46265, - num_groups_32374) - num_threads_46267 = sext_i64_i32((stage1_num_groups_46266 * segscan_group_sizze_32373)) - if ((1 * (np.long(stage1_num_groups_46266) * np.long(segscan_group_sizze_32373))) != 0): - self.mainDetailedziscan_stage1_32356_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(8) * segscan_group_sizze_32373)))), - np.int64(N_27771), - np.int64(m_27772), - np.int32(num_threads_46267), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45163, - mem_45166) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage1_32356_var, - ((np.long(stage1_num_groups_46266) * np.long(segscan_group_sizze_32373)),), - (np.long(segscan_group_sizze_32373),)) - if synchronous: - sync(self) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46266))) != 0): - self.mainDetailedziscan_stage2_32356_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(8) * stage1_num_groups_46266)))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(stage1_num_groups_46266), - np.int32(num_threads_46267), - mem_45163) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage2_32356_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46266)),), - (np.long(stage1_num_groups_46266),)) - if synchronous: - sync(self) - required_groups_46309 = sext_i64_i32(sdiv_up64((m_27772 * N_27771), - segscan_group_sizze_32373)) - if ((1 * (np.long(num_groups_32374) * np.long(segscan_group_sizze_32373))) != 0): - self.mainDetailedziscan_stage3_32356_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int64(num_groups_32374), - np.int32(num_threads_46267), - np.int32(required_groups_46309), - mem_45163) + defunc_7_map_res_mem_123725 = None + 
mainMagnitudezicounter_mem_127975 = self.mainMagnitudezicounter_mem_127975 + group_res_arr_mem_127977 = opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_98259 * num_groups_98261)), + "group_res_arr_mem_127977") + num_threads_127979 = (num_groups_98261 * segred_group_sizze_98259) + if ((1 * (np.int64(num_groups_98261) * np.int64(segred_group_sizze_98259))) != 0): + self.mainMagnitudezisegred_nonseg_98266_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_98259))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_73008), + np.int64(num_groups_98261), + np.int64(num_threads_127979), + defunc_7_map_res_mem_123721, + mem_123728, + mainMagnitudezicounter_mem_127975, + group_res_arr_mem_127977) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage3_32356_var, - ((np.long(num_groups_32374) * np.long(segscan_group_sizze_32373)),), - (np.long(segscan_group_sizze_32373),)) + self.mainMagnitudezisegred_nonseg_98266_var, + ((np.int64(num_groups_98261) * np.int64(segred_group_sizze_98259)),), + (np.int64(segred_group_sizze_98259),)) if synchronous: sync(self) - segmap_usable_groups_32394 = sdiv_up64(m_27772, segmap_group_sizze_32393) - mem_45169 = opencl_alloc(self, bytes_45152, "mem_45169") - if ((1 * (np.long(segmap_usable_groups_32394) * np.long(segmap_group_sizze_32393))) != 0): - self.mainDetailedzisegmap_32339_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - np.int64(i_28075), - mem_45163, mem_45169) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_32339_var, - ((np.long(segmap_usable_groups_32394) * np.long(segmap_group_sizze_32393)),), - (np.long(segmap_group_sizze_32393),)) - if synchronous: - sync(self) - mem_45172 = opencl_alloc(self, bytes_44398, "mem_45172") - self.futhark_builtinzhreplicate_f32(mem_45172, (m_27772 * N_27771), - np.nan) - mem_45175 = opencl_alloc(self, bytes_44398, "mem_45175") - self.futhark_builtinzhreplicate_i32(mem_45175, 
(m_27772 * N_27771), - np.int32(0)) - segmap_usable_groups_32430 = sdiv_up64(binop_x_44399, - segmap_group_sizze_32429) - if ((1 * (np.long(segmap_usable_groups_32430) * np.long(segmap_group_sizze_32429))) != 0): - self.mainDetailedzisegmap_32263_var.set_args(self.global_failure, - np.int64(N_27771), - np.int64(m_27772), - mem_45163, mem_45166, - mem_45172, mem_45175) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_32263_var, - ((np.long(segmap_usable_groups_32430) * np.long(segmap_group_sizze_32429)),), - (np.long(segmap_group_sizze_32429),)) - if synchronous: - sync(self) - mem_45163 = None - mem_45166 = None - defunc_4_map_res_mem_45177 = mem_45169 - defunc_4_map_res_mem_45178 = mem_45172 - defunc_4_map_res_mem_45179 = mem_45175 - suff_outer_par_32478 = (self.sizes["mainDetailed.suff_outer_par_28"] <= m_27772) - fits_32564 = sle64(i32_res_27781, max_group_sizze_30862) - suff_intra_par_32562 = (self.sizes["mainDetailed.suff_intra_par_29"] <= i32_res_27781) - intra_suff_and_fits_32565 = (suff_intra_par_32562 and fits_32564) - segmap_group_sizze_32525 = self.sizes["mainDetailed.segmap_group_size_32490"] - nest_sizze_32660 = (m_27772 * i32_res_27781) - segred_group_sizze_32661 = self.sizes["mainDetailed.segred_group_size_32644"] - max_num_groups_46350 = self.sizes["mainDetailed.segred_num_groups_32646"] - num_groups_32662 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_32660, - segred_group_sizze_32661), - sext_i32_i64(max_num_groups_46350)))) - segred_group_sizze_32675 = self.sizes["mainDetailed.segred_group_size_32619"] - max_num_groups_46351 = self.sizes["mainDetailed.segred_num_groups_32621"] - num_groups_32676 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_32660, - segred_group_sizze_32675), - sext_i32_i64(max_num_groups_46351)))) - segmap_group_sizze_32695 = self.sizes["mainDetailed.segmap_group_size_32604"] - bytes_45188 = (np.int64(4) * segmap_group_sizze_32525) - local_memory_capacity_46503 = 
self.max_local_memory - if (sle64(((bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8))) + (bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46503)) and suff_outer_par_32478): - segmap_usable_groups_32526 = sdiv_up64(m_27772, segmap_group_sizze_32525) - mem_45182 = opencl_alloc(self, bytes_44398, "mem_45182") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45182, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_27771, m_27772) - mem_45185 = opencl_alloc(self, bytes_44398, "mem_45185") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45185, np.int64(0), - defunc_4_map_res_mem_45178, - np.int64(0), np.int64(1), - N_27771, m_27772) - num_whole_tiles_43891 = squot64(i32_res_27781, segmap_group_sizze_32525) - residual_input_43992 = srem64(i32_res_27781, segmap_group_sizze_32525) - cond_43993 = (residual_input_43992 == np.int64(0)) - mem_45216 = opencl_alloc(self, bytes_45152, "mem_45216") - mem_45218 = opencl_alloc(self, bytes_45152, "mem_45218") - mem_45220 = opencl_alloc(self, bytes_45152, "mem_45220") - if ((1 * (np.long(segmap_usable_groups_32526) * np.long(segmap_group_sizze_32525))) != 0): - self.mainDetailedzisegmap_intragroup_43869_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45188)), - cl.LocalMemory(np.long(bytes_45188)), - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - np.float32(hfrac_27777), - np.int64(i32_res_27781), - np.int32(k2p2_27783), - np.int64(num_whole_tiles_43891), - np.int64(residual_input_43992), - np.byte(cond_43993), - mem_45182, - mem_45185, - mem_45216, - mem_45218, - mem_45220) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_43869_var, - ((np.long(segmap_usable_groups_32526) * np.long(segmap_group_sizze_32525)),), - (np.long(segmap_group_sizze_32525),)) + defunc_7_map_res_mem_123721 = 
None + read_res_129946 = np.empty(1, dtype=ct.c_bool) + cl.enqueue_copy(self.queue, read_res_129946, mem_123728, + device_offset=(np.int64(np.int64(0)) * 1), + is_blocking=synchronous) + sync(self) + defunc_2_reduce_comm_res_74291 = read_res_129946[0] + loop_cond_t_res_74296 = slt64(rp1_73709, m_73162) + x_74297 = (defunc_2_reduce_comm_res_74291 and loop_cond_t_res_74296) + mem_param_tmp_127198 = defunc_7_map_res_mem_123722 + mem_param_tmp_127199 = defunc_7_map_res_mem_123723 + mem_param_tmp_127200 = defunc_7_map_res_mem_123724 + loop_while_tmp_127201 = x_74297 + r_tmp_127202 = rp1_73709 + mem_param_121959 = mem_param_tmp_127198 + mem_param_121967 = mem_param_tmp_127199 + mem_param_121972 = mem_param_tmp_127200 + loop_while_73697 = loop_while_tmp_127201 + r_73698 = r_tmp_127202 + mrecresid_nn_res_mem_123746 = mem_param_121959 + mrecresid_nn_res_mem_123754 = mem_param_121967 + mrecresid_nn_res_mem_123759 = mem_param_121972 + mrecresid_nn_res_73691 = loop_while_73697 + mrecresid_nn_res_73692 = r_73698 + mem_120248 = None + defunc_5_map_res_mem_121929 = None + defunc_5_map_res_mem_121930 = None + defunc_5_map_res_mem_121931 = None + mem_121938 = None + mem_121941 = None + mem_121944 = None + mem_121946 = None + mem_121948 = None + mem_121992 = None + mem_121996 = None + mem_122000 = None + mem_122003 = None + mem_122007 = None + mem_123728 = None + bounds_invalid_upwards_74298 = slt64(defunc_2_reduce_res_73132, + mrecresid_nn_res_73692) + distance_74299 = (defunc_2_reduce_res_73132 - mrecresid_nn_res_73692) + valid_74300 = not(bounds_invalid_upwards_74298) + range_valid_c_74301 = True + assert valid_74300, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:78:63-68\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n" % ("Range ", + mrecresid_nn_res_73692, + "..<", + defunc_2_reduce_res_73132, + " is invalid.")) + suff_outer_par_98304 = 
(self.sizes["mainMagnitude.suff_outer_par_16"] <= m_73008) + intra_avail_par_98298 = smin64(k2p2zq_73023, binop_x_120251) + computed_group_sizze_98269 = smax64(k2p2zq_73023, binop_x_120251) + fits_98421 = sle64(computed_group_sizze_98269, max_group_sizze_90561) + suff_intra_par_98419 = (self.sizes["mainMagnitude.suff_intra_par_17"] <= intra_avail_par_98298) + intra_suff_and_fits_98422 = (suff_intra_par_98419 and fits_98421) + segmap_group_sizze_98363 = self.sizes["mainMagnitude.segmap_group_size_98308"] + max_num_groups_128005 = self.sizes["mainMagnitude.segmap_num_groups_98310"] + num_groups_98364 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_98363), + sext_i32_i64(max_num_groups_128005)))) + segred_group_sizze_98663 = self.sizes["mainMagnitude.segred_group_size_98639"] + max_num_groups_128006 = self.sizes["mainMagnitude.segred_num_groups_98641"] + num_groups_98664 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_98663), + sext_i32_i64(max_num_groups_128006)))) + segmap_group_sizze_98684 = self.sizes["mainMagnitude.segmap_group_size_98631"] + segred_group_sizze_98691 = self.sizes["mainMagnitude.segred_group_size_98612"] + max_num_groups_128007 = self.sizes["mainMagnitude.segred_num_groups_98614"] + num_groups_98692 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segred_group_sizze_98691), + sext_i32_i64(max_num_groups_128007)))) + segmap_group_sizze_98704 = self.sizes["mainMagnitude.segmap_group_size_98600"] + segmap_group_sizze_98716 = self.sizes["mainMagnitude.segmap_group_size_98486"] + max_num_groups_128008 = self.sizes["mainMagnitude.segmap_num_groups_98488"] + num_groups_98717 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_98716), + sext_i32_i64(max_num_groups_128008)))) + suff_outer_par_98721 = (self.sizes["mainMagnitude.suff_outer_par_18"] <= binop_x_120244) + segred_group_sizze_98750 = 
self.sizes["mainMagnitude.segred_group_size_98537"] + max_num_groups_128009 = self.sizes["mainMagnitude.segred_num_groups_98539"] + num_groups_98751 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_98750), + sext_i32_i64(max_num_groups_128009)))) + segmap_group_sizze_98773 = self.sizes["mainMagnitude.segmap_group_size_98524"] + segmap_usable_groups_98685 = sdiv_up_safe64(m_73008, + segmap_group_sizze_98684) + segmap_usable_groups_98705 = sdiv_up_safe64(m_73008, + segmap_group_sizze_98704) + segmap_usable_groups_98774 = sdiv_up_safe64(binop_x_120244, + segmap_group_sizze_98773) + num_threads_126015 = (segmap_group_sizze_98363 * num_groups_98364) + total_sizze_126016 = (bytes_120247 * num_threads_126015) + total_sizze_126017 = (bytes_120247 * num_threads_126015) + total_sizze_126018 = (bytes_120250 * num_threads_126015) + total_sizze_126019 = (bytes_120247 * num_threads_126015) + num_threads_126025 = (segmap_group_sizze_98716 * num_groups_98717) + total_sizze_126026 = (bytes_120247 * num_threads_126025) + mem_param_123778 = mrecresid_nn_res_mem_123746 + mem_param_123786 = mrecresid_nn_res_mem_123754 + i_74305 = np.int64(0) + one_129951 = np.int64(1) + for counter_129950 in range(distance_74299): + index_primexp_74309 = (mrecresid_nn_res_73692 + i_74305) + x_74310 = sle64(np.int64(0), index_primexp_74309) + y_74311 = slt64(index_primexp_74309, defunc_2_reduce_res_73132) + bounds_check_74312 = (x_74310 and y_74311) + index_ok_74313 = (ok_or_empty_73101 and bounds_check_74312) + index_certs_74314 = True + assert index_ok_74313, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:34:13-22\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [", + index_primexp_74309, + ", ", + np.int64(0), + ":] out of 
bounds for array of shape [", + defunc_2_reduce_res_73132, + "][", + k2p2zq_73023, + "].")) + index_certs_74315 = True + assert bounds_check_74312, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:37:17-23\n #1 /prelude/soacs.fut:83:25-33\n #2 /prelude/soacs.fut:83:3-53\n #3 recresid.fut:80:17-56\n #4 recresid.fut:100:7-30\n #5 mroc.fut:27:25-38\n #6 mroc.fut:77:27-61\n #7 bfastfinal.fut:45:24-53\n #8 bfastfinal.fut:192:5-74\n #9 bfastfinal.fut:187:1-193:48\n" % ("Index [", + index_primexp_74309, + "] out of bounds for array of shape [", + defunc_2_reduce_res_73132, + "].")) + local_memory_capacity_128278 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128278)) and suff_outer_par_98304): + mem_123798 = opencl_alloc(self, bytes_120258, "mem_123798") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123798, np.int64(0), + mem_param_123778, + np.int64(0), np.int64(1), + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_123801 = opencl_alloc(self, bytes_121990, "mem_123801") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123801, np.int64(0), + mem_param_123786, + np.int64(0), np.int64(1), + k2p2zq_73023, m_73008) + mem_123869 = opencl_alloc(self, bytes_120258, "mem_123869") + mem_123872 = opencl_alloc(self, bytes_121990, "mem_123872") + mem_123874 = opencl_alloc(self, bytes_120173, "mem_123874") + mem_123804 = opencl_alloc(self, total_sizze_126016, "mem_123804") + mem_123818 = opencl_alloc(self, total_sizze_126017, "mem_123818") + mem_123821 = opencl_alloc(self, total_sizze_126018, "mem_123821") + mem_123840 = opencl_alloc(self, total_sizze_126019, "mem_123840") + if ((1 * (np.int64(num_groups_98364) * np.int64(segmap_group_sizze_98363))) != 0): + self.mainMagnitudezisegmap_98306_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98364), + np.int64(num_threads_126015), + 
defunc_3_map_res_mem_120231, + mem_120246, + mem_123798, + mem_123801, + mem_123804, + mem_123818, + mem_123821, + mem_123840, + mem_123869, + mem_123872, + mem_123874) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_98306_var, + ((np.int64(num_groups_98364) * np.int64(segmap_group_sizze_98363)),), + (np.int64(segmap_group_sizze_98363),)) + if synchronous: + sync(self) + mem_123798 = None + mem_123801 = None + mem_123804 = None + mem_123818 = None + mem_123821 = None + mem_123840 = None + mem_123966 = opencl_alloc(self, bytes_121997, "mem_123966") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123966, np.int64(0), + mem_123869, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_123869 = None + mem_123970 = opencl_alloc(self, bytes_121990, "mem_123970") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123970, np.int64(0), + mem_123872, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_123872 = None + defunc_7_map_res_mem_123972 = mem_123966 + defunc_7_map_res_mem_123973 = mem_123970 + defunc_7_map_res_mem_123974 = mem_123874 + else: + local_memory_capacity_128277 = self.max_local_memory + if (sle64((((((((bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8))) + ((np.int32(8) * k2p2zq_73023) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_73023), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * k2p2zq_73023) + srem64((np.int64(8) - srem64((np.int32(8) * k2p2zq_73023), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))) + (bytes_120250 + srem64((np.int64(8) - srem64(bytes_120250, + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * (k2p2zq_73023 * k2p2zq_73023)) + srem64((np.int64(8) - srem64((np.int32(8) * (k2p2zq_73023 * k2p2zq_73023)), + np.int64(8))), + np.int64(8)))) + (bytes_120247 + srem64((np.int64(8) - srem64(bytes_120247, + np.int64(8))), + np.int64(8)))), + 
sext_i32_i64(local_memory_capacity_128277)) and intra_suff_and_fits_98422): + mem_123892 = opencl_alloc(self, bytes_121997, "mem_123892") + mem_123895 = opencl_alloc(self, bytes_121990, "mem_123895") + mem_123897 = opencl_alloc(self, bytes_120173, "mem_123897") + if ((1 * (np.int64(m_73008) * np.int64(computed_group_sizze_98269))) != 0): + self.mainMagnitudezisegmap_intragroup_98302_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * (k2p2zq_73023 * k2p2zq_73023)))), + cl.LocalMemory(np.int64(bytes_120250)), + cl.LocalMemory(np.int64(bytes_120247)), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_73023))), + cl.LocalMemory(np.int64((np.int32(8) * k2p2zq_73023))), + cl.LocalMemory(np.int64(bytes_120247)), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(computed_group_sizze_98269), + np.int64(binop_x_120251), + defunc_3_map_res_mem_120231, + mem_120246, + mem_param_123778, + mem_param_123786, + mem_123892, + mem_123895, + mem_123897) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_intragroup_98302_var, + ((np.int64(m_73008) * np.int64(computed_group_sizze_98269)),), + (np.int64(computed_group_sizze_98269),)) + if synchronous: + sync(self) + defunc_7_map_res_mem_123960 = mem_123892 + defunc_7_map_res_mem_123961 = mem_123895 + defunc_7_map_res_mem_123962 = mem_123897 + else: + mem_123901 = opencl_alloc(self, bytes_121997, "mem_123901") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123901, np.int64(0), + mem_param_123778, + np.int64(0), + np.int64(1), + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + mem_123904 = opencl_alloc(self, bytes_120173, "mem_123904") + mem_123907 = opencl_alloc(self, bytes_121990, "mem_123907") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_98663): + segment_sizze_nonzzero_128068 = smax64(np.int64(1), k2p2zq_73023) + num_threads_128069 = 
(num_groups_98664 * segred_group_sizze_98663) + if ((1 * (np.int64(num_groups_98664) * np.int64(segred_group_sizze_98663))) != 0): + self.mainMagnitudezisegred_small_98645_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98663))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98664), + np.int64(segment_sizze_nonzzero_128068), + mem_120246, + mem_123901, + mem_123904, + mem_123907) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_98645_var, + ((np.int64(num_groups_98664) * np.int64(segred_group_sizze_98663)),), + (np.int64(segred_group_sizze_98663),)) + if synchronous: + sync(self) + else: + groups_per_segment_128090 = sdiv_up64(num_groups_98664, + smax64(np.int64(1), + m_73008)) + elements_per_thread_128091 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_98663 * groups_per_segment_128090)) + virt_num_groups_128092 = (groups_per_segment_128090 * m_73008) + num_threads_128093 = (num_groups_98664 * segred_group_sizze_98663) + threads_per_segment_128094 = (groups_per_segment_128090 * segred_group_sizze_98663) + group_res_arr_mem_128095 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_98663 * virt_num_groups_128092)), + "group_res_arr_mem_128095") + mainMagnitudezicounter_mem_128097 = self.mainMagnitudezicounter_mem_128097 + if ((1 * (np.int64(num_groups_98664) * np.int64(segred_group_sizze_98663))) != 0): + self.mainMagnitudezisegred_large_98645_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98663))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98664), + np.int64(groups_per_segment_128090), + np.int64(elements_per_thread_128091), + np.int64(virt_num_groups_128092), + np.int64(threads_per_segment_128094), + 
mem_120246, + mem_123901, + mem_123904, + mem_123907, + group_res_arr_mem_128095, + mainMagnitudezicounter_mem_128097) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_98645_var, + ((np.int64(num_groups_98664) * np.int64(segred_group_sizze_98663)),), + (np.int64(segred_group_sizze_98663),)) + if synchronous: + sync(self) + mem_123910 = opencl_alloc(self, bytes_120173, "mem_123910") + if ((1 * (np.int64(segmap_usable_groups_98685) * np.int64(segmap_group_sizze_98684))) != 0): + self.mainMagnitudezisegmap_98629_var.set_args(self.global_failure, + np.int64(m_73008), + mem_123904, + mem_123910) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_98629_var, + ((np.int64(segmap_usable_groups_98685) * np.int64(segmap_group_sizze_98684)),), + (np.int64(segmap_group_sizze_98684),)) + if synchronous: + sync(self) + mem_123904 = None + mem_123913 = opencl_alloc(self, bytes_120173, "mem_123913") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_98691): + segment_sizze_nonzzero_128135 = smax64(np.int64(1), k2p2zq_73023) + num_threads_128136 = (num_groups_98692 * segred_group_sizze_98691) + if ((1 * (np.int64(num_groups_98692) * np.int64(segred_group_sizze_98691))) != 0): + self.mainMagnitudezisegred_small_98618_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98691))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98692), + np.int64(segment_sizze_nonzzero_128135), + mem_120246, + mem_param_123786, + mem_123913) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_98618_var, + ((np.int64(num_groups_98692) * np.int64(segred_group_sizze_98691)),), + (np.int64(segred_group_sizze_98691),)) + if synchronous: + sync(self) + else: + groups_per_segment_128156 = sdiv_up64(num_groups_98692, + smax64(np.int64(1), + m_73008)) + elements_per_thread_128157 = 
sdiv_up64(k2p2zq_73023, + (segred_group_sizze_98691 * groups_per_segment_128156)) + virt_num_groups_128158 = (groups_per_segment_128156 * m_73008) + num_threads_128159 = (num_groups_98692 * segred_group_sizze_98691) + threads_per_segment_128160 = (groups_per_segment_128156 * segred_group_sizze_98691) + group_res_arr_mem_128161 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_98691 * virt_num_groups_128158)), + "group_res_arr_mem_128161") + mainMagnitudezicounter_mem_128163 = self.mainMagnitudezicounter_mem_128163 + if ((1 * (np.int64(num_groups_98692) * np.int64(segred_group_sizze_98691))) != 0): + self.mainMagnitudezisegred_large_98618_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98691))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98692), + np.int64(groups_per_segment_128156), + np.int64(elements_per_thread_128157), + np.int64(virt_num_groups_128158), + np.int64(threads_per_segment_128160), + mem_120246, + mem_param_123786, + mem_123913, + group_res_arr_mem_128161, + mainMagnitudezicounter_mem_128163) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_98618_var, + ((np.int64(num_groups_98692) * np.int64(segred_group_sizze_98691)),), + (np.int64(segred_group_sizze_98691),)) + if synchronous: + sync(self) + mem_123916 = opencl_alloc(self, bytes_120173, "mem_123916") + mem_123918 = opencl_alloc(self, bytes_120173, "mem_123918") + if ((1 * (np.int64(segmap_usable_groups_98705) * np.int64(segmap_group_sizze_98704))) != 0): + self.mainMagnitudezisegmap_98598_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(n_73011), + np.int64(index_primexp_74309), + defunc_3_map_res_mem_120231, + mem_123910, + mem_123913, + mem_123916, + mem_123918) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_98598_var, + 
((np.int64(segmap_usable_groups_98705) * np.int64(segmap_group_sizze_98704)),), + (np.int64(segmap_group_sizze_98704),)) + if synchronous: + sync(self) + mem_123913 = None + local_memory_capacity_128276 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128276)) and suff_outer_par_98721): + mem_123937 = opencl_alloc(self, bytes_121997, "mem_123937") + mem_123940 = opencl_alloc(self, bytes_121990, "mem_123940") + mem_123921 = opencl_alloc(self, total_sizze_126026, "mem_123921") + if ((1 * (np.int64(num_groups_98717) * np.int64(segmap_group_sizze_98716))) != 0): + self.mainMagnitudezisegmap_98483_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98717), + np.int64(num_threads_126025), + mem_120246, + mem_param_123786, + mem_123901, + mem_123907, + mem_123910, + mem_123916, + mem_123921, + mem_123937, + mem_123940) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_98483_var, + ((np.int64(num_groups_98717) * np.int64(segmap_group_sizze_98716)),), + (np.int64(segmap_group_sizze_98716),)) + if synchronous: + sync(self) + mem_123921 = None + mem_123956 = opencl_alloc(self, bytes_121997, "mem_123956") + self.futhark_builtinzhgpu_map_transpose_f64(mem_123956, + np.int64(0), + mem_123937, + np.int64(0), + np.int64(1), + (m_73008 * k2p2zq_73023), + k2p2zq_73023) + mem_123937 = None + defunc_7_map_res_mem_123958 = mem_123956 + defunc_7_map_res_mem_123959 = mem_123940 + else: + mem_123944 = opencl_alloc(self, bytes_121990, "mem_123944") + mem_123948 = opencl_alloc(self, bytes_121997, "mem_123948") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_98750): + segment_sizze_nonzzero_128211 = smax64(np.int64(1), + k2p2zq_73023) + num_threads_128212 = (num_groups_98751 * segred_group_sizze_98750) + if ((1 * (np.int64(num_groups_98751) * np.int64(segred_group_sizze_98750))) != 0): + 
self.mainMagnitudezisegred_small_98543_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98750))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98751), + np.int64(binop_x_120251), + np.int64(segment_sizze_nonzzero_128211), + mem_120246, + mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_98543_var, + ((np.int64(num_groups_98751) * np.int64(segred_group_sizze_98750)),), + (np.int64(segred_group_sizze_98750),)) + if synchronous: + sync(self) + else: + groups_per_segment_128232 = sdiv_up64(num_groups_98751, + smax64(np.int64(1), + (m_73008 * k2p2zq_73023))) + elements_per_thread_128233 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_98750 * groups_per_segment_128232)) + virt_num_groups_128234 = (groups_per_segment_128232 * (m_73008 * k2p2zq_73023)) + num_threads_128235 = (num_groups_98751 * segred_group_sizze_98750) + threads_per_segment_128236 = (groups_per_segment_128232 * segred_group_sizze_98750) + group_res_arr_mem_128237 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_98750 * virt_num_groups_128234)), + "group_res_arr_mem_128237") + mainMagnitudezicounter_mem_128239 = self.mainMagnitudezicounter_mem_128239 + if ((1 * (np.int64(num_groups_98751) * np.int64(segred_group_sizze_98750))) != 0): + self.mainMagnitudezisegred_large_98543_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_98750))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(defunc_2_reduce_res_73132), + np.int64(index_primexp_74309), + np.int64(num_groups_98751), + np.int64(binop_x_120251), + np.int64(groups_per_segment_128232), + np.int64(elements_per_thread_128233), + np.int64(virt_num_groups_128234), + np.int64(threads_per_segment_128236), + mem_120246, + 
mem_param_123778, + mem_123907, + mem_123910, + mem_123944, + mem_123948, + group_res_arr_mem_128237, + mainMagnitudezicounter_mem_128239) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_98543_var, + ((np.int64(num_groups_98751) * np.int64(segred_group_sizze_98750)),), + (np.int64(segred_group_sizze_98750),)) + if synchronous: + sync(self) + mem_123952 = opencl_alloc(self, bytes_121990, "mem_123952") + if ((1 * (np.int64(segmap_usable_groups_98774) * np.int64(segmap_group_sizze_98773))) != 0): + self.mainMagnitudezisegmap_98521_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + mem_param_123786, + mem_123916, + mem_123944, + mem_123952) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegmap_98521_var, + ((np.int64(segmap_usable_groups_98774) * np.int64(segmap_group_sizze_98773)),), + (np.int64(segmap_group_sizze_98773),)) + if synchronous: + sync(self) + mem_123944 = None + defunc_7_map_res_mem_123958 = mem_123948 + defunc_7_map_res_mem_123959 = mem_123952 + mem_123901 = None + mem_123907 = None + mem_123910 = None + mem_123916 = None + defunc_7_map_res_mem_123960 = defunc_7_map_res_mem_123958 + defunc_7_map_res_mem_123961 = defunc_7_map_res_mem_123959 + defunc_7_map_res_mem_123962 = mem_123918 + defunc_7_map_res_mem_123972 = defunc_7_map_res_mem_123960 + defunc_7_map_res_mem_123973 = defunc_7_map_res_mem_123961 + defunc_7_map_res_mem_123974 = defunc_7_map_res_mem_123962 + i_74400 = (index_primexp_74309 - k2p2zq_73023) + x_74401 = sle64(np.int64(0), i_74400) + y_74402 = slt64(i_74400, num_recresids_padded_73681) + bounds_check_74403 = (x_74401 and y_74402) + index_ok_74404 = (ok_or_empty_73122 and bounds_check_74403) + index_certs_74405 = True + assert index_ok_74404, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 recresid.fut:81:7-82:29\n #1 recresid.fut:100:7-30\n #2 mroc.fut:27:25-38\n #3 mroc.fut:77:27-61\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:192:5-74\n #6 
bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_74400, + ", ", + np.int64(0), + ":] out of bounds for array of shape [", + num_recresids_padded_73681, + "][", + m_73008, + "].")) + if ((m_73008 * np.int32(8)) != 0): + cl.enqueue_copy(self.queue, mem_121934, defunc_7_map_res_mem_123974, + dest_offset=np.int64(((i_74400 * m_73008) * np.int64(8))), + src_offset=np.int64(np.int64(0)), + byte_count=np.int64((m_73008 * np.int32(8)))) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - mem_45182 = None - mem_45185 = None - defunc_3_map_res_mem_45244 = mem_45216 - defunc_3_map_res_mem_45245 = mem_45218 - defunc_3_map_res_mem_45246 = mem_45220 - else: - local_memory_capacity_46502 = self.max_local_memory - if (sle64((((np.int32(4) * i32_res_27781) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_27781), - np.int64(8))), - np.int64(8))) + ((np.int32(4) * i32_res_27781) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_27781), - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46502)) and intra_suff_and_fits_32565): - mem_45225 = opencl_alloc(self, bytes_45152, "mem_45225") - mem_45227 = opencl_alloc(self, bytes_45152, "mem_45227") - mem_45229 = opencl_alloc(self, bytes_45152, "mem_45229") - if ((1 * (np.long(m_27772) * np.long(i32_res_27781))) != 0): - self.mainDetailedzisegmap_intragroup_32486_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * i32_res_27781))), - cl.LocalMemory(np.long((np.int32(4) * i32_res_27781))), - np.int64(N_27771), - np.float32(hfrac_27777), - np.int64(i32_res_27781), - np.int32(k2p2_27783), - images_mem_44381, - defunc_4_map_res_mem_45178, - mem_45225, - mem_45227, - mem_45229) + defunc_7_map_res_mem_123974 = None + mem_param_tmp_128010 = defunc_7_map_res_mem_123972 + mem_param_tmp_128011 = defunc_7_map_res_mem_123973 + mem_param_123778 = mem_param_tmp_128010 + mem_param_123786 = mem_param_tmp_128011 + i_74305 += 
one_129951 + retsT_mem_123990 = mem_param_123778 + retsT_mem_123998 = mem_param_123786 + defunc_3_map_res_mem_120231 = None + mem_120246 = None + mrecresid_nn_res_mem_123746 = None + mrecresid_nn_res_mem_123754 = None + Nmk_74408 = (np.int64(1) + num_recresids_padded_73681) + bounds_invalid_upwards_74409 = slt64(Nmk_74408, np.int64(0)) + valid_74410 = not(bounds_invalid_upwards_74409) + range_valid_c_74411 = True + assert valid_74410, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:36:70-77\n #2 /prelude/soacs.fut:67:19-23\n #3 /prelude/soacs.fut:67:3-37\n #4 mroc.fut:32:5-38:15\n #5 mroc.fut:77:27-61\n #6 bfastfinal.fut:45:24-53\n #7 bfastfinal.fut:192:5-74\n #8 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_74408, + " is invalid.")) + suff_outer_par_98802 = (self.sizes["mainMagnitude.suff_outer_par_19"] <= m_73008) + intra_avail_par_98798 = smin64(num_recresids_padded_73681, Nmk_74408) + computed_group_sizze_98791 = smax64(num_recresids_padded_73681, Nmk_74408) + fits_98904 = sle64(computed_group_sizze_98791, max_group_sizze_90561) + suff_intra_par_98902 = (self.sizes["mainMagnitude.suff_intra_par_20"] <= intra_avail_par_98798) + intra_suff_and_fits_98905 = (suff_intra_par_98902 and fits_98904) + segmap_group_sizze_98854 = self.sizes["mainMagnitude.segmap_group_size_98806"] + max_num_groups_128279 = self.sizes["mainMagnitude.segmap_num_groups_98808"] + num_groups_98855 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_98854), + sext_i32_i64(max_num_groups_128279)))) + segmap_group_sizze_99052 = self.sizes["mainMagnitude.segmap_group_size_99045"] + segred_group_sizze_99061 = self.sizes["mainMagnitude.segred_group_size_99026"] + max_num_groups_128280 = self.sizes["mainMagnitude.segred_num_groups_99028"] + num_groups_99062 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_73682, + segred_group_sizze_99061), + 
sext_i32_i64(max_num_groups_128280)))) + segmap_group_sizze_99075 = self.sizes["mainMagnitude.segmap_group_size_99017"] + segred_group_sizze_99083 = self.sizes["mainMagnitude.segred_group_size_98996"] + max_num_groups_128281 = self.sizes["mainMagnitude.segred_num_groups_98998"] + num_groups_99084 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_73682, + segred_group_sizze_99083), + sext_i32_i64(max_num_groups_128281)))) + segmap_group_sizze_99098 = self.sizes["mainMagnitude.segmap_group_size_98982"] + nest_sizze_99109 = (m_73008 * Nmk_74408) + segscan_group_sizze_99110 = self.sizes["mainMagnitude.segscan_group_size_98955"] + max_num_groups_128282 = self.sizes["mainMagnitude.segscan_num_groups_98957"] + num_groups_99111 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_99109, + segscan_group_sizze_99110), + sext_i32_i64(max_num_groups_128282)))) + bytes_124022 = (np.int64(8) * nest_sizze_99109) + bytes_124008 = (np.int64(8) * Nmk_74408) + num_threads_126029 = (segmap_group_sizze_98854 * num_groups_98855) + total_sizze_126030 = (bytes_124008 * num_threads_126029) + local_memory_capacity_128525 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_128525)) and suff_outer_par_98802): + mem_124024 = opencl_alloc(self, bytes_124022, "mem_124024") + mem_124026 = opencl_alloc(self, bytes_120173, "mem_124026") + mem_124009 = opencl_alloc(self, total_sizze_126030, "mem_124009") + if ((1 * (np.int64(num_groups_98855) * np.int64(segmap_group_sizze_98854))) != 0): + self.mainMagnitudezisegmap_98804_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(num_groups_98855), + np.int64(num_threads_126029), + defunc_3_map_res_mem_120230, + mem_121934, mem_124009, + mem_124024, mem_124026) cl.enqueue_nd_range_kernel(self.queue, - 
self.mainDetailedzisegmap_intragroup_32486_var, - ((np.long(m_27772) * np.long(i32_res_27781)),), - (np.long(i32_res_27781),)) + self.mainMagnitudezisegmap_98804_var, + ((np.int64(num_groups_98855) * np.int64(segmap_group_sizze_98854)),), + (np.int64(segmap_group_sizze_98854),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_45241 = mem_45225 - defunc_3_map_res_mem_45242 = mem_45227 - defunc_3_map_res_mem_45243 = mem_45229 + mem_124009 = None + mem_124066 = opencl_alloc(self, bytes_124022, "mem_124066") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124066, np.int64(0), + mem_124024, np.int64(0), + np.int64(1), m_73008, + Nmk_74408) + mem_124024 = None + defunc_3_map_res_mem_124068 = mem_124066 + defunc_3_map_res_mem_124069 = mem_124026 else: - mem_45232 = opencl_alloc(self, bytes_45152, "mem_45232") - if slt64((i32_res_27781 * np.int64(2)), segred_group_sizze_32661): - segment_sizze_nonzzero_46377 = smax64(np.int64(1), i32_res_27781) - num_threads_46378 = (num_groups_32662 * segred_group_sizze_32661) - if ((1 * (np.long(num_groups_32662) * np.long(segred_group_sizze_32661))) != 0): - self.mainDetailedzisegred_small_32650_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32661))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_27781), - np.int64(num_groups_32662), - np.int64(segment_sizze_nonzzero_46377), - images_mem_44381, - mem_45232) + local_memory_capacity_128524 = self.max_local_memory + if (sle64(((((np.int32(8) * num_recresids_padded_73681) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_73681), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_73681) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_73681), + np.int64(8))), + np.int64(8)))) + (bytes_124008 + srem64((np.int64(8) - srem64(bytes_124008, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128524)) and 
intra_suff_and_fits_98905): + mem_124035 = opencl_alloc(self, bytes_124022, "mem_124035") + mem_124037 = opencl_alloc(self, bytes_120173, "mem_124037") + if ((1 * (np.int64(m_73008) * np.int64(computed_group_sizze_98791))) != 0): + self.mainMagnitudezisegmap_intragroup_98800_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124008)), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_73681))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_73681))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(computed_group_sizze_98791), + defunc_3_map_res_mem_120230, + mem_121934, + mem_124035, + mem_124037) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_32650_var, - ((np.long(num_groups_32662) * np.long(segred_group_sizze_32661)),), - (np.long(segred_group_sizze_32661),)) + self.mainMagnitudezisegmap_intragroup_98800_var, + ((np.int64(m_73008) * np.int64(computed_group_sizze_98791)),), + (np.int64(computed_group_sizze_98791),)) if synchronous: sync(self) + self.failure_is_an_option = np.int32(1) + defunc_3_map_res_mem_124062 = mem_124035 + defunc_3_map_res_mem_124063 = mem_124037 else: - groups_per_segment_46398 = sdiv_up64(num_groups_32662, - smax64(np.int64(1), m_27772)) - elements_per_thread_46399 = sdiv_up64(i32_res_27781, - (segred_group_sizze_32661 * groups_per_segment_46398)) - virt_num_groups_46400 = (groups_per_segment_46398 * m_27772) - num_threads_46401 = (num_groups_32662 * segred_group_sizze_32661) - threads_per_segment_46402 = (groups_per_segment_46398 * segred_group_sizze_32661) - group_res_arr_mem_46403 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_32661 * virt_num_groups_46400)), - "group_res_arr_mem_46403") - mainDetailedzicounter_mem_46405 = self.mainDetailedzicounter_mem_46405 - if ((1 * (np.long(num_groups_32662) * 
np.long(segred_group_sizze_32661))) != 0): - self.mainDetailedzisegred_large_32650_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32661))), - np.int64(N_27771), - np.int64(i32_res_27781), - np.int64(num_groups_32662), - np.int64(groups_per_segment_46398), - np.int64(elements_per_thread_46399), - np.int64(virt_num_groups_46400), - np.int64(threads_per_segment_46402), - images_mem_44381, - mem_45232, - group_res_arr_mem_46403, - mainDetailedzicounter_mem_46405) + segmap_usable_groups_99053 = sdiv_up64(m_73008, + segmap_group_sizze_99052) + mem_124040 = opencl_alloc(self, bytes_120173, "mem_124040") + mem_124042 = opencl_alloc(self, bytes_120173, "mem_124042") + if ((1 * (np.int64(segmap_usable_groups_99053) * np.int64(segmap_group_sizze_99052))) != 0): + self.mainMagnitudezisegmap_99043_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + defunc_3_map_res_mem_120230, + mem_124040, + mem_124042) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_32650_var, - ((np.long(num_groups_32662) * np.long(segred_group_sizze_32661)),), - (np.long(segred_group_sizze_32661),)) + self.mainMagnitudezisegmap_99043_var, + ((np.int64(segmap_usable_groups_99053) * np.int64(segmap_group_sizze_99052)),), + (np.int64(segmap_group_sizze_99052),)) if synchronous: sync(self) - mem_45235 = opencl_alloc(self, bytes_45152, "mem_45235") - if slt64((i32_res_27781 * np.int64(2)), segred_group_sizze_32675): - segment_sizze_nonzzero_46437 = smax64(np.int64(1), i32_res_27781) - num_threads_46438 = (num_groups_32676 * segred_group_sizze_32675) - if ((1 * (np.long(num_groups_32676) * np.long(segred_group_sizze_32675))) != 0): - self.mainDetailedzisegred_small_32625_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32675))), - np.int64(N_27771), - np.int64(m_27772), - 
np.int64(i32_res_27781), - np.int64(num_groups_32676), - np.int64(segment_sizze_nonzzero_46437), - defunc_4_map_res_mem_45178, - mem_45232, - mem_45235) + mem_124045 = opencl_alloc(self, bytes_121932, "mem_124045") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124045, np.int64(0), + mem_121934, np.int64(0), + np.int64(1), m_73008, + num_recresids_padded_73681) + mem_124048 = opencl_alloc(self, bytes_120173, "mem_124048") + if slt64((num_recresids_padded_73681 * np.int64(2)), + segred_group_sizze_99061): + segment_sizze_nonzzero_128328 = smax64(np.int64(1), + num_recresids_padded_73681) + num_threads_128329 = (num_groups_99062 * segred_group_sizze_99061) + if ((1 * (np.int64(num_groups_99062) * np.int64(segred_group_sizze_99061))) != 0): + self.mainMagnitudezisegred_small_99032_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99061))), + np.int64(m_73008), + np.int64(num_recresids_padded_73681), + np.int64(num_groups_99062), + np.int64(segment_sizze_nonzzero_128328), + mem_124045, + mem_124048) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_99032_var, + ((np.int64(num_groups_99062) * np.int64(segred_group_sizze_99061)),), + (np.int64(segred_group_sizze_99061),)) + if synchronous: + sync(self) + else: + groups_per_segment_128353 = sdiv_up64(num_groups_99062, + smax64(np.int64(1), m_73008)) + elements_per_thread_128354 = sdiv_up64(num_recresids_padded_73681, + (segred_group_sizze_99061 * groups_per_segment_128353)) + virt_num_groups_128355 = (groups_per_segment_128353 * m_73008) + num_threads_128356 = (num_groups_99062 * segred_group_sizze_99061) + threads_per_segment_128357 = (groups_per_segment_128353 * segred_group_sizze_99061) + group_res_arr_mem_128358 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_99061 * virt_num_groups_128355)), + "group_res_arr_mem_128358") + mainMagnitudezicounter_mem_128360 = self.mainMagnitudezicounter_mem_128360 + if ((1 * (np.int64(num_groups_99062) 
* np.int64(segred_group_sizze_99061))) != 0): + self.mainMagnitudezisegred_large_99032_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99061))), + np.int64(num_recresids_padded_73681), + np.int64(num_groups_99062), + np.int64(groups_per_segment_128353), + np.int64(elements_per_thread_128354), + np.int64(virt_num_groups_128355), + mem_124045, + mem_124048, + group_res_arr_mem_128358, + mainMagnitudezicounter_mem_128360) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_99032_var, + ((np.int64(num_groups_99062) * np.int64(segred_group_sizze_99061)),), + (np.int64(segred_group_sizze_99061),)) + if synchronous: + sync(self) + segmap_usable_groups_99076 = sdiv_up64(m_73008, + segmap_group_sizze_99075) + mem_124051 = opencl_alloc(self, bytes_120173, "mem_124051") + if ((1 * (np.int64(segmap_usable_groups_99076) * np.int64(segmap_group_sizze_99075))) != 0): + self.mainMagnitudezisegmap_99015_var.set_args(self.global_failure, + np.int64(m_73008), + mem_124040, + mem_124048, + mem_124051) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_32625_var, - ((np.long(num_groups_32676) * np.long(segred_group_sizze_32675)),), - (np.long(segred_group_sizze_32675),)) + self.mainMagnitudezisegmap_99015_var, + ((np.int64(segmap_usable_groups_99076) * np.int64(segmap_group_sizze_99075)),), + (np.int64(segmap_group_sizze_99075),)) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - else: - groups_per_segment_46458 = sdiv_up64(num_groups_32676, - smax64(np.int64(1), m_27772)) - elements_per_thread_46459 = sdiv_up64(i32_res_27781, - (segred_group_sizze_32675 * groups_per_segment_46458)) - virt_num_groups_46460 = (groups_per_segment_46458 * m_27772) - num_threads_46461 = (num_groups_32676 * segred_group_sizze_32675) - threads_per_segment_46462 = (groups_per_segment_46458 * segred_group_sizze_32675) - group_res_arr_mem_46463 = 
opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_32675 * virt_num_groups_46460)), - "group_res_arr_mem_46463") - mainDetailedzicounter_mem_46465 = self.mainDetailedzicounter_mem_46465 - if ((1 * (np.long(num_groups_32676) * np.long(segred_group_sizze_32675))) != 0): - self.mainDetailedzisegred_large_32625_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32675))), - np.int64(N_27771), - np.int64(i32_res_27781), - np.int64(num_groups_32676), - np.int64(groups_per_segment_46458), - np.int64(elements_per_thread_46459), - np.int64(virt_num_groups_46460), - np.int64(threads_per_segment_46462), - defunc_4_map_res_mem_45178, - mem_45232, - mem_45235, - group_res_arr_mem_46463, - mainDetailedzicounter_mem_46465) + mem_124048 = None + mem_124054 = opencl_alloc(self, bytes_120173, "mem_124054") + if slt64((num_recresids_padded_73681 * np.int64(2)), + segred_group_sizze_99083): + segment_sizze_nonzzero_128403 = smax64(np.int64(1), + num_recresids_padded_73681) + num_threads_128404 = (num_groups_99084 * segred_group_sizze_99083) + if ((1 * (np.int64(num_groups_99084) * np.int64(segred_group_sizze_99083))) != 0): + self.mainMagnitudezisegred_small_99002_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99083))), + np.int64(m_73008), + np.int64(num_recresids_padded_73681), + np.int64(num_groups_99084), + np.int64(segment_sizze_nonzzero_128403), + mem_124045, + mem_124051, + mem_124054) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_99002_var, + ((np.int64(num_groups_99084) * np.int64(segred_group_sizze_99083)),), + (np.int64(segred_group_sizze_99083),)) + if synchronous: + sync(self) + else: + groups_per_segment_128424 = sdiv_up64(num_groups_99084, + smax64(np.int64(1), m_73008)) + elements_per_thread_128425 = sdiv_up64(num_recresids_padded_73681, + 
(segred_group_sizze_99083 * groups_per_segment_128424)) + virt_num_groups_128426 = (groups_per_segment_128424 * m_73008) + num_threads_128427 = (num_groups_99084 * segred_group_sizze_99083) + threads_per_segment_128428 = (groups_per_segment_128424 * segred_group_sizze_99083) + group_res_arr_mem_128429 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_99083 * virt_num_groups_128426)), + "group_res_arr_mem_128429") + mainMagnitudezicounter_mem_128431 = self.mainMagnitudezicounter_mem_128431 + if ((1 * (np.int64(num_groups_99084) * np.int64(segred_group_sizze_99083))) != 0): + self.mainMagnitudezisegred_large_99002_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99083))), + np.int64(num_recresids_padded_73681), + np.int64(num_groups_99084), + np.int64(groups_per_segment_128424), + np.int64(elements_per_thread_128425), + np.int64(virt_num_groups_128426), + np.int64(threads_per_segment_128428), + mem_124045, + mem_124051, + mem_124054, + group_res_arr_mem_128429, + mainMagnitudezicounter_mem_128431) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_99002_var, + ((np.int64(num_groups_99084) * np.int64(segred_group_sizze_99083)),), + (np.int64(segred_group_sizze_99083),)) + if synchronous: + sync(self) + mem_124051 = None + segmap_usable_groups_99099 = sdiv_up64(m_73008, + segmap_group_sizze_99098) + mem_124057 = opencl_alloc(self, bytes_120173, "mem_124057") + if ((1 * (np.int64(segmap_usable_groups_99099) * np.int64(segmap_group_sizze_99098))) != 0): + self.mainMagnitudezisegmap_98980_var.set_args(self.global_failure, + np.int64(m_73008), + mem_124040, + mem_124054, + mem_124057) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_32625_var, - ((np.long(num_groups_32676) * np.long(segred_group_sizze_32675)),), - (np.long(segred_group_sizze_32675),)) + self.mainMagnitudezisegmap_98980_var, + ((np.int64(segmap_usable_groups_99099) * 
np.int64(segmap_group_sizze_99098)),), + (np.int64(segmap_group_sizze_99098),)) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - segmap_usable_groups_32696 = sdiv_up64(m_27772, - segmap_group_sizze_32695) - mem_45238 = opencl_alloc(self, bytes_45152, "mem_45238") - mem_45240 = opencl_alloc(self, bytes_45152, "mem_45240") - if ((1 * (np.long(segmap_usable_groups_32696) * np.long(segmap_group_sizze_32695))) != 0): - self.mainDetailedzisegmap_32602_var.set_args(self.global_failure, - np.int64(m_27772), - np.float32(hfrac_27777), - np.int32(k2p2_27783), - mem_45232, mem_45235, - mem_45238, mem_45240) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_32602_var, - ((np.long(segmap_usable_groups_32696) * np.long(segmap_group_sizze_32695)),), - (np.long(segmap_group_sizze_32695),)) - if synchronous: - sync(self) - mem_45235 = None - defunc_3_map_res_mem_45241 = mem_45238 - defunc_3_map_res_mem_45242 = mem_45232 - defunc_3_map_res_mem_45243 = mem_45240 - defunc_3_map_res_mem_45244 = defunc_3_map_res_mem_45241 - defunc_3_map_res_mem_45245 = defunc_3_map_res_mem_45242 - defunc_3_map_res_mem_45246 = defunc_3_map_res_mem_45243 - segred_group_sizze_32716 = self.sizes["mainDetailed.segred_group_size_32715"] - max_num_groups_46504 = self.sizes["mainDetailed.segred_num_groups_32717"] - num_groups_32718 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segred_group_sizze_32716), - sext_i32_i64(max_num_groups_46504)))) - mem_45249 = opencl_alloc(self, np.int64(4), "mem_45249") - mainDetailedzicounter_mem_46505 = self.mainDetailedzicounter_mem_46505 - group_res_arr_mem_46507 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_32716 * num_groups_32718)), - "group_res_arr_mem_46507") - num_threads_46509 = (num_groups_32718 * segred_group_sizze_32716) - if ((1 * (np.long(num_groups_32718) * np.long(segred_group_sizze_32716))) != 0): - self.mainDetailedzisegred_nonseg_32723_var.set_args(self.global_failure, - 
cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32716))), - cl.LocalMemory(np.long(np.int32(1))), - np.int64(m_27772), - np.int64(num_groups_32718), - np.int64(num_threads_46509), - defunc_3_map_res_mem_45244, - mem_45249, - mainDetailedzicounter_mem_46505, - group_res_arr_mem_46507) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_nonseg_32723_var, - ((np.long(num_groups_32718) * np.long(segred_group_sizze_32716)),), - (np.long(segred_group_sizze_32716),)) - if synchronous: - sync(self) - read_res_46882 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46882, mem_45249, - device_offset=(np.long(np.int64(0)) * 4), - is_blocking=synchronous) - sync(self) - defunc_2_reduce_comm_res_28169 = read_res_46882[0] - mem_45249 = None - i32_res_28174 = sext_i32_i64(defunc_2_reduce_comm_res_28169) - bounds_invalid_upwards_28175 = slt64(i32_res_28174, np.int64(0)) - valid_28176 = not(bounds_invalid_upwards_28175) - range_valid_c_28177 = True - assert valid_28176, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:5:3-18\n #2 bfastfinal.fut:102:34-46\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:97:17-104:24\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_28174, - " is invalid.")) - suff_outer_par_32755 = (self.sizes["mainDetailed.suff_outer_par_31"] <= m_27772) - segmap_group_sizze_32782 = self.sizes["mainDetailed.segmap_group_size_32759"] - nest_sizze_32833 = (m_27772 * i32_res_28174) - segred_group_sizze_32834 = self.sizes["mainDetailed.segred_group_size_32807"] - max_num_groups_46535 = self.sizes["mainDetailed.segred_num_groups_32809"] - num_groups_32835 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_32833, - segred_group_sizze_32834), - sext_i32_i64(max_num_groups_46535)))) - bytes_45252 = (np.int64(4) * segmap_group_sizze_32782) - local_memory_capacity_46606 = 
self.max_local_memory - if (sle64(((bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8))) + (bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46606)) and suff_outer_par_32755): - segmap_usable_groups_32783 = sdiv_up64(m_27772, segmap_group_sizze_32782) - num_whole_tiles_44095 = squot64(i32_res_28174, segmap_group_sizze_32782) - residual_input_44206 = srem64(i32_res_28174, segmap_group_sizze_32782) - cond_44207 = (residual_input_44206 == np.int64(0)) - mem_45275 = opencl_alloc(self, bytes_45152, "mem_45275") - if ((1 * (np.long(segmap_usable_groups_32783) * np.long(segmap_group_sizze_32782))) != 0): - self.mainDetailedzisegmap_intragroup_44075_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45252)), - cl.LocalMemory(np.long(bytes_45252)), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_28174), - np.int64(num_whole_tiles_44095), - np.int64(residual_input_44206), - np.byte(cond_44207), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45275) + mem_124040 = None + mem_124054 = None + mem_124061 = opencl_alloc(self, bytes_124022, "mem_124061") + if slt64(np.int64(0), (m_73008 * Nmk_74408)): + stage1_max_num_groups_128468 = self.max_group_size + stage1_num_groups_128469 = smin64(stage1_max_num_groups_128468, + num_groups_99111) + num_threads_128470 = sext_i64_i32((stage1_num_groups_128469 * segscan_group_sizze_99110)) + if ((1 * (np.int64(stage1_num_groups_128469) * np.int64(segscan_group_sizze_99110))) != 0): + self.mainMagnitudeziscan_stage1_98961_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_99110)))), + np.int64(m_73008), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + 
np.int32(num_threads_128470), + mem_124045, + mem_124057, + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage1_98961_var, + ((np.int64(stage1_num_groups_128469) * np.int64(segscan_group_sizze_99110)),), + (np.int64(segscan_group_sizze_99110),)) + if synchronous: + sync(self) + self.failure_is_an_option = np.int32(1) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_128469))) != 0): + self.mainMagnitudeziscan_stage2_98961_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_128469)))), + np.int64(m_73008), + np.int64(Nmk_74408), + np.int64(stage1_num_groups_128469), + np.int32(num_threads_128470), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage2_98961_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_128469)),), + (np.int64(stage1_num_groups_128469),)) + if synchronous: + sync(self) + required_groups_128512 = sext_i64_i32(sdiv_up64((m_73008 * Nmk_74408), + segscan_group_sizze_99110)) + if ((1 * (np.int64(num_groups_99111) * np.int64(segscan_group_sizze_99110))) != 0): + self.mainMagnitudeziscan_stage3_98961_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(Nmk_74408), + np.int64(num_groups_99111), + np.int32(num_threads_128470), + np.int32(required_groups_128512), + mem_124061) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudeziscan_stage3_98961_var, + ((np.int64(num_groups_99111) * np.int64(segscan_group_sizze_99110)),), + (np.int64(segscan_group_sizze_99110),)) + if synchronous: + sync(self) + mem_124045 = None + mem_124057 = None + defunc_3_map_res_mem_124062 = mem_124061 + defunc_3_map_res_mem_124063 = mem_124042 + defunc_3_map_res_mem_124068 = defunc_3_map_res_mem_124062 + defunc_3_map_res_mem_124069 = defunc_3_map_res_mem_124063 + defunc_3_map_res_mem_120230 = None + mem_121934 = None + empty_slice_74457 = (num_recresids_padded_73681 == np.int64(0)) + 
zzero_leq_i_p_m_t_s_74458 = sle64(np.int64(0), num_recresids_padded_73681) + i_p_m_t_s_leq_w_74459 = slt64(num_recresids_padded_73681, Nmk_74408) + i_lte_j_74460 = sle64(np.int64(1), Nmk_74408) + y_74461 = (zzero_leq_i_p_m_t_s_74458 and i_p_m_t_s_leq_w_74459) + y_74462 = (i_lte_j_74460 and y_74461) + ok_or_empty_74463 = (empty_slice_74457 or y_74462) + index_certs_74464 = True + assert ok_or_empty_74463, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 mroc.fut:57:12-22\n #1 /prelude/soacs.fut:67:19-23\n #2 /prelude/soacs.fut:67:3-37\n #3 mroc.fut:78:15-34\n #4 bfastfinal.fut:45:24-53\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n" % ("Index [", + np.int64(1), + ":] out of bounds for array of shape [", + Nmk_74408, + "].")) + range_valid_c_74467 = True + assert valid_74410, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 mroc.fut:72:13-18\n #2 mroc.fut:80:16-46\n #3 bfastfinal.fut:45:24-53\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + Nmk_74408, + " is invalid.")) + segmap_group_sizze_99187 = self.sizes["mainMagnitude.segmap_group_size_99179"] + segmap_usable_groups_99188 = sdiv_up64(m_73008, segmap_group_sizze_99187) + mem_124072 = opencl_alloc(self, bytes_120173, "mem_124072") + mem_124074 = opencl_alloc(self, bytes_120173, "mem_124074") + if ((1 * (np.int64(segmap_usable_groups_99188) * np.int64(segmap_group_sizze_99187))) != 0): + self.mainMagnitudezisegmap_99177_var.set_args(self.global_failure, + np.int64(m_73008), + defunc_3_map_res_mem_124069, + mem_124072, mem_124074) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_44075_var, - ((np.long(segmap_usable_groups_32783) * np.long(segmap_group_sizze_32782)),), - (np.long(segmap_group_sizze_32782),)) + self.mainMagnitudezisegmap_99177_var, + ((np.int64(segmap_usable_groups_99188) * np.int64(segmap_group_sizze_99187)),), + (np.int64(segmap_group_sizze_99187),)) if 
synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45275 - else: - mem_45278 = opencl_alloc(self, bytes_45152, "mem_45278") - if slt64((i32_res_28174 * np.int64(2)), segred_group_sizze_32834): - segment_sizze_nonzzero_46546 = smax64(np.int64(1), i32_res_28174) - num_threads_46547 = (num_groups_32835 * segred_group_sizze_32834) - if ((1 * (np.long(num_groups_32835) * np.long(segred_group_sizze_32834))) != 0): - self.mainDetailedzisegred_small_32813_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32834))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(i32_res_28174), - np.int64(num_groups_32835), - np.int64(segment_sizze_nonzzero_46546), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_32813_var, - ((np.long(num_groups_32835) * np.long(segred_group_sizze_32834)),), - (np.long(segred_group_sizze_32834),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - else: - groups_per_segment_46567 = sdiv_up64(num_groups_32835, - smax64(np.int64(1), m_27772)) - elements_per_thread_46568 = sdiv_up64(i32_res_28174, - (segred_group_sizze_32834 * groups_per_segment_46567)) - virt_num_groups_46569 = (groups_per_segment_46567 * m_27772) - num_threads_46570 = (num_groups_32835 * segred_group_sizze_32834) - threads_per_segment_46571 = (groups_per_segment_46567 * segred_group_sizze_32834) - group_res_arr_mem_46572 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_32834 * virt_num_groups_46569)), - "group_res_arr_mem_46572") - mainDetailedzicounter_mem_46574 = self.mainDetailedzicounter_mem_46574 - if ((1 * (np.long(num_groups_32835) * np.long(segred_group_sizze_32834))) != 0): - self.mainDetailedzisegred_large_32813_var.set_args(self.global_failure, - 
self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_32834))), - np.int64(N_27771), - np.int64(i32_res_28174), - np.int64(num_groups_32835), - np.int64(groups_per_segment_46567), - np.int64(elements_per_thread_46568), - np.int64(virt_num_groups_46569), - np.int64(threads_per_segment_46571), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278, - group_res_arr_mem_46572, - mainDetailedzicounter_mem_46574) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_32813_var, - ((np.long(num_groups_32835) * np.long(segred_group_sizze_32834)),), - (np.long(segred_group_sizze_32834),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45278 - iota32_arg_28203 = (N_27771 - i32_res_27781) - bounds_invalid_upwards_28204 = slt64(iota32_arg_28203, np.int64(0)) - valid_28205 = not(bounds_invalid_upwards_28204) - range_valid_c_28206 = True - assert valid_28205, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:109:22-35\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_28203, - " is invalid.")) - i_28208 = (n_27775 - np.int32(1)) - i_28209 = sext_i32_i64(i_28208) - x_28210 = sle64(np.int64(0), i_28209) - y_28211 = slt64(i_28209, N_27771) - bounds_check_28212 = (x_28210 and y_28211) - index_certs_28213 = True - assert bounds_check_28212, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:107:64-84\n #1 bfastfinal.fut:106:15-109:36\n #2 bfastfinal.fut:174:3-56\n #3 bfastfinal.fut:170:1-174:56\n" % ("Index [", - i_28209, - "] out of bounds for array of shape [", - N_27771, - "].")) - read_res_46884 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46884, mappingindices_mem_44380, - 
device_offset=(np.long(i_28209) * 4), - is_blocking=synchronous) - sync(self) - r32_arg_28214 = read_res_46884[0] - i32_res_28215 = sitofp_i32_f32(r32_arg_28214) - segmap_group_sizze_32924 = self.sizes["mainDetailed.segmap_group_size_32904"] - segmap_usable_groups_32925 = sdiv_up64(iota32_arg_28203, - segmap_group_sizze_32924) - bytes_45281 = (np.int64(4) * iota32_arg_28203) - mem_45282 = opencl_alloc(self, bytes_45281, "mem_45282") - if ((1 * (np.long(segmap_usable_groups_32925) * np.long(segmap_group_sizze_32924))) != 0): - self.mainDetailedzisegmap_32902_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_27771), - np.int32(n_27775), - np.float32(lam_27778), - np.int64(iota32_arg_28203), - np.float32(i32_res_28215), - mappingindices_mem_44380, - mem_45282) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_32902_var, - ((np.long(segmap_usable_groups_32925) * np.long(segmap_group_sizze_32924)),), - (np.long(segmap_group_sizze_32924),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - iota32_arg_28233 = (N_27771 - i32_res_27781) - bounds_invalid_upwards_28234 = slt64(iota32_arg_28233, np.int64(0)) - valid_28235 = not(bounds_invalid_upwards_28234) - range_valid_c_28236 = True - assert valid_28235, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:119:20-35\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_28233, - " is invalid.")) - max_res_28241 = smax64(np.int64(1), iota32_arg_28233) - bounds_invalid_upwards_28242 = slt64(max_res_28241, np.int64(1)) - distance_28243 = (max_res_28241 - np.int64(1)) - valid_28244 = not(bounds_invalid_upwards_28242) - range_valid_c_28245 = True - assert valid_28244, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 
lib/github.com/diku-dk/sorts/insertion_sort.fut:16:30-45\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:174:3-56\n #6 bfastfinal.fut:170:1-174:56\n" % ("Range ", - np.int64(1), - "..<", - max_res_28241, - " is invalid.")) - dim_match_28246 = (iota32_arg_28203 == iota32_arg_28233) - empty_or_match_cert_28247 = True - assert dim_match_28246, ("Error: %s\n\nBacktrace:\n-> #0 unknown location\n #1 bfastfinal.fut:116:13-121:42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:114:20-131:9\n #4 bfastfinal.fut:174:3-56\n #5 bfastfinal.fut:170:1-174:56\n" % ("Function return value does not match shape of declared return type.",)) - segmap_group_sizze_33376 = self.sizes["mainDetailed.segmap_group_size_33369"] - segmap_usable_groups_33377 = sdiv_up64(m_27772, segmap_group_sizze_33376) - mem_45285 = opencl_alloc(self, bytes_45152, "mem_45285") - if ((1 * (np.long(segmap_usable_groups_33377) * np.long(segmap_group_sizze_33376))) != 0): - self.mainDetailedzisegmap_33367_var.set_args(self.global_failure, - np.int64(m_27772), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - mem_45285) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33367_var, - ((np.long(segmap_usable_groups_33377) * np.long(segmap_group_sizze_33376)),), - (np.long(segmap_group_sizze_33376),)) - if synchronous: - sync(self) - nest_sizze_33390 = (m_27772 * iota32_arg_28233) - segmap_group_sizze_33391 = self.sizes["mainDetailed.segmap_group_size_33312"] - segmap_usable_groups_33392 = sdiv_up64(nest_sizze_33390, - segmap_group_sizze_33391) - bytes_45287 = (np.int64(4) * nest_sizze_33390) - mem_45289 = opencl_alloc(self, bytes_45287, "mem_45289") - if ((1 * (np.long(segmap_usable_groups_33392) * np.long(segmap_group_sizze_33391))) != 0): - self.mainDetailedzisegmap_33309_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - 
np.int64(N_27771), - np.int64(m_27772), - np.int64(iota32_arg_28233), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45245, - mem_45285, mem_45289) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33309_var, - ((np.long(segmap_usable_groups_33392) * np.long(segmap_group_sizze_33391)),), - (np.long(segmap_group_sizze_33391),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - segmap_group_sizze_33420 = self.sizes["mainDetailed.segmap_group_size_33190"] - segmap_usable_groups_33421 = sdiv_up64(m_27772, segmap_group_sizze_33420) - mem_45292 = opencl_alloc(self, bytes_45287, "mem_45292") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45292, np.int64(0), - mem_45289, np.int64(0), - np.int64(1), iota32_arg_28233, - m_27772) - mem_45289 = None - mem_45323 = opencl_alloc(self, bytes_45152, "mem_45323") - bytes_45302 = (np.int64(4) * iota32_arg_28233) - num_threads_45567 = (segmap_group_sizze_33420 * segmap_usable_groups_33421) - total_sizze_45568 = (bytes_45302 * num_threads_45567) - mem_45303 = opencl_alloc(self, total_sizze_45568, "mem_45303") - if ((1 * (np.long(segmap_usable_groups_33421) * np.long(segmap_group_sizze_33420))) != 0): - self.mainDetailedzisegmap_33188_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_27772), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - np.int64(distance_28243), - np.int64(segmap_usable_groups_33421), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - mem_45285, mem_45292, - mem_45303, mem_45323) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33188_var, - ((np.long(segmap_usable_groups_33421) * np.long(segmap_group_sizze_33420)),), - (np.long(segmap_group_sizze_33420),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - mem_45285 = None - mem_45292 = None - mem_45303 = None - empty_or_match_cert_28353 = True - assert dim_match_28246, ("Error: 
%s\n\nBacktrace:\n-> #0 bfastfinal.fut:146:17-149:51\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:136:38-164:9\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("function arguments of wrong shape",)) - dim_match_28354 = (iota32_arg_28233 == iota32_arg_28203) - empty_or_match_cert_28355 = True - assert dim_match_28354, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:162:24-88\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:136:38-164:9\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("function arguments of wrong shape",)) - empty_or_match_cert_28356 = True - assert dim_match_28246, ("Error: %s\n\nBacktrace:\n-> #0 unknown location\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:136:38-164:9\n #3 bfastfinal.fut:174:3-56\n #4 bfastfinal.fut:170:1-174:56\n" % ("Function return value does not match shape of declared return type.",)) - suff_outer_par_33532 = (self.sizes["mainDetailed.suff_outer_par_37"] <= m_27772) - intra_avail_par_33542 = smin64(iota32_arg_28203, iota32_arg_28233) - computed_group_sizze_33535 = smax64(iota32_arg_28203, iota32_arg_28233) - fits_33775 = sle64(computed_group_sizze_33535, max_group_sizze_30862) - suff_intra_par_33773 = (self.sizes["mainDetailed.suff_intra_par_38"] <= intra_avail_par_33542) - intra_suff_and_fits_33776 = (suff_intra_par_33773 and fits_33775) - segmap_group_sizze_33659 = self.sizes["mainDetailed.segmap_group_size_33547"] - max_num_groups_46632 = self.sizes["mainDetailed.segmap_num_groups_33549"] - num_groups_33660 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_27772, - segmap_group_sizze_33659), - sext_i32_i64(max_num_groups_46632)))) - segmap_group_sizze_34085 = self.sizes["mainDetailed.segmap_group_size_34078"] - segscan_group_sizze_34093 = self.sizes["mainDetailed.segscan_group_size_34039"] - max_num_groups_46633 = self.sizes["mainDetailed.segscan_num_groups_34041"] - num_groups_34094 = sext_i64_i32(smax64(np.int64(1), - 
smin64(sdiv_up64(nest_sizze_33390, - segscan_group_sizze_34093), - sext_i32_i64(max_num_groups_46633)))) - segmap_group_sizze_34127 = self.sizes["mainDetailed.segmap_group_size_34028"] - nest_sizze_34137 = (m_27772 * iota32_arg_28203) - segred_group_sizze_34138 = self.sizes["mainDetailed.segred_group_size_33988"] - max_num_groups_46634 = self.sizes["mainDetailed.segred_num_groups_33990"] - num_groups_34139 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_34137, - segred_group_sizze_34138), - sext_i32_i64(max_num_groups_46634)))) - segmap_group_sizze_34175 = self.sizes["mainDetailed.segmap_group_size_33952"] - segmap_group_sizze_34224 = self.sizes["mainDetailed.segmap_group_size_33896"] - bytes_45367 = (np.int64(4) * nest_sizze_34137) - local_memory_capacity_46635 = self.max_local_memory - if intra_suff_and_fits_33776: - defunc_0_f_res_ixfn_45426 = iota32_arg_28203 - else: - defunc_0_f_res_ixfn_45426 = iota32_arg_28233 - num_threads_45569 = (segmap_group_sizze_33659 * num_groups_33660) - total_sizze_45570 = (bytes_45302 * num_threads_45569) - total_sizze_45571 = (bytes_45281 * num_threads_45569) - total_sizze_45572 = (bytes_45302 * num_threads_45569) - local_memory_capacity_46859 = self.max_local_memory - if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46859)) and suff_outer_par_33532): - mem_45369 = opencl_alloc(self, bytes_45367, "mem_45369") - mem_45372 = opencl_alloc(self, bytes_45367, "mem_45372") - mem_45374 = opencl_alloc(self, bytes_45152, "mem_45374") - mem_45376 = opencl_alloc(self, bytes_45152, "mem_45376") - mem_45326 = opencl_alloc(self, total_sizze_45570, "mem_45326") - mem_45340 = opencl_alloc(self, total_sizze_45571, "mem_45340") - mem_45354 = opencl_alloc(self, total_sizze_45572, "mem_45354") - if ((1 * (np.long(num_groups_33660) * np.long(segmap_group_sizze_33659))) != 0): - self.mainDetailedzisegmap_33545_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_27771), - 
np.int64(m_27772), - np.int32(n_27775), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - np.int64(num_groups_33660), - defunc_4_map_res_mem_45177, - defunc_4_map_res_mem_45178, - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - defunc_0_f_res_mem_45279, - mem_45282, mem_45326, - mem_45340, mem_45354, - mem_45369, mem_45372, - mem_45374, mem_45376) + segmap_group_sizze_99201 = self.sizes["mainMagnitude.segmap_group_size_99148"] + segmap_usable_groups_99202 = sdiv_up64(nest_sizze_99109, + segmap_group_sizze_99201) + mem_124078 = opencl_alloc(self, bytes_124022, "mem_124078") + if ((1 * (np.int64(segmap_usable_groups_99202) * np.int64(segmap_group_sizze_99201))) != 0): + self.mainMagnitudezisegmap_99145_var.set_args(self.global_failure, + np.int64(m_73008), + np.float64(conf_73017), + np.int64(Nmk_74408), + mem_124072, mem_124074, + mem_124078) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33545_var, - ((np.long(num_groups_33660) * np.long(segmap_group_sizze_33659)),), - (np.long(segmap_group_sizze_33659),)) + self.mainMagnitudezisegmap_99145_var, + ((np.int64(segmap_usable_groups_99202) * np.int64(segmap_group_sizze_99201)),), + (np.int64(segmap_group_sizze_99201),)) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - mem_45326 = None - mem_45340 = None - mem_45354 = None - mem_45433 = opencl_alloc(self, bytes_45367, "mem_45433") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45433, np.int64(0), - mem_45369, np.int64(0), - np.int64(1), m_27772, - iota32_arg_28203) - mem_45369 = None - mem_45437 = opencl_alloc(self, bytes_45367, "mem_45437") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45437, np.int64(0), - mem_45372, np.int64(0), - np.int64(1), m_27772, - iota32_arg_28203) - mem_45372 = None - defunc_0_f_res_mem_45443 = mem_45433 - defunc_0_f_res_mem_45444 = mem_45437 - defunc_0_f_res_mem_45445 = mem_45374 - defunc_0_f_res_mem_45446 = 
mem_45376 - else: - local_memory_capacity_46858 = self.max_local_memory - if (sle64(((((((bytes_45302 + srem64((np.int64(8) - srem64(bytes_45302, - np.int64(8))), - np.int64(8))) + (bytes_45281 + srem64((np.int64(8) - srem64(bytes_45281, - np.int64(8))), - np.int64(8)))) + ((np.int32(1) * iota32_arg_28203) + srem64((np.int64(8) - srem64((np.int32(1) * iota32_arg_28203), - np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_28203) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_28203), - np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_28203) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_28203), - np.int64(8))), - np.int64(8)))) + (bytes_45302 + srem64((np.int64(8) - srem64(bytes_45302, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46858)) and intra_suff_and_fits_33776): - mem_45389 = opencl_alloc(self, bytes_45367, "mem_45389") - mem_45392 = opencl_alloc(self, bytes_45367, "mem_45392") - mem_45394 = opencl_alloc(self, bytes_45152, "mem_45394") - mem_45396 = opencl_alloc(self, bytes_45152, "mem_45396") - if ((1 * (np.long(m_27772) * np.long(computed_group_sizze_33535))) != 0): - self.mainDetailedzisegmap_intragroup_33543_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45302)), - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_28203))), - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_28203))), - cl.LocalMemory(np.long((np.int32(1) * iota32_arg_28203))), - cl.LocalMemory(np.long(bytes_45281)), - cl.LocalMemory(np.long(bytes_45302)), - np.int64(N_27771), - np.int32(n_27775), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - np.int64(computed_group_sizze_33535), - defunc_4_map_res_mem_45177, - defunc_4_map_res_mem_45178, - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - defunc_0_f_res_mem_45279, - mem_45282, - mem_45389, - mem_45392, - 
mem_45394, - mem_45396) + mem_124072 = None + mem_124074 = None + suff_outer_par_99226 = (self.sizes["mainMagnitude.suff_outer_par_21"] <= m_73008) + fits_99502 = sle64(num_recresids_padded_73681, max_group_sizze_90561) + suff_intra_par_99500 = (self.sizes["mainMagnitude.suff_intra_par_22"] <= num_recresids_padded_73681) + intra_suff_and_fits_99503 = (suff_intra_par_99500 and fits_99502) + segmap_group_sizze_99365 = self.sizes["mainMagnitude.segmap_group_size_99230"] + segmap_group_sizze_99816 = self.sizes["mainMagnitude.segmap_group_size_99810"] + segred_group_sizze_99823 = self.sizes["mainMagnitude.segred_group_size_99786"] + max_num_groups_128536 = self.sizes["mainMagnitude.segred_num_groups_99788"] + num_groups_99824 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_73682, + segred_group_sizze_99823), + sext_i32_i64(max_num_groups_128536)))) + segmap_group_sizze_99842 = self.sizes["mainMagnitude.segmap_group_size_99683"] + segred_group_sizze_99943 = self.sizes["mainMagnitude.segred_group_size_99661"] + max_num_groups_128537 = self.sizes["mainMagnitude.segred_num_groups_99663"] + num_groups_99944 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(replicate_arg_73682, + segred_group_sizze_99943), + sext_i32_i64(max_num_groups_128537)))) + segmap_group_sizze_99959 = self.sizes["mainMagnitude.segmap_group_size_99643"] + bytes_124087 = (np.int64(8) * segmap_group_sizze_99365) + local_memory_capacity_128699 = self.max_local_memory + if (sle64(((bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8))) + (bytes_124087 + srem64((np.int64(8) - srem64(bytes_124087, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128699)) and suff_outer_par_99226): + segmap_usable_groups_99366 = sdiv_up64(m_73008, + segmap_group_sizze_99365) + mem_124081 = opencl_alloc(self, bytes_124022, "mem_124081") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124081, np.int64(0), + defunc_3_map_res_mem_124068, + 
np.int64(0), np.int64(1), + Nmk_74408, m_73008) + mem_124084 = opencl_alloc(self, bytes_124022, "mem_124084") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124084, np.int64(0), + mem_124078, np.int64(0), + np.int64(1), Nmk_74408, + m_73008) + num_whole_tiles_117920 = squot64(num_recresids_padded_73681, + segmap_group_sizze_99365) + residual_input_118032 = srem64(num_recresids_padded_73681, + segmap_group_sizze_99365) + cond_118033 = (residual_input_118032 == np.int64(0)) + mem_124113 = opencl_alloc(self, bytes_120173, "mem_124113") + if ((1 * (np.int64(segmap_usable_groups_99366) * np.int64(segmap_group_sizze_99365))) != 0): + self.mainMagnitudezisegmap_intragroup_117900_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124087)), + cl.LocalMemory(np.int64(bytes_124087)), + np.int64(m_73008), + np.float64(level_73014), + np.int64(num_recresids_padded_73681), + np.int64(num_whole_tiles_117920), + np.int64(residual_input_118032), + np.byte(cond_118033), + defunc_3_map_res_mem_124069, + mem_124081, + mem_124084, + mem_124113) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_intragroup_33543_var, - ((np.long(m_27772) * np.long(computed_group_sizze_33535)),), - (np.long(computed_group_sizze_33535),)) + self.mainMagnitudezisegmap_intragroup_117900_var, + ((np.int64(segmap_usable_groups_99366) * np.int64(segmap_group_sizze_99365)),), + (np.int64(segmap_group_sizze_99365),)) if synchronous: sync(self) - self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45427 = mem_45389 - defunc_0_f_res_mem_45428 = mem_45392 - defunc_0_f_res_mem_45429 = mem_45394 - defunc_0_f_res_mem_45430 = mem_45396 + mem_124081 = None + mem_124084 = None + defunc_1_map_res_mem_124135 = mem_124113 else: - segmap_usable_groups_34086 = sdiv_up64(m_27772, - segmap_group_sizze_34085) - mem_45399 = opencl_alloc(self, bytes_45152, "mem_45399") - if ((1 * (np.long(segmap_usable_groups_34086) * np.long(segmap_group_sizze_34085))) != 0): - 
self.mainDetailedzisegmap_34076_var.set_args(self.global_failure, - np.int64(m_27772), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - mem_45399) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_34076_var, - ((np.long(segmap_usable_groups_34086) * np.long(segmap_group_sizze_34085)),), - (np.long(segmap_group_sizze_34085),)) - if synchronous: - sync(self) - mem_45403 = opencl_alloc(self, bytes_45287, "mem_45403") - if slt64(np.int64(0), (m_27772 * iota32_arg_28233)): - stage1_max_num_groups_46689 = self.max_group_size - stage1_num_groups_46690 = smin64(stage1_max_num_groups_46689, - num_groups_34094) - num_threads_46691 = sext_i64_i32((stage1_num_groups_46690 * segscan_group_sizze_34093)) - if ((1 * (np.long(stage1_num_groups_46690) * np.long(segscan_group_sizze_34093))) != 0): - self.mainDetailedziscan_stage1_34045_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * segscan_group_sizze_34093)))), - np.int64(N_27771), - np.int64(m_27772), - np.int64(iota32_arg_28233), - np.int32(num_threads_46691), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_0_f_res_mem_45279, - mem_45399, - mem_45403) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage1_34045_var, - ((np.long(stage1_num_groups_46690) * np.long(segscan_group_sizze_34093)),), - (np.long(segscan_group_sizze_34093),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46690))) != 0): - self.mainDetailedziscan_stage2_34045_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * stage1_num_groups_46690)))), - np.int64(m_27772), - np.int64(iota32_arg_28233), - np.int64(stage1_num_groups_46690), - np.int32(num_threads_46691), - mem_45403) + local_memory_capacity_128698 = self.max_local_memory + 
if (sle64((((np.int32(8) * num_recresids_padded_73681) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_73681), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * num_recresids_padded_73681) + srem64((np.int64(8) - srem64((np.int32(8) * num_recresids_padded_73681), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128698)) and intra_suff_and_fits_99503): + mem_124118 = opencl_alloc(self, bytes_120173, "mem_124118") + if ((1 * (np.int64(m_73008) * np.int64(num_recresids_padded_73681))) != 0): + self.mainMagnitudezisegmap_intragroup_99224_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_73681))), + cl.LocalMemory(np.int64((np.int32(8) * num_recresids_padded_73681))), + np.float64(level_73014), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + defunc_3_map_res_mem_124068, + defunc_3_map_res_mem_124069, + mem_124078, + mem_124118) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage2_34045_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46690)),), - (np.long(stage1_num_groups_46690),)) + self.mainMagnitudezisegmap_intragroup_99224_var, + ((np.int64(m_73008) * np.int64(num_recresids_padded_73681)),), + (np.int64(num_recresids_padded_73681),)) if synchronous: sync(self) - required_groups_46733 = sext_i64_i32(sdiv_up64((m_27772 * iota32_arg_28233), - segscan_group_sizze_34093)) - if ((1 * (np.long(num_groups_34094) * np.long(segscan_group_sizze_34093))) != 0): - self.mainDetailedziscan_stage3_34045_var.set_args(self.global_failure, - np.int64(m_27772), - np.int64(iota32_arg_28233), - np.int64(num_groups_34094), - np.int32(num_threads_46691), - np.int32(required_groups_46733), - mem_45403) + defunc_1_map_res_mem_124134 = mem_124118 + else: + segmap_usable_groups_99817 = sdiv_up64(m_73008, + segmap_group_sizze_99816) + mem_124121 = opencl_alloc(self, bytes_120173, "mem_124121") + if ((1 * (np.int64(segmap_usable_groups_99817) * 
np.int64(segmap_group_sizze_99816))) != 0): + self.mainMagnitudezisegmap_99808_var.set_args(self.global_failure, + np.int64(m_73008), + defunc_3_map_res_mem_124069, + mem_124121) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedziscan_stage3_34045_var, - ((np.long(num_groups_34094) * np.long(segscan_group_sizze_34093)),), - (np.long(segscan_group_sizze_34093),)) + self.mainMagnitudezisegmap_99808_var, + ((np.int64(segmap_usable_groups_99817) * np.int64(segmap_group_sizze_99816)),), + (np.int64(segmap_group_sizze_99816),)) if synchronous: sync(self) - segmap_usable_groups_34128 = sdiv_up64(m_27772, - segmap_group_sizze_34127) - mem_45406 = opencl_alloc(self, bytes_45152, "mem_45406") - if ((1 * (np.long(segmap_usable_groups_34128) * np.long(segmap_group_sizze_34127))) != 0): - self.mainDetailedzisegmap_34026_var.set_args(self.global_failure, - np.int64(m_27772), - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - mem_45406) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_34026_var, - ((np.long(segmap_usable_groups_34128) * np.long(segmap_group_sizze_34127)),), - (np.long(segmap_group_sizze_34127),)) - if synchronous: - sync(self) - mem_45409 = opencl_alloc(self, m_27772, "mem_45409") - mem_45411 = opencl_alloc(self, bytes_45152, "mem_45411") - mem_45413 = opencl_alloc(self, bytes_45152, "mem_45413") - mem_45416 = opencl_alloc(self, bytes_45367, "mem_45416") - if slt64((iota32_arg_28203 * np.int64(2)), segred_group_sizze_34138): - segment_sizze_nonzzero_46750 = smax64(np.int64(1), iota32_arg_28203) - num_threads_46751 = (num_groups_34139 * segred_group_sizze_34138) - if ((1 * (np.long(num_groups_34139) * np.long(segred_group_sizze_34138))) != 0): - self.mainDetailedzisegred_small_33994_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34138))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34138))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_34138))), - 
np.int64(m_27772), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - np.int64(num_groups_34139), - np.int64(segment_sizze_nonzzero_46750), - mem_45282, - mem_45399, - mem_45403, - mem_45406, - mem_45409, - mem_45411, - mem_45413, - mem_45416) + mem_124124 = opencl_alloc(self, bytes_120173, "mem_124124") + if slt64((num_recresids_padded_73681 * np.int64(2)), + segred_group_sizze_99823): + segment_sizze_nonzzero_128568 = smax64(np.int64(1), + num_recresids_padded_73681) + num_threads_128569 = (num_groups_99824 * segred_group_sizze_99823) + if ((1 * (np.int64(num_groups_99824) * np.int64(segred_group_sizze_99823))) != 0): + self.mainMagnitudezisegred_small_99792_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99823))), + np.int64(m_73008), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(num_groups_99824), + np.int64(segment_sizze_nonzzero_128568), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_99792_var, + ((np.int64(num_groups_99824) * np.int64(segred_group_sizze_99823)),), + (np.int64(segred_group_sizze_99823),)) + if synchronous: + sync(self) + else: + groups_per_segment_128589 = sdiv_up64(num_groups_99824, + smax64(np.int64(1), m_73008)) + elements_per_thread_128590 = sdiv_up64(num_recresids_padded_73681, + (segred_group_sizze_99823 * groups_per_segment_128589)) + virt_num_groups_128591 = (groups_per_segment_128589 * m_73008) + num_threads_128592 = (num_groups_99824 * segred_group_sizze_99823) + threads_per_segment_128593 = (groups_per_segment_128589 * segred_group_sizze_99823) + group_res_arr_mem_128594 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_99823 * virt_num_groups_128591)), + "group_res_arr_mem_128594") + mainMagnitudezicounter_mem_128596 = self.mainMagnitudezicounter_mem_128596 + if ((1 * (np.int64(num_groups_99824) * np.int64(segred_group_sizze_99823))) != 0): + 
self.mainMagnitudezisegred_large_99792_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99823))), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(num_groups_99824), + np.int64(groups_per_segment_128589), + np.int64(elements_per_thread_128590), + np.int64(virt_num_groups_128591), + np.int64(threads_per_segment_128593), + defunc_3_map_res_mem_124068, + mem_124121, + mem_124124, + group_res_arr_mem_128594, + mainMagnitudezicounter_mem_128596) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_99792_var, + ((np.int64(num_groups_99824) * np.int64(segred_group_sizze_99823)),), + (np.int64(segred_group_sizze_99823),)) + if synchronous: + sync(self) + mem_124121 = None + segmap_usable_groups_99843 = sdiv_up64(m_73008, + segmap_group_sizze_99842) + mem_124127 = opencl_alloc(self, bytes_120173, "mem_124127") + if ((1 * (np.int64(segmap_usable_groups_99843) * np.int64(segmap_group_sizze_99842))) != 0): + self.mainMagnitudezisegmap_99681_var.set_args(self.global_failure, + np.int64(m_73008), + mem_124124, + mem_124127) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_small_33994_var, - ((np.long(num_groups_34139) * np.long(segred_group_sizze_34138)),), - (np.long(segred_group_sizze_34138),)) + self.mainMagnitudezisegmap_99681_var, + ((np.int64(segmap_usable_groups_99843) * np.int64(segmap_group_sizze_99842)),), + (np.int64(segmap_group_sizze_99842),)) if synchronous: sync(self) - else: - groups_per_segment_46786 = sdiv_up64(num_groups_34139, - smax64(np.int64(1), m_27772)) - elements_per_thread_46787 = sdiv_up64(iota32_arg_28203, - (segred_group_sizze_34138 * groups_per_segment_46786)) - virt_num_groups_46788 = (groups_per_segment_46786 * m_27772) - num_threads_46789 = (num_groups_34139 * segred_group_sizze_34138) - threads_per_segment_46790 = (groups_per_segment_46786 * segred_group_sizze_34138) - group_res_arr_mem_46791 = 
opencl_alloc(self, - (np.int32(1) * (segred_group_sizze_34138 * virt_num_groups_46788)), - "group_res_arr_mem_46791") - group_res_arr_mem_46793 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_34138 * virt_num_groups_46788)), - "group_res_arr_mem_46793") - group_res_arr_mem_46795 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_34138 * virt_num_groups_46788)), - "group_res_arr_mem_46795") - mainDetailedzicounter_mem_46797 = self.mainDetailedzicounter_mem_46797 - if ((1 * (np.long(num_groups_34139) * np.long(segred_group_sizze_34138))) != 0): - self.mainDetailedzisegred_large_33994_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34138))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34138))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_34138))), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - np.int64(num_groups_34139), - np.int64(groups_per_segment_46786), - np.int64(elements_per_thread_46787), - np.int64(virt_num_groups_46788), - mem_45282, - mem_45399, - mem_45403, - mem_45406, - mem_45409, - mem_45411, - mem_45413, - mem_45416, - group_res_arr_mem_46791, - group_res_arr_mem_46793, - group_res_arr_mem_46795, - mainDetailedzicounter_mem_46797) + mem_124124 = None + mem_124130 = opencl_alloc(self, bytes_120173, "mem_124130") + if slt64((num_recresids_padded_73681 * np.int64(2)), + segred_group_sizze_99943): + segment_sizze_nonzzero_128633 = smax64(np.int64(1), + num_recresids_padded_73681) + num_threads_128634 = (num_groups_99944 * segred_group_sizze_99943) + if ((1 * (np.int64(num_groups_99944) * np.int64(segred_group_sizze_99943))) != 0): + self.mainMagnitudezisegred_small_99667_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99943))), + np.int64(m_73008), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(num_groups_99944), + 
np.int64(segment_sizze_nonzzero_128633), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_small_99667_var, + ((np.int64(num_groups_99944) * np.int64(segred_group_sizze_99943)),), + (np.int64(segred_group_sizze_99943),)) + if synchronous: + sync(self) + else: + groups_per_segment_128654 = sdiv_up64(num_groups_99944, + smax64(np.int64(1), m_73008)) + elements_per_thread_128655 = sdiv_up64(num_recresids_padded_73681, + (segred_group_sizze_99943 * groups_per_segment_128654)) + virt_num_groups_128656 = (groups_per_segment_128654 * m_73008) + num_threads_128657 = (num_groups_99944 * segred_group_sizze_99943) + threads_per_segment_128658 = (groups_per_segment_128654 * segred_group_sizze_99943) + group_res_arr_mem_128659 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_99943 * virt_num_groups_128656)), + "group_res_arr_mem_128659") + mainMagnitudezicounter_mem_128661 = self.mainMagnitudezicounter_mem_128661 + if ((1 * (np.int64(num_groups_99944) * np.int64(segred_group_sizze_99943))) != 0): + self.mainMagnitudezisegred_large_99667_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_99943))), + np.int64(num_recresids_padded_73681), + np.int64(Nmk_74408), + np.int64(num_groups_99944), + np.int64(groups_per_segment_128654), + np.int64(elements_per_thread_128655), + np.int64(virt_num_groups_128656), + np.int64(threads_per_segment_128658), + defunc_3_map_res_mem_124068, + mem_124078, + mem_124130, + group_res_arr_mem_128659, + mainMagnitudezicounter_mem_128661) + cl.enqueue_nd_range_kernel(self.queue, + self.mainMagnitudezisegred_large_99667_var, + ((np.int64(num_groups_99944) * np.int64(segred_group_sizze_99943)),), + (np.int64(segred_group_sizze_99943),)) + if synchronous: + sync(self) + segmap_usable_groups_99960 = sdiv_up64(m_73008, + segmap_group_sizze_99959) + mem_124133 = opencl_alloc(self, 
bytes_120173, "mem_124133") + if ((1 * (np.int64(segmap_usable_groups_99960) * np.int64(segmap_group_sizze_99959))) != 0): + self.mainMagnitudezisegmap_99641_var.set_args(self.global_failure, + np.int64(m_73008), + np.float64(level_73014), + defunc_3_map_res_mem_124069, + mem_124127, + mem_124130, + mem_124133) cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegred_large_33994_var, - ((np.long(num_groups_34139) * np.long(segred_group_sizze_34138)),), - (np.long(segred_group_sizze_34138),)) + self.mainMagnitudezisegmap_99641_var, + ((np.int64(segmap_usable_groups_99960) * np.int64(segmap_group_sizze_99959)),), + (np.int64(segmap_group_sizze_99959),)) if synchronous: sync(self) - mem_45403 = None - mem_45406 = None - segmap_usable_groups_34176 = sdiv_up64(m_27772, - segmap_group_sizze_34175) - mem_45419 = opencl_alloc(self, bytes_45152, "mem_45419") - mem_45421 = opencl_alloc(self, bytes_45152, "mem_45421") - if ((1 * (np.long(segmap_usable_groups_34176) * np.long(segmap_group_sizze_34175))) != 0): - self.mainDetailedzisegmap_33950_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45245, - mem_45399, mem_45409, - mem_45411, mem_45413, - mem_45419, mem_45421) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33950_var, - ((np.long(segmap_usable_groups_34176) * np.long(segmap_group_sizze_34175)),), - (np.long(segmap_group_sizze_34175),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - mem_45409 = None - mem_45411 = None - mem_45413 = None - mem_45424 = opencl_alloc(self, bytes_45287, "mem_45424") - self.futhark_builtinzhreplicate_f32(mem_45424, - (m_27772 * iota32_arg_28233), - np.nan) - segmap_usable_groups_34225 = sdiv_up64(nest_sizze_33390, - segmap_group_sizze_34224) - if ((1 * (np.long(segmap_usable_groups_34225) * np.long(segmap_group_sizze_34224))) != 
0): - self.mainDetailedzisegmap_33893_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_27771), - np.int64(m_27772), - np.int32(n_27775), - np.int64(iota32_arg_28203), - np.int64(iota32_arg_28233), - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45245, - mem_45399, mem_45416, - mem_45424) - cl.enqueue_nd_range_kernel(self.queue, - self.mainDetailedzisegmap_33893_var, - ((np.long(segmap_usable_groups_34225) * np.long(segmap_group_sizze_34224)),), - (np.long(segmap_group_sizze_34224),)) - if synchronous: - sync(self) - self.failure_is_an_option = np.int32(1) - mem_45399 = None - defunc_0_f_res_mem_45427 = mem_45424 - defunc_0_f_res_mem_45428 = mem_45416 - defunc_0_f_res_mem_45429 = mem_45419 - defunc_0_f_res_mem_45430 = mem_45421 - mem_45441 = opencl_alloc(self, bytes_45367, "mem_45441") - if (((m_27772 * iota32_arg_28203) * np.int32(4)) != 0): - cl.enqueue_copy(self.queue, mem_45441, defunc_0_f_res_mem_45427, - dest_offset=np.long(np.int64(0)), - src_offset=np.long(np.int64(0)), - byte_count=np.long(((m_27772 * iota32_arg_28203) * np.int32(4)))) - if synchronous: - sync(self) - defunc_0_f_res_mem_45427 = None - defunc_0_f_res_mem_45443 = mem_45441 - defunc_0_f_res_mem_45444 = defunc_0_f_res_mem_45428 - defunc_0_f_res_mem_45445 = defunc_0_f_res_mem_45429 - defunc_0_f_res_mem_45446 = defunc_0_f_res_mem_45430 - defunc_4_map_res_mem_45179 = None - defunc_3_map_res_mem_45244 = None - out_arrsizze_45681 = iota32_arg_28203 - out_arrsizze_45683 = iota32_arg_28203 - out_arrsizze_45685 = iota32_arg_28203 - out_mem_45676 = defunc_0_f_res_mem_45279 - out_mem_45677 = defunc_4_map_res_mem_45177 - out_mem_45678 = defunc_3_map_res_mem_45245 - out_mem_45679 = defunc_3_map_res_mem_45246 - out_mem_45680 = defunc_0_f_res_mem_45443 - out_mem_45682 = defunc_0_f_res_mem_45444 - out_mem_45684 = mem_45282 - out_mem_45686 = defunc_0_f_res_mem_45445 - out_mem_45687 = defunc_0_f_res_mem_45446 - out_mem_45688 = mem_45323 - out_mem_45689 
= defunc_4_map_res_mem_45178 - out_mem_45690 = defunc_3_map_res_mem_45140 - return (out_mem_45676, out_mem_45677, out_mem_45678, out_mem_45679, - out_mem_45680, out_arrsizze_45681, out_mem_45682, - out_arrsizze_45683, out_mem_45684, out_arrsizze_45685, - out_mem_45686, out_mem_45687, out_mem_45688, out_mem_45689, - out_mem_45690) - def futhark_mainMagnitude(self, mappingindices_mem_44380, images_mem_44381, - N_28477, m_28478, trend_28479, k_28480, n_28481, - freq_28482, hfrac_28483, lam_28484): - i32_res_28487 = sext_i32_i64(n_28481) - x_28488 = (np.int32(2) * k_28480) - k2p2_28489 = (np.int32(2) + x_28488) - cond_28490 = slt32(np.int32(0), trend_28479) - if cond_28490: - k2p2zq_28491 = k2p2_28489 - else: - k2p2zq_f_res_28492 = (k2p2_28489 - np.int32(1)) - k2p2zq_28491 = k2p2zq_f_res_28492 - i32_res_28493 = sext_i32_i64(k2p2zq_28491) - binop_x_44384 = (N_28477 * i32_res_28493) - bytes_44383 = (np.int64(4) * binop_x_44384) - if cond_28490: - bounds_invalid_upwards_28495 = slt64(i32_res_28493, np.int64(0)) - valid_28496 = not(bounds_invalid_upwards_28495) - range_valid_c_28497 = True - assert valid_28496, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:37:10-20\n #3 bfastfinal.fut:29:17-66\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_28493, - " is invalid.")) - segmap_group_sizze_34416 = self.sizes["mainMagnitude.segmap_group_size_34346"] - segmap_usable_groups_34417 = sdiv_up64(binop_x_44384, - segmap_group_sizze_34416) - mem_44385 = opencl_alloc(self, bytes_44383, "mem_44385") - if ((1 * (np.long(segmap_usable_groups_34417) * np.long(segmap_group_sizze_34416))) != 0): - self.mainMagnitudezisegmap_34343_var.set_args(self.global_failure, - np.int64(N_28477), - np.float32(freq_28482), - np.int64(i32_res_28493), - mappingindices_mem_44380, - mem_44385) - cl.enqueue_nd_range_kernel(self.queue, - 
self.mainMagnitudezisegmap_34343_var, - ((np.long(segmap_usable_groups_34417) * np.long(segmap_group_sizze_34416)),), - (np.long(segmap_group_sizze_34416),)) - if synchronous: - sync(self) - binop_p_mem_44390 = mem_44385 + mem_124127 = None + mem_124130 = None + defunc_1_map_res_mem_124134 = mem_124133 + defunc_1_map_res_mem_124135 = defunc_1_map_res_mem_124134 + defunc_3_map_res_mem_124068 = None + defunc_3_map_res_mem_124069 = None + mem_124078 = None + hist_inds_mem_124138 = defunc_1_map_res_mem_124135 else: - bounds_invalid_upwards_28521 = slt64(i32_res_28493, np.int64(0)) - valid_28522 = not(bounds_invalid_upwards_28521) - range_valid_c_28523 = True - assert valid_28522, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:50:10-22\n #3 bfastfinal.fut:30:17-64\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_28493, - " is invalid.")) - segmap_group_sizze_34590 = self.sizes["mainMagnitude.segmap_group_size_34524"] - segmap_usable_groups_34591 = sdiv_up64(binop_x_44384, - segmap_group_sizze_34590) - mem_44389 = opencl_alloc(self, bytes_44383, "mem_44389") - if ((1 * (np.long(segmap_usable_groups_34591) * np.long(segmap_group_sizze_34590))) != 0): - self.mainMagnitudezisegmap_34521_var.set_args(self.global_failure, - np.int64(N_28477), - np.float32(freq_28482), - np.int64(i32_res_28493), - mappingindices_mem_44380, - mem_44389) - cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_34521_var, - ((np.long(segmap_usable_groups_34591) * np.long(segmap_group_sizze_34590)),), - (np.long(segmap_group_sizze_34590),)) - if synchronous: - sync(self) - binop_p_mem_44390 = mem_44389 - x_28546 = (N_28477 * N_28477) - y_28547 = (np.int64(2) * N_28477) - x_28548 = (x_28546 + y_28547) - x_28549 = (np.int64(1) + x_28548) - y_28550 = (np.int64(1) + N_28477) - zzero_28551 = (y_28550 == np.int64(0)) - nonzzero_28552 = 
not(zzero_28551) - nonzzero_cert_28553 = True - assert nonzzero_28552, ("Error: %s\n\nBacktrace:\n-> #0 bfastfinal.fut:35:32-60\n #1 bfastfinal.fut:181:5-58\n #2 bfastfinal.fut:176:1-182:37\n" % ("division by zero",)) - x_28554 = sdiv64(x_28549, y_28550) - x_28555 = (x_28554 - N_28477) - binop_p_28556 = (x_28555 - np.int64(1)) - defunc_0_f_res_28557 = sext_i64_i32(binop_p_28556) - i32_res_28558 = sitofp_i32_f32(defunc_0_f_res_28557) - segmap_group_sizze_34673 = self.sizes["mainMagnitude.segmap_group_size_34652"] - segmap_usable_groups_34674 = sdiv_up64(binop_x_44384, - segmap_group_sizze_34673) - mem_44393 = opencl_alloc(self, bytes_44383, "mem_44393") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44393, np.int64(0), - binop_p_mem_44390, np.int64(0), - np.int64(1), N_28477, - i32_res_28493) - mem_44397 = opencl_alloc(self, bytes_44383, "mem_44397") - if ((1 * (np.long(segmap_usable_groups_34674) * np.long(segmap_group_sizze_34673))) != 0): - self.mainMagnitudezisegmap_34649_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(i32_res_28493), - np.float32(i32_res_28558), - mem_44393, mem_44397) + mem_124137 = opencl_alloc(self, bytes_120173, "mem_124137") + self.futhark_builtinzhreplicate_i64(mem_124137, m_73008, hist_73016) + hist_inds_mem_124138 = mem_124137 + segmap_group_sizze_100013 = self.sizes["mainMagnitude.segmap_group_size_99989"] + segmap_usable_groups_100014 = sdiv_up64(binop_x_120126, + segmap_group_sizze_100013) + mem_124142 = opencl_alloc(self, bytes_120125, "mem_124142") + if ((1 * (np.int64(segmap_usable_groups_100014) * np.int64(segmap_group_sizze_100013))) != 0): + self.mainMagnitudezisegmap_99986_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + images_mem_120108, + hist_inds_mem_124138, + mem_124142) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_34649_var, - ((np.long(segmap_usable_groups_34674) * np.long(segmap_group_sizze_34673)),), - (np.long(segmap_group_sizze_34673),)) + 
self.mainMagnitudezisegmap_99986_var, + ((np.int64(segmap_usable_groups_100014) * np.int64(segmap_group_sizze_100013)),), + (np.int64(segmap_group_sizze_100013),)) if synchronous: sync(self) - eq_x_y_28566 = (np.int64(0) == i32_res_28493) - p_and_eq_x_y_28567 = (cond_28490 and eq_x_y_28566) - not_p_28568 = not(cond_28490) - p_and_eq_x_y_28569 = (eq_x_y_28566 and not_p_28568) - empty_slice_28570 = (p_and_eq_x_y_28567 or p_and_eq_x_y_28569) - m_28571 = (i32_res_28493 - np.int64(1)) - zzero_leq_i_p_m_t_s_28572 = sle64(np.int64(0), m_28571) - i_p_m_t_s_leq_w_28573 = slt64(m_28571, i32_res_28493) - i_lte_j_28574 = sle64(np.int64(0), i32_res_28493) - y_28575 = (zzero_leq_i_p_m_t_s_28572 and i_p_m_t_s_leq_w_28573) - y_28576 = (i_lte_j_28574 and y_28575) - ok_or_empty_28577 = (empty_slice_28570 or y_28576) - empty_slice_28578 = (i32_res_28487 == np.int64(0)) - m_28579 = (i32_res_28487 - np.int64(1)) - zzero_leq_i_p_m_t_s_28580 = sle64(np.int64(0), m_28579) - i_p_m_t_s_leq_w_28581 = slt64(m_28579, N_28477) - i_lte_j_28582 = sle64(np.int64(0), i32_res_28487) - y_28583 = (zzero_leq_i_p_m_t_s_28580 and i_p_m_t_s_leq_w_28581) - y_28584 = (i_lte_j_28582 and y_28583) - ok_or_empty_28585 = (empty_slice_28578 or y_28584) - index_ok_28586 = (ok_or_empty_28577 and ok_or_empty_28585) - index_certs_28587 = True - assert index_ok_28586, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:38:13-21\n #1 bfastfinal.fut:181:5-58\n #2 bfastfinal.fut:176:1-182:37\n" % ("Index [", - np.int64(0), - ":, :", - i32_res_28487, - "] out of bounds for array of shape [", - i32_res_28493, - "][", - N_28477, - "].")) - empty_slice_28589 = (i32_res_28493 == np.int64(0)) - ok_or_empty_28590 = (y_28576 or empty_slice_28589) - index_ok_28591 = (ok_or_empty_28585 and ok_or_empty_28590) - index_certs_28592 = True - assert index_ok_28591, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:39:13-22\n #1 bfastfinal.fut:181:5-58\n #2 bfastfinal.fut:176:1-182:37\n" % ("Index [:", - 
i32_res_28487, - ", ", - np.int64(0), - ":] out of bounds for array of shape [", - N_28477, - "][", - i32_res_28493, - "].")) - empty_slice_28594 = (m_28478 == np.int64(0)) - m_28595 = (m_28478 - np.int64(1)) - zzero_leq_i_p_m_t_s_28596 = sle64(np.int64(0), m_28595) - i_p_m_t_s_leq_w_28597 = slt64(m_28595, m_28478) - i_lte_j_28598 = sle64(np.int64(0), m_28478) - y_28599 = (zzero_leq_i_p_m_t_s_28596 and i_p_m_t_s_leq_w_28597) - y_28600 = (i_lte_j_28598 and y_28599) - ok_or_empty_28601 = (empty_slice_28594 or y_28600) - index_ok_28602 = (ok_or_empty_28585 and ok_or_empty_28601) - index_certs_28603 = True - assert index_ok_28602, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:40:13-26\n #1 bfastfinal.fut:181:5-58\n #2 bfastfinal.fut:176:1-182:37\n" % ("Index [", - np.int64(0), - ":, :", - i32_res_28487, - "] out of bounds for array of shape [", - m_28478, - "][", - N_28477, - "].")) - suff_outer_par_34680 = (self.sizes["mainMagnitude.suff_outer_par_6"] <= m_28478) - segmap_group_sizze_34706 = self.sizes["mainMagnitude.segmap_group_size_34684"] - max_num_groups_45695 = self.sizes["mainMagnitude.segmap_num_groups_34686"] - num_groups_34707 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segmap_group_sizze_34706), - sext_i32_i64(max_num_groups_45695)))) - nest_sizze_34882 = (m_28478 * i32_res_28493) - segmap_group_sizze_34883 = self.sizes["mainMagnitude.segmap_group_size_34731"] - max_num_groups_45696 = self.sizes["mainMagnitude.segmap_num_groups_34733"] - num_groups_34884 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_34882, - segmap_group_sizze_34883), - sext_i32_i64(max_num_groups_45696)))) - suff_outer_par_34888 = (self.sizes["mainMagnitude.suff_outer_par_7"] <= nest_sizze_34882) - y_34912 = (i32_res_28493 * i32_res_28493) - comparatee_34913 = (m_28478 * y_34912) - suff_outer_par_34914 = (self.sizes["mainMagnitude.suff_outer_par_8"] <= comparatee_34913) - nest_sizze_34934 = (i32_res_28487 * comparatee_34913) - 
segred_group_sizze_34935 = self.sizes["mainMagnitude.segred_group_size_34791"] - max_num_groups_45697 = self.sizes["mainMagnitude.segred_num_groups_34793"] - num_groups_34936 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_34934, - segred_group_sizze_34935), - sext_i32_i64(max_num_groups_45697)))) - Tx_42527 = self.sizes["mainMagnitude.Tx_42525"] - Ty_42528 = self.sizes["mainMagnitude.Ty_42526"] - Ty_42529 = smin64(i32_res_28493, Ty_42528) - Tx_42530 = smin64(i32_res_28493, Tx_42527) - gridDim_zz_42533 = sdiv_up64(m_28478, np.int64(30)) - group_sizze_tile3d_42536 = (Ty_42529 * Tx_42530) - binop_x_44399 = (N_28477 * m_28478) - bytes_44398 = (np.int64(4) * binop_x_44399) - bytes_44443 = (np.int64(4) * comparatee_34913) - bytes_44402 = (np.int64(4) * y_34912) - binop_x_44546 = (i32_res_28493 * nest_sizze_34882) - bytes_44544 = (np.int64(4) * binop_x_44546) - bytes_44448 = (np.int64(4) * i32_res_28493) - binop_x_44477 = (np.int64(30) * group_sizze_tile3d_42536) - bytes_44475 = (np.int64(4) * binop_x_44477) - binop_x_45447 = (np.int64(4) * Ty_42529) - binop_x_45448 = (Tx_42530 * binop_x_45447) - sizze_45449 = (np.int64(30) * binop_x_45448) - num_threads_45583 = (segmap_group_sizze_34706 * num_groups_34707) - total_sizze_45584 = (bytes_44402 * num_threads_45583) - num_threads_45585 = (segmap_group_sizze_34883 * num_groups_34884) - total_sizze_45586 = (bytes_44448 * num_threads_45585) - local_memory_capacity_45819 = self.max_local_memory + suff_outer_par_100023 = (self.sizes["mainMagnitude.suff_outer_par_23"] <= m_73008) + segmap_group_sizze_100049 = self.sizes["mainMagnitude.segmap_group_size_100027"] + max_num_groups_128705 = self.sizes["mainMagnitude.segmap_num_groups_100029"] + num_groups_100050 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_100049), + sext_i32_i64(max_num_groups_128705)))) + segmap_group_sizze_100226 = self.sizes["mainMagnitude.segmap_group_size_100074"] + max_num_groups_128706 = 
self.sizes["mainMagnitude.segmap_num_groups_100076"] + num_groups_100227 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120244, + segmap_group_sizze_100226), + sext_i32_i64(max_num_groups_128706)))) + suff_outer_par_100231 = (self.sizes["mainMagnitude.suff_outer_par_24"] <= binop_x_120244) + comparatee_100256 = (m_73008 * binop_x_120251) + suff_outer_par_100257 = (self.sizes["mainMagnitude.suff_outer_par_25"] <= comparatee_100256) + nest_sizze_100277 = (n_73011 * comparatee_100256) + segred_group_sizze_100278 = self.sizes["mainMagnitude.segred_group_size_100134"] + max_num_groups_128707 = self.sizes["mainMagnitude.segred_num_groups_100136"] + num_groups_100279 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_100277, + segred_group_sizze_100278), + sext_i32_i64(max_num_groups_128707)))) + Tx_118224 = self.sizes["mainMagnitude.Tx_118222"] + Ty_118225 = self.sizes["mainMagnitude.Ty_118223"] + Ty_118226 = smin64(k2p2zq_73023, Ty_118225) + Tx_118227 = smin64(k2p2zq_73023, Tx_118224) + gridDim_zz_118230 = sdiv_up64(m_73008, np.int64(30)) + group_sizze_tile3d_118233 = (Ty_118226 * Tx_118227) + bytes_124188 = (np.int64(8) * comparatee_100256) + binop_x_124222 = (np.int64(30) * group_sizze_tile3d_118233) + bytes_124220 = (np.int64(8) * binop_x_124222) + binop_x_125361 = (np.int64(8) * Ty_118226) + binop_x_125362 = (Tx_118227 * binop_x_125361) + sizze_125363 = (np.int64(30) * binop_x_125362) + num_threads_126048 = (segmap_group_sizze_100049 * num_groups_100050) + total_sizze_126049 = (bytes_120250 * num_threads_126048) + num_threads_126050 = (segmap_group_sizze_100226 * num_groups_100227) + total_sizze_126051 = (bytes_120247 * num_threads_126050) + local_memory_capacity_128829 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45819)) and suff_outer_par_34680): - mem_44400 = opencl_alloc(self, bytes_44398, "mem_44400") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44400, np.int64(0), - images_mem_44381, 
np.int64(0), - np.int64(1), N_28477, m_28478) - mem_44446 = opencl_alloc(self, bytes_44443, "mem_44446") - mem_44404 = opencl_alloc(self, total_sizze_45584, "mem_44404") - if ((1 * (np.long(num_groups_34707) * np.long(segmap_group_sizze_34706))) != 0): - self.mainMagnitudezisegmap_34682_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int32(n_28481), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int64(num_groups_34707), - binop_p_mem_44390, - mem_44397, mem_44400, - mem_44404, mem_44446) + sext_i32_i64(local_memory_capacity_128829)) and suff_outer_par_100023): + mem_124145 = opencl_alloc(self, bytes_120125, "mem_124145") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124145, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_73007, m_73008) + mem_124191 = opencl_alloc(self, bytes_124188, "mem_124191") + mem_124149 = opencl_alloc(self, total_sizze_126049, "mem_124149") + if ((1 * (np.int64(num_groups_100050) * np.int64(segmap_group_sizze_100049))) != 0): + self.mainMagnitudezisegmap_100025_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_100050), + np.int64(num_threads_126048), + binop_p_mem_120117, + mem_120124, mem_124145, + mem_124149, mem_124191) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_34682_var, - ((np.long(num_groups_34707) * np.long(segmap_group_sizze_34706)),), - (np.long(segmap_group_sizze_34706),)) + self.mainMagnitudezisegmap_100025_var, + ((np.int64(num_groups_100050) * np.int64(segmap_group_sizze_100049)),), + (np.int64(segmap_group_sizze_100049),)) if synchronous: sync(self) - mem_44400 = None - mem_44404 = None - mem_44547 = opencl_alloc(self, bytes_44544, "mem_44547") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44547, np.int64(0), - mem_44446, np.int64(0), - np.int64(1), m_28478, - (i32_res_28493 * i32_res_28493)) - mem_44446 = None - defunc_3_map_res_mem_44549 = 
mem_44547 + mem_124145 = None + mem_124149 = None + mem_124292 = opencl_alloc(self, bytes_121997, "mem_124292") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124292, np.int64(0), + mem_124191, np.int64(0), + np.int64(1), m_73008, + (k2p2zq_73023 * k2p2zq_73023)) + mem_124191 = None + defunc_3_map_res_mem_124294 = mem_124292 else: - local_memory_capacity_45818 = self.max_local_memory + local_memory_capacity_128828 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_45818)) and suff_outer_par_34888): - mem_44465 = opencl_alloc(self, bytes_44544, "mem_44465") - mem_44449 = opencl_alloc(self, total_sizze_45586, "mem_44449") - if ((1 * (np.long(num_groups_34884) * np.long(segmap_group_sizze_34883))) != 0): - self.mainMagnitudezisegmap_34728_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int32(n_28481), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int64(num_groups_34884), - images_mem_44381, - mem_44393, mem_44397, - mem_44449, mem_44465) + sext_i32_i64(local_memory_capacity_128828)) and suff_outer_par_100231): + mem_124210 = opencl_alloc(self, bytes_121997, "mem_124210") + mem_124194 = opencl_alloc(self, total_sizze_126051, "mem_124194") + if ((1 * (np.int64(num_groups_100227) * np.int64(segmap_group_sizze_100226))) != 0): + self.mainMagnitudezisegmap_100071_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_100227), + np.int64(num_threads_126050), + mem_120120, mem_120124, + mem_124142, mem_124194, + mem_124210) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_34728_var, - ((np.long(num_groups_34884) * np.long(segmap_group_sizze_34883)),), - (np.long(segmap_group_sizze_34883),)) + self.mainMagnitudezisegmap_100071_var, + ((np.int64(num_groups_100227) * np.int64(segmap_group_sizze_100226)),), + (np.int64(segmap_group_sizze_100226),)) if synchronous: sync(self) - mem_44449 = 
None - mem_44541 = opencl_alloc(self, bytes_44544, "mem_44541") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44541, np.int64(0), - mem_44465, np.int64(0), + mem_124194 = None + mem_124286 = opencl_alloc(self, bytes_121997, "mem_124286") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124286, np.int64(0), + mem_124210, np.int64(0), np.int64(1), - (m_28478 * i32_res_28493), - i32_res_28493) - mem_44465 = None - defunc_3_map_res_mem_44543 = mem_44541 + (m_73008 * k2p2zq_73023), + k2p2zq_73023) + mem_124210 = None + defunc_3_map_res_mem_124288 = mem_124286 else: - local_memory_capacity_45817 = self.max_local_memory - if (sle64(np.int64(120), - sext_i32_i64(local_memory_capacity_45817)) and suff_outer_par_34914): - mem_44468 = opencl_alloc(self, bytes_44398, "mem_44468") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44468, np.int64(0), - images_mem_44381, - np.int64(0), np.int64(1), - N_28477, m_28478) - gridDim_x_42531 = sdiv_up64(i32_res_28493, Tx_42530) - gridDim_y_42532 = sdiv_up64(i32_res_28493, Ty_42529) - binop_x_42534 = (gridDim_y_42532 * gridDim_zz_42533) - grid_sizze_tile3d_42535 = (gridDim_x_42531 * binop_x_42534) - count_shmem_42537 = sdiv_up64(np.int64(30), group_sizze_tile3d_42536) - mem_44528 = opencl_alloc(self, bytes_44544, "mem_44528") - if ((1 * (np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536))) != 0): - self.mainMagnitudezisegmap_intragroup_42541_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int64(120))), - np.int64(m_28478), - np.int32(n_28481), - np.int64(i32_res_28493), - np.int64(Ty_42529), - np.int64(Tx_42530), - np.int64(gridDim_x_42531), - np.int64(gridDim_y_42532), - np.int64(group_sizze_tile3d_42536), - np.int64(count_shmem_42537), - mem_44393, - mem_44397, - mem_44468, - mem_44528) + local_memory_capacity_128827 = self.max_local_memory + if (sle64(np.int64(240), + sext_i32_i64(local_memory_capacity_128827)) and suff_outer_par_100257): + mem_124213 = opencl_alloc(self, bytes_120125, 
"mem_124213") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124213, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_73007, + m_73008) + gridDim_x_118228 = sdiv_up64(k2p2zq_73023, Tx_118227) + gridDim_y_118229 = sdiv_up64(k2p2zq_73023, Ty_118226) + binop_x_118231 = (gridDim_y_118229 * gridDim_zz_118230) + grid_sizze_tile3d_118232 = (gridDim_x_118228 * binop_x_118231) + count_shmem_118234 = sdiv_up64(np.int64(30), + group_sizze_tile3d_118233) + mem_124273 = opencl_alloc(self, bytes_121997, "mem_124273") + if ((1 * (np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233))) != 0): + self.mainMagnitudezisegmap_intragroup_118238_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int64(240))), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(Ty_118226), + np.int64(Tx_118227), + np.int64(gridDim_x_118228), + np.int64(gridDim_y_118229), + np.int64(group_sizze_tile3d_118233), + np.int64(count_shmem_118234), + mem_120120, + mem_120124, + mem_124213, + mem_124273) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_42541_var, - ((np.long(grid_sizze_tile3d_42535) * np.long(group_sizze_tile3d_42536)),), - (np.long(group_sizze_tile3d_42536),)) + self.mainMagnitudezisegmap_intragroup_118238_var, + ((np.int64(grid_sizze_tile3d_118232) * np.int64(group_sizze_tile3d_118233)),), + (np.int64(group_sizze_tile3d_118233),)) if synchronous: sync(self) - mem_44468 = None - defunc_3_map_res_mem_44537 = mem_44528 + mem_124213 = None + defunc_3_map_res_mem_124282 = mem_124273 else: - mem_44531 = opencl_alloc(self, bytes_44383, "mem_44531") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44531, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), - i32_res_28493, N_28477) - mem_44536 = opencl_alloc(self, bytes_44544, "mem_44536") - if slt64((i32_res_28487 * np.int64(2)), segred_group_sizze_34935): - segment_sizze_nonzzero_45757 = smax64(np.int64(1), i32_res_28487) - num_threads_45758 = 
(num_groups_34936 * segred_group_sizze_34935) - if ((1 * (np.long(num_groups_34936) * np.long(segred_group_sizze_34935))) != 0): - self.mainMagnitudezisegred_small_34797_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34935))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28487), - np.int64(i32_res_28493), - np.int64(num_groups_34936), - np.int64(segment_sizze_nonzzero_45757), - images_mem_44381, - binop_p_mem_44390, - mem_44531, - mem_44536) + mem_124276 = opencl_alloc(self, bytes_120110, "mem_124276") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124276, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_73023, + N_73007) + mem_124281 = opencl_alloc(self, bytes_121997, "mem_124281") + if slt64((n_73011 * np.int64(2)), segred_group_sizze_100278): + segment_sizze_nonzzero_128767 = smax64(np.int64(1), n_73011) + num_threads_128768 = (num_groups_100279 * segred_group_sizze_100278) + if ((1 * (np.int64(num_groups_100279) * np.int64(segred_group_sizze_100278))) != 0): + self.mainMagnitudezisegred_small_100140_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_100278))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_100279), + np.int64(segment_sizze_nonzzero_128767), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_34797_var, - ((np.long(num_groups_34936) * np.long(segred_group_sizze_34935)),), - (np.long(segred_group_sizze_34935),)) + self.mainMagnitudezisegred_small_100140_var, + ((np.int64(num_groups_100279) * np.int64(segred_group_sizze_100278)),), + (np.int64(segred_group_sizze_100278),)) if synchronous: sync(self) else: - groups_per_segment_45778 = sdiv_up64(num_groups_34936, - smax64(np.int64(1), - ((m_28478 * i32_res_28493) * i32_res_28493))) - elements_per_thread_45779 = 
sdiv_up64(i32_res_28487, - (segred_group_sizze_34935 * groups_per_segment_45778)) - virt_num_groups_45780 = (groups_per_segment_45778 * ((m_28478 * i32_res_28493) * i32_res_28493)) - num_threads_45781 = (num_groups_34936 * segred_group_sizze_34935) - threads_per_segment_45782 = (groups_per_segment_45778 * segred_group_sizze_34935) - group_res_arr_mem_45783 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_34935 * virt_num_groups_45780)), - "group_res_arr_mem_45783") - mainMagnitudezicounter_mem_45785 = self.mainMagnitudezicounter_mem_45785 - if ((1 * (np.long(num_groups_34936) * np.long(segred_group_sizze_34935))) != 0): - self.mainMagnitudezisegred_large_34797_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_34935))), - np.int64(N_28477), - np.int64(i32_res_28487), - np.int64(i32_res_28493), - np.int64(num_groups_34936), - np.int64(groups_per_segment_45778), - np.int64(elements_per_thread_45779), - np.int64(virt_num_groups_45780), - np.int64(threads_per_segment_45782), - images_mem_44381, - binop_p_mem_44390, - mem_44531, - mem_44536, - group_res_arr_mem_45783, - mainMagnitudezicounter_mem_45785) + groups_per_segment_128788 = sdiv_up64(num_groups_100279, + smax64(np.int64(1), + ((m_73008 * k2p2zq_73023) * k2p2zq_73023))) + elements_per_thread_128789 = sdiv_up64(n_73011, + (segred_group_sizze_100278 * groups_per_segment_128788)) + virt_num_groups_128790 = (groups_per_segment_128788 * ((m_73008 * k2p2zq_73023) * k2p2zq_73023)) + num_threads_128791 = (num_groups_100279 * segred_group_sizze_100278) + threads_per_segment_128792 = (groups_per_segment_128788 * segred_group_sizze_100278) + group_res_arr_mem_128793 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_100278 * virt_num_groups_128790)), + "group_res_arr_mem_128793") + mainMagnitudezicounter_mem_128795 = self.mainMagnitudezicounter_mem_128795 + if ((1 * (np.int64(num_groups_100279) * 
np.int64(segred_group_sizze_100278))) != 0): + self.mainMagnitudezisegred_large_100140_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_100278))), + np.int64(N_73007), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_100279), + np.int64(groups_per_segment_128788), + np.int64(elements_per_thread_128789), + np.int64(virt_num_groups_128790), + np.int64(threads_per_segment_128792), + binop_p_mem_120117, + mem_124142, + mem_124276, + mem_124281, + group_res_arr_mem_128793, + mainMagnitudezicounter_mem_128795) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_34797_var, - ((np.long(num_groups_34936) * np.long(segred_group_sizze_34935)),), - (np.long(segred_group_sizze_34935),)) + self.mainMagnitudezisegred_large_100140_var, + ((np.int64(num_groups_100279) * np.int64(segred_group_sizze_100278)),), + (np.int64(segred_group_sizze_100278),)) if synchronous: sync(self) - mem_44531 = None - defunc_3_map_res_mem_44537 = mem_44536 - defunc_3_map_res_mem_44543 = defunc_3_map_res_mem_44537 - defunc_3_map_res_mem_44549 = defunc_3_map_res_mem_44543 - m_28624 = (np.int32(2) * k2p2zq_28491) - x_28625 = (np.int64(2) * i32_res_28493) - nm_28626 = (i32_res_28493 * x_28625) - bounds_invalid_upwards_28627 = slt64(nm_28626, np.int64(0)) - valid_28628 = not(bounds_invalid_upwards_28627) - range_valid_c_28629 = True - assert valid_28628, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 helpers.fut:79:21-29\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - nm_28626, - " is invalid.")) - zzero_28634 = (m_28624 == np.int32(0)) - nonzzero_28635 = not(zzero_28634) - nonzzero_cert_28636 = True - assert nonzzero_28635, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:74:41-47\n #1 helpers.fut:74:14-79:30\n #2 
bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n" % ("division by zero",)) - loop_nonempty_28637 = slt32(np.int32(0), k2p2zq_28491) - loop_not_taken_28638 = not(loop_nonempty_28637) - protect_assert_disj_28639 = (nonzzero_28635 or loop_not_taken_28638) - nonzzero_cert_28640 = True - assert protect_assert_disj_28639, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:60:43-49\n #1 helpers.fut:60:16-66:44\n #2 helpers.fut:80:16-34\n #3 bfastfinal.fut:50:35-50\n #4 bfastfinal.fut:181:5-58\n #5 bfastfinal.fut:176:1-182:37\n" % ("division by zero",)) - i32_res_28641 = sext_i32_i64(m_28624) - x_28642 = (i32_res_28493 * i32_res_28641) - dim_ok_28643 = (x_28642 == nm_28626) - dim_ok_cert_28644 = True - assert dim_ok_28643, ("Error: %s\n\nBacktrace:\n-> #0 /prelude/array.fut:141:3-33\n #1 helpers.fut:81:16-43\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n" % ("new shape has different number of elements than old shape",)) - j_m_i_28645 = (x_28625 - i32_res_28493) - empty_slice_28646 = (j_m_i_28645 == np.int64(0)) - m_28647 = (j_m_i_28645 - np.int64(1)) - i_p_m_t_s_28648 = (i32_res_28493 + m_28647) - zzero_leq_i_p_m_t_s_28649 = sle64(np.int64(0), i_p_m_t_s_28648) - i_p_m_t_s_leq_w_28650 = slt64(i_p_m_t_s_28648, i32_res_28641) - i_lte_j_28651 = sle64(i32_res_28493, x_28625) - y_28652 = (i_lte_j_28574 and i_p_m_t_s_leq_w_28650) - y_28653 = (zzero_leq_i_p_m_t_s_28649 and y_28652) - y_28654 = (i_lte_j_28651 and y_28653) - forwards_ok_28655 = (i_lte_j_28574 and y_28654) - ok_or_empty_28656 = (empty_slice_28646 or forwards_ok_28655) - index_ok_28657 = (ok_or_empty_28590 and ok_or_empty_28656) - index_certs_28658 = True - assert index_ok_28657, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-30\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:181:5-58\n #3 bfastfinal.fut:176:1-182:37\n" % ("Index [", + mem_124276 = None + defunc_3_map_res_mem_124282 = mem_124281 + 
defunc_3_map_res_mem_124288 = defunc_3_map_res_mem_124282 + defunc_3_map_res_mem_124294 = defunc_3_map_res_mem_124288 + m_74646 = (np.int64(2) * k2p2zq_73023) + nm_74647 = (k2p2zq_73023 * m_74646) + bounds_invalid_upwards_74648 = slt64(nm_74647, np.int64(0)) + valid_74649 = not(bounds_invalid_upwards_74648) + range_valid_c_74650 = True + assert valid_74649, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:73:21-27\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + nm_74647, + " is invalid.")) + zzero_74652 = (m_74646 == np.int64(0)) + nonzzero_74653 = not(zzero_74652) + nonzzero_cert_74654 = True + assert nonzzero_74653, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:68:41-47\n #1 helpers.fut:68:14-73:28\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n" % ("division by zero",)) + loop_nonempty_74655 = slt64(np.int64(0), k2p2zq_73023) + loop_not_taken_74656 = not(loop_nonempty_74655) + protect_assert_disj_74657 = (nonzzero_74653 or loop_not_taken_74656) + nonzzero_cert_74658 = True + assert protect_assert_disj_74657, ("Error: %s\n\nBacktrace:\n-> #0 helpers.fut:54:43-49\n #1 helpers.fut:54:16-60:30\n #2 helpers.fut:74:16-34\n #3 bfastfinal.fut:61:35-50\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n" % ("division by zero",)) + j_m_i_74659 = (m_74646 - k2p2zq_73023) + empty_slice_74660 = (j_m_i_74659 == np.int64(0)) + m_74661 = (j_m_i_74659 - np.int64(1)) + i_p_m_t_s_74662 = (k2p2zq_73023 + m_74661) + zzero_leq_i_p_m_t_s_74663 = sle64(np.int64(0), i_p_m_t_s_74662) + i_p_m_t_s_leq_w_74664 = slt64(i_p_m_t_s_74662, m_74646) + i_lte_j_74665 = sle64(k2p2zq_73023, m_74646) + y_74666 = (i_lte_j_73098 and i_p_m_t_s_leq_w_74664) + y_74667 = (zzero_leq_i_p_m_t_s_74663 and y_74666) + y_74668 = (i_lte_j_74665 and y_74667) + forwards_ok_74669 = (i_lte_j_73098 and y_74668) + 
ok_or_empty_74670 = (empty_slice_74660 or forwards_ok_74669) + index_ok_74671 = (ok_or_empty_73101 and ok_or_empty_74670) + index_certs_74672 = True + assert index_ok_74671, ("Error: %s%d%s%d%s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-30\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:192:5-74\n #3 bfastfinal.fut:187:1-193:48\n" % ("Index [", np.int64(0), ":", - i32_res_28493, + k2p2zq_73023, ", ", - i32_res_28493, + k2p2zq_73023, ":", - x_28625, + m_74646, "] out of bounds for array of shape [", - i32_res_28493, + k2p2zq_73023, "][", - i32_res_28641, + m_74646, "].")) - dim_match_28659 = (i32_res_28493 == j_m_i_28645) - empty_or_match_cert_28660 = True - assert dim_match_28659, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:83:8-45\n #1 bfastfinal.fut:50:35-50\n #2 bfastfinal.fut:181:5-58\n #3 bfastfinal.fut:176:1-182:37\n" % ("Value of (core language) shape (", - i32_res_28493, + dim_match_74673 = (k2p2zq_73023 == j_m_i_74659) + empty_or_match_cert_74674 = True + assert dim_match_74673, ("Error: %s%d%s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:77:8-45\n #1 bfastfinal.fut:61:35-50\n #2 bfastfinal.fut:192:5-74\n #3 bfastfinal.fut:187:1-193:48\n" % ("Value of (core language) shape (", + k2p2zq_73023, ", ", - j_m_i_28645, + j_m_i_74659, ") cannot match shape of type `[", - i32_res_28493, + k2p2zq_73023, "][", - i32_res_28493, - "]f32`.")) - max_group_sizze_35230 = self.max_group_size - fits_35231 = sle64(nm_28626, max_group_sizze_35230) - suff_intra_par_35229 = (self.sizes["mainMagnitude.suff_intra_par_11"] <= nm_28626) - intra_suff_and_fits_35232 = (suff_intra_par_35229 and fits_35231) - nest_sizze_35896 = (m_28478 * nm_28626) - segmap_group_sizze_35897 = self.sizes["mainMagnitude.segmap_group_size_35840"] - suff_intra_par_35936 = (self.sizes["mainMagnitude.suff_intra_par_13"] <= nm_28626) - intra_suff_and_fits_35937 = (fits_35231 and suff_intra_par_35936) - segmap_group_sizze_35989 = self.sizes["mainMagnitude.segmap_group_size_35722"] 
- segmap_group_sizze_36005 = self.sizes["mainMagnitude.segmap_group_size_35623"] - segmap_group_sizze_36055 = self.sizes["mainMagnitude.segmap_group_size_35553"] - y_36065 = (i32_res_28493 * j_m_i_28645) - nest_sizze_36066 = (m_28478 * y_36065) - segmap_group_sizze_36067 = self.sizes["mainMagnitude.segmap_group_size_35324"] - segmap_usable_groups_35990 = sdiv_up_safe64(m_28478, - segmap_group_sizze_35989) - segmap_usable_groups_36006 = sdiv_up_safe64(nest_sizze_35896, - segmap_group_sizze_36005) - segmap_usable_groups_36056 = sdiv_up_safe64(nest_sizze_35896, - segmap_group_sizze_36055) - bytes_44552 = (np.int64(4) * nm_28626) - bytes_44575 = (np.int64(4) * nest_sizze_35896) - binop_x_44626 = (j_m_i_28645 * nest_sizze_34882) - bytes_44624 = (np.int64(4) * binop_x_44626) - local_memory_capacity_45820 = self.max_local_memory - if intra_suff_and_fits_35232: - defunc_3_map_res_ixfn_44628 = i32_res_28493 - else: - defunc_3_map_res_ixfn_44628 = j_m_i_28645 - local_memory_capacity_45883 = self.max_local_memory - if (sle64(((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))) + (bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_45883)) and intra_suff_and_fits_35232): - mem_44573 = opencl_alloc(self, bytes_44544, "mem_44573") - if ((1 * (np.long(m_28478) * np.long(nm_28626))) != 0): - self.mainMagnitudezisegmap_intragroup_35056_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - cl.LocalMemory(np.long(bytes_44552)), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int32(m_28624), - np.int64(nm_28626), - np.int64(i32_res_28641), - defunc_3_map_res_mem_44549, - mem_44573) + k2p2zq_73023, + "]f64`.")) + fits_100364 = sle64(nm_74647, max_group_sizze_90561) + suff_intra_par_100362 = (self.sizes["mainMagnitude.suff_intra_par_26"] <= nm_74647) + intra_suff_and_fits_100365 = 
(suff_intra_par_100362 and fits_100364) + nest_sizze_100795 = (m_73008 * nm_74647) + segmap_group_sizze_100796 = self.sizes["mainMagnitude.segmap_group_size_100748"] + suff_intra_par_100827 = (self.sizes["mainMagnitude.suff_intra_par_27"] <= nm_74647) + intra_suff_and_fits_100828 = (fits_100364 and suff_intra_par_100827) + segmap_group_sizze_100876 = self.sizes["mainMagnitude.segmap_group_size_100664"] + segmap_group_sizze_100889 = self.sizes["mainMagnitude.segmap_group_size_100575"] + segmap_group_sizze_100934 = self.sizes["mainMagnitude.segmap_group_size_100555"] + segmap_group_sizze_100944 = self.sizes["mainMagnitude.segmap_group_size_100445"] + segmap_usable_groups_100877 = sdiv_up_safe64(m_73008, + segmap_group_sizze_100876) + segmap_usable_groups_100890 = sdiv_up_safe64(nest_sizze_100795, + segmap_group_sizze_100889) + segmap_usable_groups_100935 = sdiv_up_safe64(nest_sizze_100795, + segmap_group_sizze_100934) + bytes_124297 = (np.int64(8) * nm_74647) + bytes_124320 = (np.int64(8) * nest_sizze_100795) + local_memory_capacity_128886 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128886)) and intra_suff_and_fits_100365): + mem_124318 = opencl_alloc(self, bytes_121997, "mem_124318") + if ((1 * (np.int64(m_73008) * np.int64(nm_74647))) != 0): + self.mainMagnitudezisegmap_intragroup_100360_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_73023), + np.int64(m_74646), + np.int64(nm_74647), + defunc_3_map_res_mem_124294, + mem_124318) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_35056_var, - ((np.long(m_28478) * np.long(nm_28626)),), - (np.long(nm_28626),)) + 
self.mainMagnitudezisegmap_intragroup_100360_var, + ((np.int64(m_73008) * np.int64(nm_74647)),), + (np.int64(nm_74647),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_44629 = mem_44573 + defunc_3_map_res_mem_124372 = mem_124318 else: - segmap_usable_groups_35898 = sdiv_up64(nest_sizze_35896, - segmap_group_sizze_35897) - mem_44577 = opencl_alloc(self, bytes_44575, "mem_44577") - if ((1 * (np.long(segmap_usable_groups_35898) * np.long(segmap_group_sizze_35897))) != 0): - self.mainMagnitudezisegmap_35837_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_28478), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int32(m_28624), - np.int64(nm_28626), - defunc_3_map_res_mem_44549, - mem_44577) + segmap_usable_groups_100797 = sdiv_up64(nest_sizze_100795, + segmap_group_sizze_100796) + mem_124322 = opencl_alloc(self, bytes_124320, "mem_124322") + if ((1 * (np.int64(segmap_usable_groups_100797) * np.int64(segmap_group_sizze_100796))) != 0): + self.mainMagnitudezisegmap_100745_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_74646), + np.int64(nm_74647), + defunc_3_map_res_mem_124294, + mem_124322) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_35837_var, - ((np.long(segmap_usable_groups_35898) * np.long(segmap_group_sizze_35897)),), - (np.long(segmap_group_sizze_35897),)) + self.mainMagnitudezisegmap_100745_var, + ((np.int64(segmap_usable_groups_100797) * np.int64(segmap_group_sizze_100796)),), + (np.int64(segmap_group_sizze_100796),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - ctx_param_ext_44578 = m_28478 - ctx_param_ext_44579 = nm_28626 - ctx_param_ext_44580 = np.int64(0) - ctx_param_ext_44581 = nm_28626 - ctx_param_ext_44582 = m_28478 - ctx_param_ext_44583 = np.int64(1) - ctx_param_ext_44584 = nm_28626 - 
mem_param_44585 = mem_44577 - i_35922 = np.int32(0) - one_46888 = np.int32(1) - for counter_46887 in range(k2p2zq_28491): - i32_res_35924 = sext_i32_i64(i_35922) - x_35925 = sle64(np.int64(0), i32_res_35924) - y_35926 = slt64(i32_res_35924, nm_28626) - bounds_check_35927 = (x_35925 and y_35926) - index_certs_35928 = True - assert bounds_check_35927, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:59:16-27\n #1 helpers.fut:80:16-34\n #2 bfastfinal.fut:50:35-50\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n" % ("Index [", - i32_res_35924, - "] out of bounds for array of shape [", - nm_28626, - "].")) - local_memory_capacity_45843 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44607 = m_28478 + ctx_param_ext_124323 = m_73008 + ctx_param_ext_124324 = nm_74647 + ctx_param_ext_124325 = np.int64(0) + ctx_param_ext_124326 = nm_74647 + ctx_param_ext_124327 = m_73008 + ctx_param_ext_124328 = np.int64(1) + ctx_param_ext_124329 = nm_74647 + mem_param_124330 = mem_124322 + i_100818 = np.int64(0) + one_129958 = np.int64(1) + for counter_129957 in range(k2p2zq_73023): + y_100820 = slt64(i_100818, nm_74647) + index_certs_100821 = True + assert y_100820, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 helpers.fut:53:16-19\n #1 helpers.fut:74:16-34\n #2 bfastfinal.fut:61:35-50\n #3 bfastfinal.fut:192:5-74\n #4 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_100818, + "] out of bounds for array of shape [", + nm_74647, + "].")) + local_memory_capacity_128852 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124351 = m_73008 else: - gauss_jordan_res_ixfn_44607 = ctx_param_ext_44582 - local_memory_capacity_45844 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44608 = nm_28626 + gauss_jordan_res_ixfn_124351 = ctx_param_ext_124327 + local_memory_capacity_128853 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124352 = nm_74647 else: - 
gauss_jordan_res_ixfn_44608 = ctx_param_ext_44584 - local_memory_capacity_45845 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44609 = m_28478 + gauss_jordan_res_ixfn_124352 = ctx_param_ext_124329 + local_memory_capacity_128854 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124353 = m_73008 else: - gauss_jordan_res_ixfn_44609 = ctx_param_ext_44578 - local_memory_capacity_45846 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44610 = nm_28626 + gauss_jordan_res_ixfn_124353 = ctx_param_ext_124323 + local_memory_capacity_128855 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124354 = nm_74647 else: - gauss_jordan_res_ixfn_44610 = ctx_param_ext_44579 - local_memory_capacity_45847 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44611 = nm_28626 + gauss_jordan_res_ixfn_124354 = ctx_param_ext_124324 + local_memory_capacity_128856 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124355 = nm_74647 else: - gauss_jordan_res_ixfn_44611 = ctx_param_ext_44581 - local_memory_capacity_45848 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44612 = np.int64(1) + gauss_jordan_res_ixfn_124355 = ctx_param_ext_124326 + local_memory_capacity_128857 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124356 = np.int64(1) else: - gauss_jordan_res_ixfn_44612 = ctx_param_ext_44583 - local_memory_capacity_45849 = self.max_local_memory - if intra_suff_and_fits_35937: - gauss_jordan_res_ixfn_44613 = np.int64(0) + gauss_jordan_res_ixfn_124356 = ctx_param_ext_124328 + local_memory_capacity_128858 = self.max_local_memory + if intra_suff_and_fits_100828: + gauss_jordan_res_ixfn_124357 = np.int64(0) else: - gauss_jordan_res_ixfn_44613 = ctx_param_ext_44580 - local_memory_capacity_45877 = self.max_local_memory - if ((sle64(np.int64(0), - 
sext_i32_i64(local_memory_capacity_45877)) and sle64((bytes_44552 + srem64((np.int64(8) - srem64(bytes_44552, - np.int64(8))), - np.int64(8))), - sext_i32_i64(local_memory_capacity_45877))) and intra_suff_and_fits_35937): - mem_44590 = opencl_alloc(self, bytes_44575, "mem_44590") - group_sizze_45853 = self.sizes["mainMagnitude.group_size_45853"] - num_groups_45854 = sdiv_up64((m_28478 * nm_28626), group_sizze_45853) - if ((1 * (np.long(num_groups_45854) * np.long(group_sizze_45853))) != 0): - self.mainMagnitudezicopy_45850_var.set_args(np.int64(m_28478), - np.int64(nm_28626), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44590) - cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezicopy_45850_var, - ((np.long(num_groups_45854) * np.long(group_sizze_45853)),), - (np.long(group_sizze_45853),)) - if synchronous: - sync(self) - mem_44598 = opencl_alloc(self, bytes_44575, "mem_44598") - if ((1 * (np.long(m_28478) * np.long(nm_28626))) != 0): - self.mainMagnitudezisegmap_intragroup_35383_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_44552)), - np.int64(m_28478), - np.int32(k2p2zq_28491), - np.int32(m_28624), - np.int64(nm_28626), - np.int32(i_35922), - np.int64(i32_res_35924), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44590, - mem_44598) + gauss_jordan_res_ixfn_124357 = ctx_param_ext_124325 + local_memory_capacity_128880 = self.max_local_memory + if (sle64(((bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8))) + (bytes_124297 + srem64((np.int64(8) - srem64(bytes_124297, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_128880)) and intra_suff_and_fits_100828): + mem_124342 = opencl_alloc(self, bytes_124320, "mem_124342") + if ((1 * (np.int64(m_73008) * np.int64(nm_74647))) 
!= 0): + self.mainMagnitudezisegmap_intragroup_100498_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(bytes_124297)), + cl.LocalMemory(np.int64(bytes_124297)), + np.int64(k2p2zq_73023), + np.int64(m_74646), + np.int64(nm_74647), + np.int64(i_100818), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124342) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_35383_var, - ((np.long(m_28478) * np.long(nm_28626)),), - (np.long(nm_28626),)) + self.mainMagnitudezisegmap_intragroup_100498_var, + ((np.int64(m_73008) * np.int64(nm_74647)),), + (np.int64(nm_74647),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_44590 = None - gauss_jordan_res_mem_44614 = mem_44598 + gauss_jordan_res_mem_124358 = mem_124342 else: - mem_44601 = opencl_alloc(self, m_28478, "mem_44601") - if ((1 * (np.long(segmap_usable_groups_35990) * np.long(segmap_group_sizze_35989))) != 0): - self.mainMagnitudezisegmap_35720_var.set_args(self.global_failure, - np.int64(m_28478), - np.int64(i32_res_35924), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44601) + mem_124345 = opencl_alloc(self, m_73008, "mem_124345") + if ((1 * (np.int64(segmap_usable_groups_100877) * np.int64(segmap_group_sizze_100876))) != 0): + self.mainMagnitudezisegmap_100662_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(i_100818), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124345) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_35720_var, - ((np.long(segmap_usable_groups_35990) * np.long(segmap_group_sizze_35989)),), - (np.long(segmap_group_sizze_35989),)) + self.mainMagnitudezisegmap_100662_var, + ((np.int64(segmap_usable_groups_100877) * 
np.int64(segmap_group_sizze_100876)),), + (np.int64(segmap_group_sizze_100876),)) if synchronous: sync(self) - mem_44605 = opencl_alloc(self, bytes_44575, "mem_44605") - if ((1 * (np.long(segmap_usable_groups_36006) * np.long(segmap_group_sizze_36005))) != 0): - self.mainMagnitudezisegmap_35620_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_28478), - np.int32(k2p2zq_28491), - np.int32(m_28624), - np.int64(nm_28626), - np.int32(i_35922), - np.int64(i32_res_35924), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - mem_44601, mem_44605) + mem_124349 = opencl_alloc(self, bytes_124320, "mem_124349") + if ((1 * (np.int64(segmap_usable_groups_100890) * np.int64(segmap_group_sizze_100889))) != 0): + self.mainMagnitudezisegmap_100572_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_74646), + np.int64(nm_74647), + np.int64(i_100818), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124345, + mem_124349) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_35620_var, - ((np.long(segmap_usable_groups_36006) * np.long(segmap_group_sizze_36005)),), - (np.long(segmap_group_sizze_36005),)) + self.mainMagnitudezisegmap_100572_var, + ((np.int64(segmap_usable_groups_100890) * np.int64(segmap_group_sizze_100889)),), + (np.int64(segmap_group_sizze_100889),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_44601 = None - if ((1 * (np.long(segmap_usable_groups_36056) * np.long(segmap_group_sizze_36055))) != 0): - self.mainMagnitudezisegmap_35550_var.set_args(self.global_failure, - np.int64(m_28478), - np.int64(nm_28626), - np.int64(ctx_param_ext_44580), - np.int64(ctx_param_ext_44581), - np.int64(ctx_param_ext_44583), - mem_param_44585, - 
mem_44605) + mem_124345 = None + if ((1 * (np.int64(segmap_usable_groups_100935) * np.int64(segmap_group_sizze_100934))) != 0): + self.mainMagnitudezisegmap_100552_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(nm_74647), + np.int64(ctx_param_ext_124325), + np.int64(ctx_param_ext_124326), + np.int64(ctx_param_ext_124328), + mem_param_124330, + mem_124349) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_35550_var, - ((np.long(segmap_usable_groups_36056) * np.long(segmap_group_sizze_36055)),), - (np.long(segmap_group_sizze_36055),)) + self.mainMagnitudezisegmap_100552_var, + ((np.int64(segmap_usable_groups_100935) * np.int64(segmap_group_sizze_100934)),), + (np.int64(segmap_group_sizze_100934),)) if synchronous: sync(self) - mem_44605 = None - gauss_jordan_res_mem_44614 = mem_param_44585 - ctx_param_ext_tmp_45834 = gauss_jordan_res_ixfn_44609 - ctx_param_ext_tmp_45835 = gauss_jordan_res_ixfn_44610 - ctx_param_ext_tmp_45836 = gauss_jordan_res_ixfn_44613 - ctx_param_ext_tmp_45837 = gauss_jordan_res_ixfn_44611 - ctx_param_ext_tmp_45838 = gauss_jordan_res_ixfn_44607 - ctx_param_ext_tmp_45839 = gauss_jordan_res_ixfn_44612 - ctx_param_ext_tmp_45840 = gauss_jordan_res_ixfn_44608 - mem_param_tmp_45841 = gauss_jordan_res_mem_44614 - ctx_param_ext_44578 = ctx_param_ext_tmp_45834 - ctx_param_ext_44579 = ctx_param_ext_tmp_45835 - ctx_param_ext_44580 = ctx_param_ext_tmp_45836 - ctx_param_ext_44581 = ctx_param_ext_tmp_45837 - ctx_param_ext_44582 = ctx_param_ext_tmp_45838 - ctx_param_ext_44583 = ctx_param_ext_tmp_45839 - ctx_param_ext_44584 = ctx_param_ext_tmp_45840 - mem_param_44585 = mem_param_tmp_45841 - i_35922 += one_46888 - gauss_jordan_res_r_ixfn_44615 = ctx_param_ext_44578 - gauss_jordan_res_r_ixfn_44616 = ctx_param_ext_44579 - gauss_jordan_res_r_ixfn_44617 = ctx_param_ext_44580 - gauss_jordan_res_r_ixfn_44618 = ctx_param_ext_44581 - gauss_jordan_res_r_ixfn_44619 = ctx_param_ext_44582 - gauss_jordan_res_r_ixfn_44620 = 
ctx_param_ext_44583 - gauss_jordan_res_r_ixfn_44621 = ctx_param_ext_44584 - gauss_jordan_res_r_mem_44622 = mem_param_44585 - mem_44577 = None - segmap_usable_groups_36068 = sdiv_up64(nest_sizze_36066, - segmap_group_sizze_36067) - mem_44627 = opencl_alloc(self, bytes_44624, "mem_44627") - if ((1 * (np.long(segmap_usable_groups_36068) * np.long(segmap_group_sizze_36067))) != 0): - self.mainMagnitudezisegmap_35320_var.set_args(self.global_failure, - np.int64(m_28478), - np.int64(i32_res_28493), - np.int64(nm_28626), - np.int64(i32_res_28641), - np.int64(x_28642), - np.int64(j_m_i_28645), - np.int64(gauss_jordan_res_r_ixfn_44617), - np.int64(gauss_jordan_res_r_ixfn_44618), - np.int64(gauss_jordan_res_r_ixfn_44620), - gauss_jordan_res_r_mem_44622, - mem_44627) + mem_124349 = None + gauss_jordan_res_mem_124358 = mem_param_124330 + ctx_param_ext_tmp_128843 = gauss_jordan_res_ixfn_124353 + ctx_param_ext_tmp_128844 = gauss_jordan_res_ixfn_124354 + ctx_param_ext_tmp_128845 = gauss_jordan_res_ixfn_124357 + ctx_param_ext_tmp_128846 = gauss_jordan_res_ixfn_124355 + ctx_param_ext_tmp_128847 = gauss_jordan_res_ixfn_124351 + ctx_param_ext_tmp_128848 = gauss_jordan_res_ixfn_124356 + ctx_param_ext_tmp_128849 = gauss_jordan_res_ixfn_124352 + mem_param_tmp_128850 = gauss_jordan_res_mem_124358 + ctx_param_ext_124323 = ctx_param_ext_tmp_128843 + ctx_param_ext_124324 = ctx_param_ext_tmp_128844 + ctx_param_ext_124325 = ctx_param_ext_tmp_128845 + ctx_param_ext_124326 = ctx_param_ext_tmp_128846 + ctx_param_ext_124327 = ctx_param_ext_tmp_128847 + ctx_param_ext_124328 = ctx_param_ext_tmp_128848 + ctx_param_ext_124329 = ctx_param_ext_tmp_128849 + mem_param_124330 = mem_param_tmp_128850 + i_100818 += one_129958 + gauss_jordan_res_r_ixfn_124359 = ctx_param_ext_124323 + gauss_jordan_res_r_ixfn_124360 = ctx_param_ext_124324 + gauss_jordan_res_r_ixfn_124361 = ctx_param_ext_124325 + gauss_jordan_res_r_ixfn_124362 = ctx_param_ext_124326 + gauss_jordan_res_r_ixfn_124363 = ctx_param_ext_124327 + 
gauss_jordan_res_r_ixfn_124364 = ctx_param_ext_124328 + gauss_jordan_res_r_ixfn_124365 = ctx_param_ext_124329 + gauss_jordan_res_r_mem_124366 = mem_param_124330 + mem_124322 = None + segmap_usable_groups_100945 = sdiv_up64(comparatee_100256, + segmap_group_sizze_100944) + mem_124371 = opencl_alloc(self, bytes_121997, "mem_124371") + if ((1 * (np.int64(segmap_usable_groups_100945) * np.int64(segmap_group_sizze_100944))) != 0): + self.mainMagnitudezisegmap_100441_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(m_74646), + np.int64(nm_74647), + np.int64(gauss_jordan_res_r_ixfn_124361), + np.int64(gauss_jordan_res_r_ixfn_124362), + np.int64(gauss_jordan_res_r_ixfn_124364), + gauss_jordan_res_r_mem_124366, + mem_124371) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_35320_var, - ((np.long(segmap_usable_groups_36068) * np.long(segmap_group_sizze_36067)),), - (np.long(segmap_group_sizze_36067),)) + self.mainMagnitudezisegmap_100441_var, + ((np.int64(segmap_usable_groups_100945) * np.int64(segmap_group_sizze_100944)),), + (np.int64(segmap_group_sizze_100944),)) if synchronous: sync(self) - gauss_jordan_res_r_mem_44622 = None - defunc_3_map_res_mem_44629 = mem_44627 - defunc_3_map_res_mem_44549 = None - suff_outer_par_36076 = (self.sizes["mainMagnitude.suff_outer_par_16"] <= m_28478) - segmap_group_sizze_36098 = self.sizes["mainMagnitude.segmap_group_size_36080"] - max_num_groups_45884 = self.sizes["mainMagnitude.segmap_num_groups_36082"] - num_groups_36099 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segmap_group_sizze_36098), - sext_i32_i64(max_num_groups_45884)))) - suff_outer_par_36183 = (self.sizes["mainMagnitude.suff_outer_par_17"] <= nest_sizze_34882) - nest_sizze_36199 = (i32_res_28487 * nest_sizze_34882) - segred_group_sizze_36200 = self.sizes["mainMagnitude.segred_group_size_36142"] - max_num_groups_45885 = self.sizes["mainMagnitude.segred_num_groups_36144"] - num_groups_36201 = 
sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_36199, - segred_group_sizze_36200), - sext_i32_i64(max_num_groups_45885)))) - Ty_42675 = self.sizes["mainMagnitude.Ty_42672"] - Ry_42676 = self.sizes["mainMagnitude.Ry_42674"] - Tx_42677 = self.sizes["mainMagnitude.Tx_42671"] - Rx_42678 = self.sizes["mainMagnitude.Rx_42673"] - Tk_42679 = self.sizes["mainMagnitude.Tk_42670"] - TxRx_42682 = (Tx_42677 * Rx_42678) - TyRy_42683 = (Ty_42675 * Ry_42676) - a_loc_szz_42685 = (Tk_42679 * TyRy_42683) - binop_x_42686 = (Tx_42677 * Tk_42679) - b_loc_szz_42687 = (Rx_42678 * binop_x_42686) - group_sizze_42691 = (Ty_42675 * Tx_42677) - bytes_44648 = (np.int64(4) * nest_sizze_34882) - binop_x_44662 = (Ry_42676 * group_sizze_42691) - binop_x_44663 = (Rx_42678 * binop_x_44662) - bytes_44660 = (np.int64(4) * binop_x_44663) - binop_x_44654 = (Ry_42676 * Rx_42678) - bytes_44653 = (np.int64(4) * binop_x_44654) - bytes_44665 = (np.int64(4) * a_loc_szz_42685) - bytes_44667 = (np.int64(4) * b_loc_szz_42687) - bytes_44736 = (np.int64(4) * binop_x_44662) - binop_x_44742 = (Rx_42678 * group_sizze_42691) - bytes_44740 = (np.int64(4) * binop_x_44742) - bytes_44728 = (np.int64(4) * Ry_42676) - bytes_44730 = (np.int64(4) * Rx_42678) - binop_x_45464 = (np.int64(4) * Ty_42675) - binop_x_45465 = (Tx_42677 * binop_x_45464) - binop_x_45466 = (Ry_42676 * binop_x_45465) - sizze_45467 = (Rx_42678 * binop_x_45466) - num_threads_45596 = (segmap_group_sizze_36098 * num_groups_36099) - total_sizze_45597 = (bytes_44448 * num_threads_45596) - local_memory_capacity_46015 = self.max_local_memory + gauss_jordan_res_r_mem_124366 = None + defunc_3_map_res_mem_124372 = mem_124371 + defunc_3_map_res_mem_124294 = None + suff_outer_par_100952 = (self.sizes["mainMagnitude.suff_outer_par_28"] <= m_73008) + segmap_group_sizze_100974 = self.sizes["mainMagnitude.segmap_group_size_100956"] + max_num_groups_128887 = self.sizes["mainMagnitude.segmap_num_groups_100958"] + num_groups_100975 = 
sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_100974), + sext_i32_i64(max_num_groups_128887)))) + suff_outer_par_101059 = (self.sizes["mainMagnitude.suff_outer_par_29"] <= binop_x_120244) + nest_sizze_101075 = (n_73011 * binop_x_120244) + segred_group_sizze_101076 = self.sizes["mainMagnitude.segred_group_size_101018"] + max_num_groups_128888 = self.sizes["mainMagnitude.segred_num_groups_101020"] + num_groups_101077 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_101075, + segred_group_sizze_101076), + sext_i32_i64(max_num_groups_128888)))) + Ty_118372 = self.sizes["mainMagnitude.Ty_118369"] + Ry_118373 = self.sizes["mainMagnitude.Ry_118371"] + Tx_118374 = self.sizes["mainMagnitude.Tx_118368"] + Rx_118375 = self.sizes["mainMagnitude.Rx_118370"] + Tk_118376 = self.sizes["mainMagnitude.Tk_118367"] + TxRx_118379 = (Tx_118374 * Rx_118375) + TyRy_118380 = (Ty_118372 * Ry_118373) + a_loc_szz_118382 = (Tk_118376 * TyRy_118380) + binop_x_118383 = (Tx_118374 * Tk_118376) + b_loc_szz_118384 = (Rx_118375 * binop_x_118383) + group_sizze_118388 = (Ty_118372 * Tx_118374) + binop_x_124405 = (Ry_118373 * group_sizze_118388) + binop_x_124406 = (Rx_118375 * binop_x_124405) + bytes_124403 = (np.int64(8) * binop_x_124406) + binop_x_124397 = (Ry_118373 * Rx_118375) + bytes_124396 = (np.int64(8) * binop_x_124397) + bytes_124408 = (np.int64(8) * a_loc_szz_118382) + bytes_124410 = (np.int64(8) * b_loc_szz_118384) + bytes_124479 = (np.int64(8) * binop_x_124405) + binop_x_124485 = (Rx_118375 * group_sizze_118388) + bytes_124483 = (np.int64(8) * binop_x_124485) + bytes_124471 = (np.int64(8) * Ry_118373) + bytes_124473 = (np.int64(8) * Rx_118375) + binop_x_125378 = (np.int64(8) * Ty_118372) + binop_x_125379 = (Tx_118374 * binop_x_125378) + binop_x_125380 = (Ry_118373 * binop_x_125379) + sizze_125381 = (Rx_118375 * binop_x_125380) + num_threads_126061 = (segmap_group_sizze_100974 * num_groups_100975) + total_sizze_126062 = (bytes_120247 * 
num_threads_126061) + local_memory_capacity_129018 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46015)) and suff_outer_par_36076): - mem_44632 = opencl_alloc(self, bytes_44398, "mem_44632") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44632, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_28477, m_28478) - mem_44650 = opencl_alloc(self, bytes_44648, "mem_44650") - mem_44635 = opencl_alloc(self, total_sizze_45597, "mem_44635") - if ((1 * (np.long(num_groups_36099) * np.long(segmap_group_sizze_36098))) != 0): - self.mainMagnitudezisegmap_36078_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int32(n_28481), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int64(num_groups_36099), - binop_p_mem_44390, - mem_44632, mem_44635, - mem_44650) + sext_i32_i64(local_memory_capacity_129018)) and suff_outer_par_100952): + mem_124375 = opencl_alloc(self, bytes_120125, "mem_124375") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124375, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_73007, m_73008) + mem_124393 = opencl_alloc(self, bytes_121990, "mem_124393") + mem_124378 = opencl_alloc(self, total_sizze_126062, "mem_124378") + if ((1 * (np.int64(num_groups_100975) * np.int64(segmap_group_sizze_100974))) != 0): + self.mainMagnitudezisegmap_100954_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_100975), + np.int64(num_threads_126061), + binop_p_mem_120117, + mem_124375, mem_124378, + mem_124393) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36078_var, - ((np.long(num_groups_36099) * np.long(segmap_group_sizze_36098)),), - (np.long(segmap_group_sizze_36098),)) + self.mainMagnitudezisegmap_100954_var, + ((np.int64(num_groups_100975) * np.int64(segmap_group_sizze_100974)),), + (np.int64(segmap_group_sizze_100974),)) if synchronous: sync(self) - mem_44632 = 
None - mem_44635 = None - mem_44848 = opencl_alloc(self, bytes_44648, "mem_44848") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44848, np.int64(0), - mem_44650, np.int64(0), - np.int64(1), m_28478, - i32_res_28493) - mem_44650 = None - defunc_3_map_res_mem_44850 = mem_44848 + mem_124375 = None + mem_124378 = None + mem_124591 = opencl_alloc(self, bytes_121990, "mem_124591") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124591, np.int64(0), + mem_124393, np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_124393 = None + defunc_3_map_res_mem_124593 = mem_124591 else: - local_memory_capacity_46014 = self.max_local_memory - if (sle64(((bytes_44665 + srem64((np.int64(8) - srem64(bytes_44665, - np.int64(8))), - np.int64(8))) + (bytes_44667 + srem64((np.int64(8) - srem64(bytes_44667, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46014)) and suff_outer_par_36183): - tk_div_tx_42680 = sdiv_up64(Tk_42679, Tx_42677) - tk_div_ty_42681 = sdiv_up64(Tk_42679, Ty_42675) - gridDim_x_42688 = sdiv_up64(i32_res_28493, TxRx_42682) - gridDim_y_42689 = sdiv_up64(m_28478, TyRy_42683) - grid_sizze_42690 = (gridDim_x_42688 * gridDim_y_42689) - full_tiles_42719 = squot64(i32_res_28487, Tk_42679) - kk_42926 = (Tk_42679 * full_tiles_42719) - mem_44840 = opencl_alloc(self, bytes_44648, "mem_44840") - if ((1 * (np.long(grid_sizze_42690) * np.long(group_sizze_42691))) != 0): - self.mainMagnitudezisegmap_intragroup_42694_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44667)), - cl.LocalMemory(np.long(bytes_44665)), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28487), - np.int64(i32_res_28493), - np.int64(gridDim_x_42688), - np.int64(full_tiles_42719), - np.int64(kk_42926), - images_mem_44381, - mem_44393, - mem_44840) + local_memory_capacity_129017 = self.max_local_memory + if (sle64(((bytes_124408 + srem64((np.int64(8) - srem64(bytes_124408, + np.int64(8))), + np.int64(8))) + (bytes_124410 + srem64((np.int64(8) - 
srem64(bytes_124410, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129017)) and suff_outer_par_101059): + tk_div_tx_118377 = sdiv_up64(Tk_118376, Tx_118374) + tk_div_ty_118378 = sdiv_up64(Tk_118376, Ty_118372) + gridDim_x_118385 = sdiv_up64(k2p2zq_73023, TxRx_118379) + gridDim_y_118386 = sdiv_up64(m_73008, TyRy_118380) + grid_sizze_118387 = (gridDim_x_118385 * gridDim_y_118386) + full_tiles_118416 = squot64(n_73011, Tk_118376) + kk_118623 = (Tk_118376 * full_tiles_118416) + mem_124583 = opencl_alloc(self, bytes_121990, "mem_124583") + if ((1 * (np.int64(grid_sizze_118387) * np.int64(group_sizze_118388))) != 0): + self.mainMagnitudezisegmap_intragroup_118391_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124410)), + cl.LocalMemory(np.int64(bytes_124408)), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(gridDim_x_118385), + np.int64(full_tiles_118416), + np.int64(kk_118623), + mem_120120, + mem_124142, + mem_124583) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_42694_var, - ((np.long(grid_sizze_42690) * np.long(group_sizze_42691)),), - (np.long(group_sizze_42691),)) + self.mainMagnitudezisegmap_intragroup_118391_var, + ((np.int64(grid_sizze_118387) * np.int64(group_sizze_118388)),), + (np.int64(group_sizze_118388),)) if synchronous: sync(self) - defunc_3_map_res_mem_44845 = mem_44840 + defunc_3_map_res_mem_124588 = mem_124583 else: - mem_44844 = opencl_alloc(self, bytes_44648, "mem_44844") - if slt64((i32_res_28487 * np.int64(2)), segred_group_sizze_36200): - segment_sizze_nonzzero_45954 = smax64(np.int64(1), i32_res_28487) - num_threads_45955 = (num_groups_36201 * segred_group_sizze_36200) - if ((1 * (np.long(num_groups_36201) * np.long(segred_group_sizze_36200))) != 0): - self.mainMagnitudezisegred_small_36148_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36200))), - np.int64(N_28477), - 
np.int64(m_28478), - np.int64(i32_res_28487), - np.int64(i32_res_28493), - np.int64(num_groups_36201), - np.int64(segment_sizze_nonzzero_45954), - images_mem_44381, - binop_p_mem_44390, - mem_44844) + mem_124587 = opencl_alloc(self, bytes_121990, "mem_124587") + if slt64((n_73011 * np.int64(2)), segred_group_sizze_101076): + segment_sizze_nonzzero_128957 = smax64(np.int64(1), n_73011) + num_threads_128958 = (num_groups_101077 * segred_group_sizze_101076) + if ((1 * (np.int64(num_groups_101077) * np.int64(segred_group_sizze_101076))) != 0): + self.mainMagnitudezisegred_small_101024_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101076))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_101077), + np.int64(segment_sizze_nonzzero_128957), + binop_p_mem_120117, + mem_124142, + mem_124587) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_36148_var, - ((np.long(num_groups_36201) * np.long(segred_group_sizze_36200)),), - (np.long(segred_group_sizze_36200),)) + self.mainMagnitudezisegred_small_101024_var, + ((np.int64(num_groups_101077) * np.int64(segred_group_sizze_101076)),), + (np.int64(segred_group_sizze_101076),)) if synchronous: sync(self) else: - groups_per_segment_45975 = sdiv_up64(num_groups_36201, - smax64(np.int64(1), - (m_28478 * i32_res_28493))) - elements_per_thread_45976 = sdiv_up64(i32_res_28487, - (segred_group_sizze_36200 * groups_per_segment_45975)) - virt_num_groups_45977 = (groups_per_segment_45975 * (m_28478 * i32_res_28493)) - num_threads_45978 = (num_groups_36201 * segred_group_sizze_36200) - threads_per_segment_45979 = (groups_per_segment_45975 * segred_group_sizze_36200) - group_res_arr_mem_45980 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_36200 * virt_num_groups_45977)), - "group_res_arr_mem_45980") - mainMagnitudezicounter_mem_45982 = self.mainMagnitudezicounter_mem_45982 - if ((1 * 
(np.long(num_groups_36201) * np.long(segred_group_sizze_36200))) != 0): - self.mainMagnitudezisegred_large_36148_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36200))), - np.int64(N_28477), - np.int64(i32_res_28487), - np.int64(i32_res_28493), - np.int64(num_groups_36201), - np.int64(groups_per_segment_45975), - np.int64(elements_per_thread_45976), - np.int64(virt_num_groups_45977), - np.int64(threads_per_segment_45979), - images_mem_44381, - binop_p_mem_44390, - mem_44844, - group_res_arr_mem_45980, - mainMagnitudezicounter_mem_45982) + groups_per_segment_128978 = sdiv_up64(num_groups_101077, + smax64(np.int64(1), + (m_73008 * k2p2zq_73023))) + elements_per_thread_128979 = sdiv_up64(n_73011, + (segred_group_sizze_101076 * groups_per_segment_128978)) + virt_num_groups_128980 = (groups_per_segment_128978 * (m_73008 * k2p2zq_73023)) + num_threads_128981 = (num_groups_101077 * segred_group_sizze_101076) + threads_per_segment_128982 = (groups_per_segment_128978 * segred_group_sizze_101076) + group_res_arr_mem_128983 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101076 * virt_num_groups_128980)), + "group_res_arr_mem_128983") + mainMagnitudezicounter_mem_128985 = self.mainMagnitudezicounter_mem_128985 + if ((1 * (np.int64(num_groups_101077) * np.int64(segred_group_sizze_101076))) != 0): + self.mainMagnitudezisegred_large_101024_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101076))), + np.int64(N_73007), + np.int64(n_73011), + np.int64(k2p2zq_73023), + np.int64(num_groups_101077), + np.int64(groups_per_segment_128978), + np.int64(elements_per_thread_128979), + np.int64(virt_num_groups_128980), + np.int64(threads_per_segment_128982), + binop_p_mem_120117, + mem_124142, + mem_124587, + group_res_arr_mem_128983, + mainMagnitudezicounter_mem_128985) cl.enqueue_nd_range_kernel(self.queue, - 
self.mainMagnitudezisegred_large_36148_var, - ((np.long(num_groups_36201) * np.long(segred_group_sizze_36200)),), - (np.long(segred_group_sizze_36200),)) + self.mainMagnitudezisegred_large_101024_var, + ((np.int64(num_groups_101077) * np.int64(segred_group_sizze_101076)),), + (np.int64(segred_group_sizze_101076),)) if synchronous: sync(self) - defunc_3_map_res_mem_44845 = mem_44844 - defunc_3_map_res_mem_44850 = defunc_3_map_res_mem_44845 - binop_p_mem_44390 = None - mem_44393 = None - suff_outer_par_36217 = (self.sizes["mainMagnitude.suff_outer_par_18"] <= m_28478) - segmap_group_sizze_36238 = self.sizes["mainMagnitude.segmap_group_size_36221"] - max_num_groups_46016 = self.sizes["mainMagnitude.segmap_num_groups_36223"] - num_groups_36239 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segmap_group_sizze_36238), - sext_i32_i64(max_num_groups_46016)))) - suff_outer_par_36318 = (self.sizes["mainMagnitude.suff_outer_par_19"] <= nest_sizze_34882) - segred_group_sizze_36333 = self.sizes["mainMagnitude.segred_group_size_36279"] - max_num_groups_46017 = self.sizes["mainMagnitude.segred_num_groups_36281"] - num_groups_36334 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44546, - segred_group_sizze_36333), - sext_i32_i64(max_num_groups_46017)))) - tile_sizze_43138 = self.sizes["mainMagnitude.tile_size_43137"] - group_sizze_43139 = (tile_sizze_43138 * tile_sizze_43138) - bytes_44882 = (np.int64(4) * group_sizze_43139) - binop_x_45480 = (np.int64(4) * tile_sizze_43138) - sizze_45481 = (tile_sizze_43138 * binop_x_45480) - num_threads_45600 = (segmap_group_sizze_36238 * num_groups_36239) - total_sizze_45601 = (bytes_44448 * num_threads_45600) - local_memory_capacity_46103 = self.max_local_memory + defunc_3_map_res_mem_124588 = mem_124587 + defunc_3_map_res_mem_124593 = defunc_3_map_res_mem_124588 + binop_p_mem_120117 = None + mem_120120 = None + suff_outer_par_101093 = (self.sizes["mainMagnitude.suff_outer_par_30"] <= m_73008) + 
segmap_group_sizze_101114 = self.sizes["mainMagnitude.segmap_group_size_101097"] + max_num_groups_129019 = self.sizes["mainMagnitude.segmap_num_groups_101099"] + num_groups_101115 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_101114), + sext_i32_i64(max_num_groups_129019)))) + suff_outer_par_101194 = (self.sizes["mainMagnitude.suff_outer_par_31"] <= binop_x_120244) + segred_group_sizze_101209 = self.sizes["mainMagnitude.segred_group_size_101155"] + max_num_groups_129020 = self.sizes["mainMagnitude.segred_num_groups_101157"] + num_groups_101210 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_121999, + segred_group_sizze_101209), + sext_i32_i64(max_num_groups_129020)))) + tile_sizze_118835 = self.sizes["mainMagnitude.tile_size_118834"] + group_sizze_118836 = (tile_sizze_118835 * tile_sizze_118835) + bytes_124625 = (np.int64(8) * group_sizze_118836) + binop_x_125394 = (np.int64(8) * tile_sizze_118835) + sizze_125395 = (tile_sizze_118835 * binop_x_125394) + num_threads_126065 = (segmap_group_sizze_101114 * num_groups_101115) + total_sizze_126066 = (bytes_120247 * num_threads_126065) + local_memory_capacity_129106 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46103)) and suff_outer_par_36217): - mem_44854 = opencl_alloc(self, bytes_44443, "mem_44854") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44854, np.int64(0), - defunc_3_map_res_mem_44629, + sext_i32_i64(local_memory_capacity_129106)) and suff_outer_par_101093): + mem_124597 = opencl_alloc(self, bytes_124188, "mem_124597") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124597, np.int64(0), + defunc_3_map_res_mem_124372, np.int64(0), np.int64(1), - (i32_res_28493 * i32_res_28493), - m_28478) - mem_44857 = opencl_alloc(self, bytes_44648, "mem_44857") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44857, np.int64(0), - defunc_3_map_res_mem_44850, + (k2p2zq_73023 * k2p2zq_73023), + m_73008) + mem_124600 = 
opencl_alloc(self, bytes_121990, "mem_124600") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124600, np.int64(0), + defunc_3_map_res_mem_124593, np.int64(0), np.int64(1), - i32_res_28493, m_28478) - mem_44875 = opencl_alloc(self, bytes_44648, "mem_44875") - mem_44860 = opencl_alloc(self, total_sizze_45601, "mem_44860") - if ((1 * (np.long(num_groups_36239) * np.long(segmap_group_sizze_36238))) != 0): - self.mainMagnitudezisegmap_36219_var.set_args(self.global_failure, - np.int64(m_28478), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int64(num_groups_36239), - mem_44854, mem_44857, - mem_44860, mem_44875) + k2p2zq_73023, m_73008) + mem_124618 = opencl_alloc(self, bytes_121990, "mem_124618") + mem_124603 = opencl_alloc(self, total_sizze_126066, "mem_124603") + if ((1 * (np.int64(num_groups_101115) * np.int64(segmap_group_sizze_101114))) != 0): + self.mainMagnitudezisegmap_101095_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_101115), + np.int64(num_threads_126065), + mem_124597, mem_124600, + mem_124603, mem_124618) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36219_var, - ((np.long(num_groups_36239) * np.long(segmap_group_sizze_36238)),), - (np.long(segmap_group_sizze_36238),)) + self.mainMagnitudezisegmap_101095_var, + ((np.int64(num_groups_101115) * np.int64(segmap_group_sizze_101114)),), + (np.int64(segmap_group_sizze_101114),)) if synchronous: sync(self) - mem_44854 = None - mem_44857 = None - mem_44860 = None - mem_44914 = opencl_alloc(self, bytes_44648, "mem_44914") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44914, np.int64(0), - mem_44875, np.int64(0), - np.int64(1), m_28478, - i32_res_28493) - mem_44875 = None - defunc_4_map_res_mem_44916 = mem_44914 + mem_124597 = None + mem_124600 = None + mem_124603 = None + mem_124657 = opencl_alloc(self, bytes_121990, "mem_124657") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124657, np.int64(0), + mem_124618, 
np.int64(0), + np.int64(1), m_73008, + k2p2zq_73023) + mem_124618 = None + defunc_4_map_res_mem_124659 = mem_124657 else: - local_memory_capacity_46102 = self.max_local_memory - if (sle64(((bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8))) + (bytes_44882 + srem64((np.int64(8) - srem64(bytes_44882, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46102)) and suff_outer_par_36318): - mem_44879 = opencl_alloc(self, bytes_44544, "mem_44879") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44879, np.int64(0), - defunc_3_map_res_mem_44629, + local_memory_capacity_129105 = self.max_local_memory + if (sle64(((bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8))) + (bytes_124625 + srem64((np.int64(8) - srem64(bytes_124625, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129105)) and suff_outer_par_101194): + mem_124622 = opencl_alloc(self, bytes_121997, "mem_124622") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124622, np.int64(0), + defunc_3_map_res_mem_124372, np.int64(0), np.int64(1), - i32_res_28493, - (m_28478 * i32_res_28493)) - num_groups_x_43140 = sdiv_up64(m_28478, tile_sizze_43138) - num_groups_y_43141 = sdiv_up64(i32_res_28493, tile_sizze_43138) - num_groups_top_43142 = (num_groups_x_43140 * num_groups_y_43141) - num_whole_tiles_43159 = squot64(i32_res_28493, tile_sizze_43138) - residual_input_43286 = srem64(i32_res_28493, tile_sizze_43138) - cond_43287 = (residual_input_43286 == np.int64(0)) - mem_44906 = opencl_alloc(self, bytes_44648, "mem_44906") - if ((1 * (np.long(num_groups_top_43142) * np.long(group_sizze_43139))) != 0): - self.mainMagnitudezisegmap_intragroup_43143_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44882)), - cl.LocalMemory(np.long(bytes_44882)), - np.int64(m_28478), - np.int64(i32_res_28493), - np.int64(num_groups_y_43141), - np.int64(num_whole_tiles_43159), - np.int64(residual_input_43286), - 
np.byte(cond_43287), - defunc_3_map_res_mem_44850, - mem_44879, - mem_44906) + k2p2zq_73023, + (m_73008 * k2p2zq_73023)) + num_groups_x_118837 = sdiv_up64(m_73008, tile_sizze_118835) + num_groups_y_118838 = sdiv_up64(k2p2zq_73023, tile_sizze_118835) + num_groups_top_118839 = (num_groups_x_118837 * num_groups_y_118838) + num_whole_tiles_118856 = squot64(k2p2zq_73023, tile_sizze_118835) + residual_input_118983 = srem64(k2p2zq_73023, tile_sizze_118835) + cond_118984 = (residual_input_118983 == np.int64(0)) + mem_124649 = opencl_alloc(self, bytes_121990, "mem_124649") + if ((1 * (np.int64(num_groups_top_118839) * np.int64(group_sizze_118836))) != 0): + self.mainMagnitudezisegmap_intragroup_118840_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124625)), + cl.LocalMemory(np.int64(bytes_124625)), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_y_118838), + np.int64(num_whole_tiles_118856), + np.int64(residual_input_118983), + np.byte(cond_118984), + defunc_3_map_res_mem_124593, + mem_124622, + mem_124649) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_43143_var, - ((np.long(num_groups_top_43142) * np.long(group_sizze_43139)),), - (np.long(group_sizze_43139),)) + self.mainMagnitudezisegmap_intragroup_118840_var, + ((np.int64(num_groups_top_118839) * np.int64(group_sizze_118836)),), + (np.int64(group_sizze_118836),)) if synchronous: sync(self) - mem_44879 = None - defunc_4_map_res_mem_44911 = mem_44906 + mem_124622 = None + defunc_4_map_res_mem_124654 = mem_124649 else: - mem_44910 = opencl_alloc(self, bytes_44648, "mem_44910") - if slt64((i32_res_28493 * np.int64(2)), segred_group_sizze_36333): - segment_sizze_nonzzero_46042 = smax64(np.int64(1), i32_res_28493) - num_threads_46043 = (num_groups_36334 * segred_group_sizze_36333) - if ((1 * (np.long(num_groups_36334) * np.long(segred_group_sizze_36333))) != 0): - self.mainMagnitudezisegred_small_36285_var.set_args(self.global_failure, - 
cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36333))), - np.int64(m_28478), - np.int64(i32_res_28493), - np.int64(num_groups_36334), - np.int64(segment_sizze_nonzzero_46042), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910) + mem_124653 = opencl_alloc(self, bytes_121990, "mem_124653") + if slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_101209): + segment_sizze_nonzzero_129045 = smax64(np.int64(1), k2p2zq_73023) + num_threads_129046 = (num_groups_101210 * segred_group_sizze_101209) + if ((1 * (np.int64(num_groups_101210) * np.int64(segred_group_sizze_101209))) != 0): + self.mainMagnitudezisegred_small_101161_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101209))), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_101210), + np.int64(segment_sizze_nonzzero_129045), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_36285_var, - ((np.long(num_groups_36334) * np.long(segred_group_sizze_36333)),), - (np.long(segred_group_sizze_36333),)) + self.mainMagnitudezisegred_small_101161_var, + ((np.int64(num_groups_101210) * np.int64(segred_group_sizze_101209)),), + (np.int64(segred_group_sizze_101209),)) if synchronous: sync(self) else: - groups_per_segment_46063 = sdiv_up64(num_groups_36334, - smax64(np.int64(1), - (m_28478 * i32_res_28493))) - elements_per_thread_46064 = sdiv_up64(i32_res_28493, - (segred_group_sizze_36333 * groups_per_segment_46063)) - virt_num_groups_46065 = (groups_per_segment_46063 * (m_28478 * i32_res_28493)) - num_threads_46066 = (num_groups_36334 * segred_group_sizze_36333) - threads_per_segment_46067 = (groups_per_segment_46063 * segred_group_sizze_36333) - group_res_arr_mem_46068 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_36333 * virt_num_groups_46065)), - "group_res_arr_mem_46068") - mainMagnitudezicounter_mem_46070 = 
self.mainMagnitudezicounter_mem_46070 - if ((1 * (np.long(num_groups_36334) * np.long(segred_group_sizze_36333))) != 0): - self.mainMagnitudezisegred_large_36285_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36333))), - np.int64(i32_res_28493), - np.int64(num_groups_36334), - np.int64(groups_per_segment_46063), - np.int64(elements_per_thread_46064), - np.int64(virt_num_groups_46065), - np.int64(threads_per_segment_46067), - defunc_3_map_res_mem_44629, - defunc_3_map_res_mem_44850, - mem_44910, - group_res_arr_mem_46068, - mainMagnitudezicounter_mem_46070) + groups_per_segment_129066 = sdiv_up64(num_groups_101210, + smax64(np.int64(1), + (m_73008 * k2p2zq_73023))) + elements_per_thread_129067 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_101209 * groups_per_segment_129066)) + virt_num_groups_129068 = (groups_per_segment_129066 * (m_73008 * k2p2zq_73023)) + num_threads_129069 = (num_groups_101210 * segred_group_sizze_101209) + threads_per_segment_129070 = (groups_per_segment_129066 * segred_group_sizze_101209) + group_res_arr_mem_129071 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101209 * virt_num_groups_129068)), + "group_res_arr_mem_129071") + mainMagnitudezicounter_mem_129073 = self.mainMagnitudezicounter_mem_129073 + if ((1 * (np.int64(num_groups_101210) * np.int64(segred_group_sizze_101209))) != 0): + self.mainMagnitudezisegred_large_101161_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101209))), + np.int64(k2p2zq_73023), + np.int64(num_groups_101210), + np.int64(groups_per_segment_129066), + np.int64(elements_per_thread_129067), + np.int64(virt_num_groups_129068), + np.int64(threads_per_segment_129070), + defunc_3_map_res_mem_124372, + defunc_3_map_res_mem_124593, + mem_124653, + group_res_arr_mem_129071, + mainMagnitudezicounter_mem_129073) 
cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_36285_var, - ((np.long(num_groups_36334) * np.long(segred_group_sizze_36333)),), - (np.long(segred_group_sizze_36333),)) + self.mainMagnitudezisegred_large_101161_var, + ((np.int64(num_groups_101210) * np.int64(segred_group_sizze_101209)),), + (np.int64(segred_group_sizze_101209),)) if synchronous: sync(self) - defunc_4_map_res_mem_44911 = mem_44910 - defunc_4_map_res_mem_44916 = defunc_4_map_res_mem_44911 - defunc_3_map_res_mem_44629 = None - defunc_3_map_res_mem_44850 = None - suff_outer_par_36349 = (self.sizes["mainMagnitude.suff_outer_par_20"] <= m_28478) - segmap_group_sizze_36369 = self.sizes["mainMagnitude.segmap_group_size_36353"] - max_num_groups_46104 = self.sizes["mainMagnitude.segmap_num_groups_36355"] - num_groups_36370 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segmap_group_sizze_36369), - sext_i32_i64(max_num_groups_46104)))) - suff_outer_par_36446 = (self.sizes["mainMagnitude.suff_outer_par_21"] <= binop_x_44399) - nest_sizze_36460 = (i32_res_28493 * binop_x_44399) - segred_group_sizze_36461 = self.sizes["mainMagnitude.segred_group_size_36409"] - max_num_groups_46105 = self.sizes["mainMagnitude.segred_num_groups_36411"] - num_groups_36462 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_36460, - segred_group_sizze_36461), - sext_i32_i64(max_num_groups_46105)))) - Ty_43416 = self.sizes["mainMagnitude.Ty_43413"] - Ry_43417 = self.sizes["mainMagnitude.Ry_43415"] - Tx_43418 = self.sizes["mainMagnitude.Tx_43412"] - Rx_43419 = self.sizes["mainMagnitude.Rx_43414"] - Tk_43420 = self.sizes["mainMagnitude.Tk_43411"] - TxRx_43423 = (Tx_43418 * Rx_43419) - TyRy_43424 = (Ty_43416 * Ry_43417) - a_loc_szz_43426 = (Tk_43420 * TyRy_43424) - binop_x_43427 = (Tx_43418 * Tk_43420) - b_loc_szz_43428 = (Rx_43419 * binop_x_43427) - group_sizze_43432 = (Ty_43416 * Tx_43418) - bytes_44921 = (np.int64(4) * N_28477) - binop_x_44952 = (Ry_43417 * group_sizze_43432) - 
binop_x_44953 = (Rx_43419 * binop_x_44952) - bytes_44950 = (np.int64(4) * binop_x_44953) - binop_x_44944 = (Ry_43417 * Rx_43419) - bytes_44943 = (np.int64(4) * binop_x_44944) - bytes_44955 = (np.int64(4) * a_loc_szz_43426) - bytes_44957 = (np.int64(4) * b_loc_szz_43428) - bytes_45026 = (np.int64(4) * binop_x_44952) - binop_x_45032 = (Rx_43419 * group_sizze_43432) - bytes_45030 = (np.int64(4) * binop_x_45032) - bytes_45018 = (np.int64(4) * Ry_43417) - bytes_45020 = (np.int64(4) * Rx_43419) - binop_x_45494 = (np.int64(4) * Ty_43416) - binop_x_45495 = (Tx_43418 * binop_x_45494) - binop_x_45496 = (Ry_43417 * binop_x_45495) - sizze_45497 = (Rx_43419 * binop_x_45496) - num_threads_45604 = (segmap_group_sizze_36369 * num_groups_36370) - total_sizze_45605 = (bytes_44921 * num_threads_45604) - local_memory_capacity_46235 = self.max_local_memory + defunc_4_map_res_mem_124654 = mem_124653 + defunc_4_map_res_mem_124659 = defunc_4_map_res_mem_124654 + defunc_3_map_res_mem_124372 = None + defunc_3_map_res_mem_124593 = None + suff_outer_par_101225 = (self.sizes["mainMagnitude.suff_outer_par_32"] <= m_73008) + segmap_group_sizze_101245 = self.sizes["mainMagnitude.segmap_group_size_101229"] + max_num_groups_129107 = self.sizes["mainMagnitude.segmap_num_groups_101231"] + num_groups_101246 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_101245), + sext_i32_i64(max_num_groups_129107)))) + suff_outer_par_101322 = (self.sizes["mainMagnitude.suff_outer_par_33"] <= binop_x_120126) + nest_sizze_101336 = (k2p2zq_73023 * binop_x_120126) + segred_group_sizze_101337 = self.sizes["mainMagnitude.segred_group_size_101285"] + max_num_groups_129108 = self.sizes["mainMagnitude.segred_num_groups_101287"] + num_groups_101338 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_101336, + segred_group_sizze_101337), + sext_i32_i64(max_num_groups_129108)))) + Ty_119113 = self.sizes["mainMagnitude.Ty_119110"] + Ry_119114 = 
self.sizes["mainMagnitude.Ry_119112"] + Tx_119115 = self.sizes["mainMagnitude.Tx_119109"] + Rx_119116 = self.sizes["mainMagnitude.Rx_119111"] + Tk_119117 = self.sizes["mainMagnitude.Tk_119108"] + TxRx_119120 = (Tx_119115 * Rx_119116) + TyRy_119121 = (Ty_119113 * Ry_119114) + a_loc_szz_119123 = (Tk_119117 * TyRy_119121) + binop_x_119124 = (Tx_119115 * Tk_119117) + b_loc_szz_119125 = (Rx_119116 * binop_x_119124) + group_sizze_119129 = (Ty_119113 * Tx_119115) + bytes_124664 = (np.int64(8) * N_73007) + binop_x_124695 = (Ry_119114 * group_sizze_119129) + binop_x_124696 = (Rx_119116 * binop_x_124695) + bytes_124693 = (np.int64(8) * binop_x_124696) + binop_x_124687 = (Ry_119114 * Rx_119116) + bytes_124686 = (np.int64(8) * binop_x_124687) + bytes_124698 = (np.int64(8) * a_loc_szz_119123) + bytes_124700 = (np.int64(8) * b_loc_szz_119125) + bytes_124769 = (np.int64(8) * binop_x_124695) + binop_x_124775 = (Rx_119116 * group_sizze_119129) + bytes_124773 = (np.int64(8) * binop_x_124775) + bytes_124761 = (np.int64(8) * Ry_119114) + bytes_124763 = (np.int64(8) * Rx_119116) + binop_x_125408 = (np.int64(8) * Ty_119113) + binop_x_125409 = (Tx_119115 * binop_x_125408) + binop_x_125410 = (Ry_119114 * binop_x_125409) + sizze_125411 = (Rx_119116 * binop_x_125410) + num_threads_126069 = (segmap_group_sizze_101245 * num_groups_101246) + total_sizze_126070 = (bytes_124664 * num_threads_126069) + local_memory_capacity_129238 = self.max_local_memory if (sle64(np.int64(0), - sext_i32_i64(local_memory_capacity_46235)) and suff_outer_par_36349): - mem_44919 = opencl_alloc(self, bytes_44648, "mem_44919") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44919, np.int64(0), - defunc_4_map_res_mem_44916, + sext_i32_i64(local_memory_capacity_129238)) and suff_outer_par_101225): + mem_124662 = opencl_alloc(self, bytes_121990, "mem_124662") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124662, np.int64(0), + defunc_4_map_res_mem_124659, np.int64(0), np.int64(1), - i32_res_28493, m_28478) - 
mem_44937 = opencl_alloc(self, bytes_44398, "mem_44937") - mem_44922 = opencl_alloc(self, total_sizze_45605, "mem_44922") - if ((1 * (np.long(num_groups_36370) * np.long(segmap_group_sizze_36369))) != 0): - self.mainMagnitudezisegmap_36351_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int32(k2p2zq_28491), - np.int64(i32_res_28493), - np.int64(num_groups_36370), - mem_44397, mem_44919, - mem_44922, mem_44937) + k2p2zq_73023, m_73008) + mem_124680 = opencl_alloc(self, bytes_120125, "mem_124680") + mem_124665 = opencl_alloc(self, total_sizze_126070, "mem_124665") + if ((1 * (np.int64(num_groups_101246) * np.int64(segmap_group_sizze_101245))) != 0): + self.mainMagnitudezisegmap_101227_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_101246), + np.int64(num_threads_126069), + mem_120124, mem_124662, + mem_124665, mem_124680) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36351_var, - ((np.long(num_groups_36370) * np.long(segmap_group_sizze_36369)),), - (np.long(segmap_group_sizze_36369),)) + self.mainMagnitudezisegmap_101227_var, + ((np.int64(num_groups_101246) * np.int64(segmap_group_sizze_101245)),), + (np.int64(segmap_group_sizze_101245),)) if synchronous: sync(self) - mem_44919 = None - mem_44922 = None - mem_45138 = opencl_alloc(self, bytes_44398, "mem_45138") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45138, np.int64(0), - mem_44937, np.int64(0), - np.int64(1), m_28478, N_28477) - mem_44937 = None - defunc_3_map_res_mem_45140 = mem_45138 + mem_124662 = None + mem_124665 = None + mem_124881 = opencl_alloc(self, bytes_120125, "mem_124881") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124881, np.int64(0), + mem_124680, np.int64(0), + np.int64(1), m_73008, N_73007) + mem_124680 = None + defunc_3_map_res_mem_124883 = mem_124881 else: - local_memory_capacity_46234 = self.max_local_memory - if (sle64(((bytes_44955 + 
srem64((np.int64(8) - srem64(bytes_44955, - np.int64(8))), - np.int64(8))) + (bytes_44957 + srem64((np.int64(8) - srem64(bytes_44957, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46234)) and suff_outer_par_36446): - mem_44940 = opencl_alloc(self, bytes_44383, "mem_44940") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44940, np.int64(0), - mem_44397, np.int64(0), - np.int64(1), i32_res_28493, - N_28477) - tk_div_tx_43421 = sdiv_up64(Tk_43420, Tx_43418) - tk_div_ty_43422 = sdiv_up64(Tk_43420, Ty_43416) - gridDim_x_43429 = sdiv_up64(N_28477, TxRx_43423) - gridDim_y_43430 = sdiv_up64(m_28478, TyRy_43424) - grid_sizze_43431 = (gridDim_x_43429 * gridDim_y_43430) - full_tiles_43460 = squot64(i32_res_28493, Tk_43420) - kk_43663 = (Tk_43420 * full_tiles_43460) - mem_45130 = opencl_alloc(self, bytes_44398, "mem_45130") - if ((1 * (np.long(grid_sizze_43431) * np.long(group_sizze_43432))) != 0): - self.mainMagnitudezisegmap_intragroup_43435_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44957)), - cl.LocalMemory(np.long(bytes_44955)), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28493), - np.int64(gridDim_x_43429), - np.int64(full_tiles_43460), - np.int64(kk_43663), - defunc_4_map_res_mem_44916, - mem_44940, - mem_45130) + local_memory_capacity_129237 = self.max_local_memory + if (sle64(((bytes_124698 + srem64((np.int64(8) - srem64(bytes_124698, + np.int64(8))), + np.int64(8))) + (bytes_124700 + srem64((np.int64(8) - srem64(bytes_124700, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129237)) and suff_outer_par_101322): + mem_124683 = opencl_alloc(self, bytes_120110, "mem_124683") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124683, np.int64(0), + mem_120124, np.int64(0), + np.int64(1), k2p2zq_73023, + N_73007) + tk_div_tx_119118 = sdiv_up64(Tk_119117, Tx_119115) + tk_div_ty_119119 = sdiv_up64(Tk_119117, Ty_119113) + gridDim_x_119126 = sdiv_up64(N_73007, TxRx_119120) + 
gridDim_y_119127 = sdiv_up64(m_73008, TyRy_119121) + grid_sizze_119128 = (gridDim_x_119126 * gridDim_y_119127) + full_tiles_119157 = squot64(k2p2zq_73023, Tk_119117) + kk_119360 = (Tk_119117 * full_tiles_119157) + mem_124873 = opencl_alloc(self, bytes_120125, "mem_124873") + if ((1 * (np.int64(grid_sizze_119128) * np.int64(group_sizze_119129))) != 0): + self.mainMagnitudezisegmap_intragroup_119132_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124700)), + cl.LocalMemory(np.int64(bytes_124698)), + np.int64(N_73007), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(gridDim_x_119126), + np.int64(full_tiles_119157), + np.int64(kk_119360), + defunc_4_map_res_mem_124659, + mem_124683, + mem_124873) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_43435_var, - ((np.long(grid_sizze_43431) * np.long(group_sizze_43432)),), - (np.long(group_sizze_43432),)) + self.mainMagnitudezisegmap_intragroup_119132_var, + ((np.int64(grid_sizze_119128) * np.int64(group_sizze_119129)),), + (np.int64(group_sizze_119129),)) if synchronous: sync(self) - mem_44940 = None - defunc_3_map_res_mem_45135 = mem_45130 + mem_124683 = None + defunc_3_map_res_mem_124878 = mem_124873 else: - mem_45134 = opencl_alloc(self, bytes_44398, "mem_45134") - if slt64((i32_res_28493 * np.int64(2)), segred_group_sizze_36461): - segment_sizze_nonzzero_46174 = smax64(np.int64(1), i32_res_28493) - num_threads_46175 = (num_groups_36462 * segred_group_sizze_36461) - if ((1 * (np.long(num_groups_36462) * np.long(segred_group_sizze_36461))) != 0): - self.mainMagnitudezisegred_small_36415_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36461))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28493), - np.int64(num_groups_36462), - np.int64(segment_sizze_nonzzero_46174), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134) + mem_124877 = opencl_alloc(self, bytes_120125, "mem_124877") + if 
slt64((k2p2zq_73023 * np.int64(2)), segred_group_sizze_101337): + segment_sizze_nonzzero_129177 = smax64(np.int64(1), k2p2zq_73023) + num_threads_129178 = (num_groups_101338 * segred_group_sizze_101337) + if ((1 * (np.int64(num_groups_101338) * np.int64(segred_group_sizze_101337))) != 0): + self.mainMagnitudezisegred_small_101291_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101337))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(k2p2zq_73023), + np.int64(num_groups_101338), + np.int64(segment_sizze_nonzzero_129177), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_36415_var, - ((np.long(num_groups_36462) * np.long(segred_group_sizze_36461)),), - (np.long(segred_group_sizze_36461),)) + self.mainMagnitudezisegred_small_101291_var, + ((np.int64(num_groups_101338) * np.int64(segred_group_sizze_101337)),), + (np.int64(segred_group_sizze_101337),)) if synchronous: sync(self) else: - groups_per_segment_46195 = sdiv_up64(num_groups_36462, - smax64(np.int64(1), - (m_28478 * N_28477))) - elements_per_thread_46196 = sdiv_up64(i32_res_28493, - (segred_group_sizze_36461 * groups_per_segment_46195)) - virt_num_groups_46197 = (groups_per_segment_46195 * (m_28478 * N_28477)) - num_threads_46198 = (num_groups_36462 * segred_group_sizze_36461) - threads_per_segment_46199 = (groups_per_segment_46195 * segred_group_sizze_36461) - group_res_arr_mem_46200 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_36461 * virt_num_groups_46197)), - "group_res_arr_mem_46200") - mainMagnitudezicounter_mem_46202 = self.mainMagnitudezicounter_mem_46202 - if ((1 * (np.long(num_groups_36462) * np.long(segred_group_sizze_36461))) != 0): - self.mainMagnitudezisegred_large_36415_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_36461))), - np.int64(N_28477), - 
np.int64(i32_res_28493), - np.int64(num_groups_36462), - np.int64(groups_per_segment_46195), - np.int64(elements_per_thread_46196), - np.int64(virt_num_groups_46197), - np.int64(threads_per_segment_46199), - mem_44397, - defunc_4_map_res_mem_44916, - mem_45134, - group_res_arr_mem_46200, - mainMagnitudezicounter_mem_46202) + groups_per_segment_129198 = sdiv_up64(num_groups_101338, + smax64(np.int64(1), + (m_73008 * N_73007))) + elements_per_thread_129199 = sdiv_up64(k2p2zq_73023, + (segred_group_sizze_101337 * groups_per_segment_129198)) + virt_num_groups_129200 = (groups_per_segment_129198 * (m_73008 * N_73007)) + num_threads_129201 = (num_groups_101338 * segred_group_sizze_101337) + threads_per_segment_129202 = (groups_per_segment_129198 * segred_group_sizze_101337) + group_res_arr_mem_129203 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101337 * virt_num_groups_129200)), + "group_res_arr_mem_129203") + mainMagnitudezicounter_mem_129205 = self.mainMagnitudezicounter_mem_129205 + if ((1 * (np.int64(num_groups_101338) * np.int64(segred_group_sizze_101337))) != 0): + self.mainMagnitudezisegred_large_101291_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101337))), + np.int64(N_73007), + np.int64(k2p2zq_73023), + np.int64(num_groups_101338), + np.int64(groups_per_segment_129198), + np.int64(elements_per_thread_129199), + np.int64(virt_num_groups_129200), + np.int64(threads_per_segment_129202), + mem_120124, + defunc_4_map_res_mem_124659, + mem_124877, + group_res_arr_mem_129203, + mainMagnitudezicounter_mem_129205) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_36415_var, - ((np.long(num_groups_36462) * np.long(segred_group_sizze_36461)),), - (np.long(segred_group_sizze_36461),)) + self.mainMagnitudezisegred_large_101291_var, + ((np.int64(num_groups_101338) * np.int64(segred_group_sizze_101337)),), + (np.int64(segred_group_sizze_101337),)) if 
synchronous: sync(self) - defunc_3_map_res_mem_45135 = mem_45134 - defunc_3_map_res_mem_45140 = defunc_3_map_res_mem_45135 - mem_44397 = None - defunc_4_map_res_mem_44916 = None - i_28781 = (N_28477 - np.int64(1)) - x_28782 = sle64(np.int64(0), i_28781) - y_28783 = slt64(i_28781, N_28477) - bounds_check_28784 = (x_28782 and y_28783) - index_certs_28785 = True - assert bounds_check_28784, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:20:13-20\n #2 bfastfinal.fut:76:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:71:5-77:25\n #6 bfastfinal.fut:181:5-58\n #7 bfastfinal.fut:176:1-182:37\n" % ("Index [", - i_28781, + defunc_3_map_res_mem_124878 = mem_124877 + defunc_3_map_res_mem_124883 = defunc_3_map_res_mem_124878 + mem_120124 = None + defunc_4_map_res_mem_124659 = None + i_74783 = (N_73007 - np.int64(1)) + x_74784 = sle64(np.int64(0), i_74783) + y_74785 = slt64(i_74783, N_73007) + bounds_check_74786 = (x_74784 and y_74785) + index_certs_74787 = True + assert bounds_check_74786, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:26:29-34\n #1 helpers.fut:14:13-20\n #2 bfastfinal.fut:87:16-75\n #3 /prelude/soacs.fut:67:19-23\n #4 /prelude/soacs.fut:67:3-37\n #5 bfastfinal.fut:82:5-88:25\n #6 bfastfinal.fut:192:5-74\n #7 bfastfinal.fut:187:1-193:48\n" % ("Index [", + i_74783, "] out of bounds for array of shape [", - N_28477, + N_73007, "].")) - fits_36591 = sle64(N_28477, max_group_sizze_35230) - suff_intra_par_36589 = (self.sizes["mainMagnitude.suff_intra_par_24"] <= N_28477) - intra_suff_and_fits_36592 = (suff_intra_par_36589 and fits_36591) - segscan_group_sizze_36741 = self.sizes["mainMagnitude.segscan_group_size_36718"] - max_num_groups_46236 = self.sizes["mainMagnitude.segscan_num_groups_36720"] - num_groups_36742 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(binop_x_44399, - segscan_group_sizze_36741), - sext_i32_i64(max_num_groups_46236)))) - 
segmap_group_sizze_36761 = self.sizes["mainMagnitude.segmap_group_size_36709"] - segmap_group_sizze_36797 = self.sizes["mainMagnitude.segmap_group_size_36634"] - bytes_45152 = (np.int64(4) * m_28478) - bytes_45143 = (np.int64(8) * N_28477) - bytes_45161 = (np.int64(8) * binop_x_44399) - local_memory_capacity_46338 = self.max_local_memory - if (sle64(((((bytes_45143 + srem64((np.int64(8) - srem64(bytes_45143, - np.int64(8))), - np.int64(8))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))) + (bytes_44921 + srem64((np.int64(8) - srem64(bytes_44921, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46338)) and intra_suff_and_fits_36592): - mem_45153 = opencl_alloc(self, bytes_45152, "mem_45153") - mem_45156 = opencl_alloc(self, bytes_44398, "mem_45156") - mem_45159 = opencl_alloc(self, bytes_44398, "mem_45159") - if ((1 * (np.long(m_28478) * np.long(N_28477))) != 0): - self.mainMagnitudezisegmap_intragroup_36514_var.set_args(self.global_failure, - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_44921)), - cl.LocalMemory(np.long(bytes_45143)), - np.int64(N_28477), - np.int64(i_28781), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45153, - mem_45156, - mem_45159) + fits_101364 = sle64(N_73007, max_group_sizze_90561) + suff_intra_par_101362 = (self.sizes["mainMagnitude.suff_intra_par_34"] <= N_73007) + intra_suff_and_fits_101365 = (suff_intra_par_101362 and fits_101364) + segscan_group_sizze_101500 = self.sizes["mainMagnitude.segscan_group_size_101477"] + max_num_groups_129239 = self.sizes["mainMagnitude.segscan_num_groups_101479"] + num_groups_101501 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(binop_x_120126, + segscan_group_sizze_101500), + sext_i32_i64(max_num_groups_129239)))) + segmap_group_sizze_101549 = 
self.sizes["mainMagnitude.segmap_group_size_101408"] + local_memory_capacity_129323 = self.max_local_memory + if (sle64(((((bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))) + (bytes_124664 + srem64((np.int64(8) - srem64(bytes_124664, + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129323)) and intra_suff_and_fits_101365): + mem_124896 = opencl_alloc(self, bytes_120173, "mem_124896") + mem_124899 = opencl_alloc(self, bytes_120125, "mem_124899") + mem_124902 = opencl_alloc(self, bytes_120125, "mem_124902") + if ((1 * (np.int64(m_73008) * np.int64(N_73007))) != 0): + self.mainMagnitudezisegmap_intragroup_101360_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + cl.LocalMemory(np.int64(bytes_124664)), + np.int64(N_73007), + np.int64(i_74783), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124896, + mem_124899, + mem_124902) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_36514_var, - ((np.long(m_28478) * np.long(N_28477)),), - (np.long(N_28477),)) + self.mainMagnitudezisegmap_intragroup_101360_var, + ((np.int64(m_73008) * np.int64(N_73007)),), + (np.int64(N_73007),)) if synchronous: sync(self) - defunc_4_map_res_mem_45177 = mem_45153 - defunc_4_map_res_mem_45178 = mem_45156 - defunc_4_map_res_mem_45179 = mem_45159 + defunc_4_map_res_mem_124919 = mem_124896 + defunc_4_map_res_mem_124920 = mem_124899 + defunc_4_map_res_mem_124921 = mem_124902 else: - mem_45163 = opencl_alloc(self, bytes_45161, "mem_45163") - mem_45166 = opencl_alloc(self, bytes_44398, "mem_45166") - if slt64(np.int64(0), (m_28478 * N_28477)): - stage1_max_num_groups_46254 = self.max_group_size - 
stage1_num_groups_46255 = smin64(stage1_max_num_groups_46254, - num_groups_36742) - num_threads_46256 = sext_i64_i32((stage1_num_groups_46255 * segscan_group_sizze_36741)) - if ((1 * (np.long(stage1_num_groups_46255) * np.long(segscan_group_sizze_36741))) != 0): - self.mainMagnitudeziscan_stage1_36724_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(8) * segscan_group_sizze_36741)))), - np.int64(N_28477), - np.int64(m_28478), - np.int32(num_threads_46256), - images_mem_44381, - defunc_3_map_res_mem_45140, - mem_45163, - mem_45166) + mem_124906 = opencl_alloc(self, bytes_120125, "mem_124906") + mem_124909 = opencl_alloc(self, bytes_120125, "mem_124909") + if slt64(np.int64(0), (m_73008 * N_73007)): + stage1_max_num_groups_129257 = self.max_group_size + stage1_num_groups_129258 = smin64(stage1_max_num_groups_129257, + num_groups_101501) + num_threads_129259 = sext_i64_i32((stage1_num_groups_129258 * segscan_group_sizze_101500)) + if ((1 * (np.int64(stage1_num_groups_129258) * np.int64(segscan_group_sizze_101500))) != 0): + self.mainMagnitudeziscan_stage1_101483_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_101500)))), + np.int64(N_73007), + np.int64(m_73008), + np.int32(num_threads_129259), + mem_124142, + defunc_3_map_res_mem_124883, + mem_124906, + mem_124909) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage1_36724_var, - ((np.long(stage1_num_groups_46255) * np.long(segscan_group_sizze_36741)),), - (np.long(segscan_group_sizze_36741),)) + self.mainMagnitudeziscan_stage1_101483_var, + ((np.int64(stage1_num_groups_129258) * np.int64(segscan_group_sizze_101500)),), + (np.int64(segscan_group_sizze_101500),)) if synchronous: sync(self) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46255))) != 0): - self.mainMagnitudeziscan_stage2_36724_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - 
(np.int32(8) * stage1_num_groups_46255)))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(stage1_num_groups_46255), - np.int32(num_threads_46256), - mem_45163) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129258))) != 0): + self.mainMagnitudeziscan_stage2_101483_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129258)))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(stage1_num_groups_129258), + np.int32(num_threads_129259), + mem_124906) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage2_36724_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46255)),), - (np.long(stage1_num_groups_46255),)) + self.mainMagnitudeziscan_stage2_101483_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129258)),), + (np.int64(stage1_num_groups_129258),)) if synchronous: sync(self) - required_groups_46298 = sext_i64_i32(sdiv_up64((m_28478 * N_28477), - segscan_group_sizze_36741)) - if ((1 * (np.long(num_groups_36742) * np.long(segscan_group_sizze_36741))) != 0): - self.mainMagnitudeziscan_stage3_36724_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int64(num_groups_36742), - np.int32(num_threads_46256), - np.int32(required_groups_46298), - mem_45163) + required_groups_129301 = sext_i64_i32(sdiv_up64((m_73008 * N_73007), + segscan_group_sizze_101500)) + if ((1 * (np.int64(num_groups_101501) * np.int64(segscan_group_sizze_101500))) != 0): + self.mainMagnitudeziscan_stage3_101483_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + np.int64(num_groups_101501), + np.int32(num_threads_129259), + np.int32(required_groups_129301), + mem_124906) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage3_36724_var, - ((np.long(num_groups_36742) * np.long(segscan_group_sizze_36741)),), - (np.long(segscan_group_sizze_36741),)) + self.mainMagnitudeziscan_stage3_101483_var, + 
((np.int64(num_groups_101501) * np.int64(segscan_group_sizze_101500)),), + (np.int64(segscan_group_sizze_101500),)) if synchronous: sync(self) - segmap_usable_groups_36762 = sdiv_up64(m_28478, segmap_group_sizze_36761) - mem_45169 = opencl_alloc(self, bytes_45152, "mem_45169") - if ((1 * (np.long(segmap_usable_groups_36762) * np.long(segmap_group_sizze_36761))) != 0): - self.mainMagnitudezisegmap_36707_var.set_args(self.global_failure, - np.int64(N_28477), - np.int64(m_28478), - np.int64(i_28781), - mem_45163, mem_45169) + mem_124911 = opencl_alloc(self, bytes_120173, "mem_124911") + group_sizze_129316 = self.sizes["mainMagnitude.group_size_129316"] + num_groups_129317 = sdiv_up64(m_73008, group_sizze_129316) + if ((1 * (np.int64(num_groups_129317) * np.int64(group_sizze_129316))) != 0): + self.mainMagnitudezicopy_129313_var.set_args(np.int64(N_73007), + np.int64(m_73008), + np.int64(i_74783), + mem_124906, mem_124911) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36707_var, - ((np.long(segmap_usable_groups_36762) * np.long(segmap_group_sizze_36761)),), - (np.long(segmap_group_sizze_36761),)) + self.mainMagnitudezicopy_129313_var, + ((np.int64(num_groups_129317) * np.int64(group_sizze_129316)),), + (np.int64(group_sizze_129316),)) if synchronous: sync(self) - mem_45172 = opencl_alloc(self, bytes_44398, "mem_45172") - self.futhark_builtinzhreplicate_f32(mem_45172, (m_28478 * N_28477), + mem_124914 = opencl_alloc(self, bytes_120125, "mem_124914") + self.futhark_builtinzhreplicate_f64(mem_124914, (m_73008 * N_73007), np.nan) - mem_45175 = opencl_alloc(self, bytes_44398, "mem_45175") - self.futhark_builtinzhreplicate_i32(mem_45175, (m_28478 * N_28477), - np.int32(0)) - segmap_usable_groups_36798 = sdiv_up64(binop_x_44399, - segmap_group_sizze_36797) - if ((1 * (np.long(segmap_usable_groups_36798) * np.long(segmap_group_sizze_36797))) != 0): - self.mainMagnitudezisegmap_36631_var.set_args(self.global_failure, - np.int64(N_28477), - 
np.int64(m_28478), - mem_45163, mem_45166, - mem_45172, mem_45175) + mem_124917 = opencl_alloc(self, bytes_120125, "mem_124917") + self.futhark_builtinzhreplicate_i64(mem_124917, (m_73008 * N_73007), + np.int64(0)) + segmap_usable_groups_101550 = sdiv_up64(binop_x_120126, + segmap_group_sizze_101549) + if ((1 * (np.int64(segmap_usable_groups_101550) * np.int64(segmap_group_sizze_101549))) != 0): + self.mainMagnitudezisegmap_101405_var.set_args(self.global_failure, + np.int64(N_73007), + np.int64(m_73008), + mem_124906, mem_124909, + mem_124914, mem_124917) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36631_var, - ((np.long(segmap_usable_groups_36798) * np.long(segmap_group_sizze_36797)),), - (np.long(segmap_group_sizze_36797),)) + self.mainMagnitudezisegmap_101405_var, + ((np.int64(segmap_usable_groups_101550) * np.int64(segmap_group_sizze_101549)),), + (np.int64(segmap_group_sizze_101549),)) if synchronous: sync(self) - mem_45163 = None - mem_45166 = None - defunc_4_map_res_mem_45177 = mem_45169 - defunc_4_map_res_mem_45178 = mem_45172 - defunc_4_map_res_mem_45179 = mem_45175 - defunc_3_map_res_mem_45140 = None - suff_outer_par_36846 = (self.sizes["mainMagnitude.suff_outer_par_28"] <= m_28478) - fits_36932 = sle64(i32_res_28487, max_group_sizze_35230) - suff_intra_par_36930 = (self.sizes["mainMagnitude.suff_intra_par_29"] <= i32_res_28487) - intra_suff_and_fits_36933 = (suff_intra_par_36930 and fits_36932) - segmap_group_sizze_36893 = self.sizes["mainMagnitude.segmap_group_size_36858"] - nest_sizze_37028 = (m_28478 * i32_res_28487) - segred_group_sizze_37029 = self.sizes["mainMagnitude.segred_group_size_37012"] - max_num_groups_46339 = self.sizes["mainMagnitude.segred_num_groups_37014"] - num_groups_37030 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_37028, - segred_group_sizze_37029), - sext_i32_i64(max_num_groups_46339)))) - segred_group_sizze_37043 = self.sizes["mainMagnitude.segred_group_size_36987"] - 
max_num_groups_46340 = self.sizes["mainMagnitude.segred_num_groups_36989"] - num_groups_37044 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_37028, - segred_group_sizze_37043), - sext_i32_i64(max_num_groups_46340)))) - segmap_group_sizze_37063 = self.sizes["mainMagnitude.segmap_group_size_36972"] - bytes_45188 = (np.int64(4) * segmap_group_sizze_36893) - local_memory_capacity_46492 = self.max_local_memory - if (sle64(((bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8))) + (bytes_45188 + srem64((np.int64(8) - srem64(bytes_45188, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46492)) and suff_outer_par_36846): - segmap_usable_groups_36894 = sdiv_up64(m_28478, segmap_group_sizze_36893) - mem_45182 = opencl_alloc(self, bytes_44398, "mem_45182") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45182, np.int64(0), - images_mem_44381, np.int64(0), - np.int64(1), N_28477, m_28478) - mem_45185 = opencl_alloc(self, bytes_44398, "mem_45185") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45185, np.int64(0), - defunc_4_map_res_mem_45178, + mem_124906 = None + mem_124909 = None + defunc_4_map_res_mem_124919 = mem_124911 + defunc_4_map_res_mem_124920 = mem_124914 + defunc_4_map_res_mem_124921 = mem_124917 + defunc_3_map_res_mem_124883 = None + suff_outer_par_101574 = (self.sizes["mainMagnitude.suff_outer_par_35"] <= m_73008) + suff_intra_par_101648 = (self.sizes["mainMagnitude.suff_intra_par_36"] <= n_73011) + intra_suff_and_fits_101651 = (fits_90562 and suff_intra_par_101648) + segmap_group_sizze_101612 = self.sizes["mainMagnitude.segmap_group_size_101578"] + segred_group_sizze_101747 = self.sizes["mainMagnitude.segred_group_size_101730"] + max_num_groups_129324 = self.sizes["mainMagnitude.segred_num_groups_101732"] + num_groups_101748 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_90595, + segred_group_sizze_101747), + sext_i32_i64(max_num_groups_129324)))) + 
segred_group_sizze_101761 = self.sizes["mainMagnitude.segred_group_size_101706"] + max_num_groups_129325 = self.sizes["mainMagnitude.segred_num_groups_101708"] + num_groups_101762 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_90595, + segred_group_sizze_101761), + sext_i32_i64(max_num_groups_129325)))) + segmap_group_sizze_101780 = self.sizes["mainMagnitude.segmap_group_size_101691"] + local_memory_capacity_129473 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129473)) and suff_outer_par_101574): + segmap_usable_groups_101613 = sdiv_up64(m_73008, + segmap_group_sizze_101612) + mem_124924 = opencl_alloc(self, bytes_120125, "mem_124924") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124924, np.int64(0), + mem_124142, np.int64(0), + np.int64(1), N_73007, m_73008) + mem_124927 = opencl_alloc(self, bytes_120125, "mem_124927") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124927, np.int64(0), + defunc_4_map_res_mem_124920, np.int64(0), np.int64(1), - N_28477, m_28478) - num_whole_tiles_43891 = squot64(i32_res_28487, segmap_group_sizze_36893) - residual_input_43992 = srem64(i32_res_28487, segmap_group_sizze_36893) - cond_43993 = (residual_input_43992 == np.int64(0)) - mem_45216 = opencl_alloc(self, bytes_45152, "mem_45216") - mem_45218 = opencl_alloc(self, bytes_45152, "mem_45218") - mem_45220 = opencl_alloc(self, bytes_45152, "mem_45220") - if ((1 * (np.long(segmap_usable_groups_36894) * np.long(segmap_group_sizze_36893))) != 0): - self.mainMagnitudezisegmap_intragroup_43869_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45188)), - cl.LocalMemory(np.long(bytes_45188)), - np.int64(N_28477), - np.int64(m_28478), - np.int32(n_28481), - np.float32(hfrac_28483), - np.int64(i32_res_28487), - np.int32(k2p2_28489), - np.int64(num_whole_tiles_43891), - np.int64(residual_input_43992), - np.byte(cond_43993), - mem_45182, - mem_45185, - 
mem_45216, - mem_45218, - mem_45220) + N_73007, m_73008) + mem_124930 = opencl_alloc(self, bytes_120173, "mem_124930") + mem_124932 = opencl_alloc(self, bytes_120173, "mem_124932") + mem_124934 = opencl_alloc(self, bytes_120173, "mem_124934") + if ((1 * (np.int64(segmap_usable_groups_101613) * np.int64(segmap_group_sizze_101612))) != 0): + self.mainMagnitudezisegmap_101576_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.float64(hfrac_73013), + np.int64(k2p2_73021), + mem_124924, mem_124927, + mem_124930, mem_124932, + mem_124934) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_43869_var, - ((np.long(segmap_usable_groups_36894) * np.long(segmap_group_sizze_36893)),), - (np.long(segmap_group_sizze_36893),)) + self.mainMagnitudezisegmap_101576_var, + ((np.int64(segmap_usable_groups_101613) * np.int64(segmap_group_sizze_101612)),), + (np.int64(segmap_group_sizze_101612),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45182 = None - mem_45185 = None - defunc_3_map_res_mem_45244 = mem_45216 - defunc_3_map_res_mem_45245 = mem_45218 - defunc_3_map_res_mem_45246 = mem_45220 + mem_124924 = None + mem_124927 = None + defunc_3_map_res_mem_124958 = mem_124930 + defunc_3_map_res_mem_124959 = mem_124932 + defunc_3_map_res_mem_124960 = mem_124934 else: - local_memory_capacity_46491 = self.max_local_memory - if (sle64((((np.int32(4) * i32_res_28487) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_28487), - np.int64(8))), - np.int64(8))) + ((np.int32(4) * i32_res_28487) + srem64((np.int64(8) - srem64((np.int32(4) * i32_res_28487), - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46491)) and intra_suff_and_fits_36933): - mem_45225 = opencl_alloc(self, bytes_45152, "mem_45225") - mem_45227 = opencl_alloc(self, bytes_45152, "mem_45227") - mem_45229 = opencl_alloc(self, bytes_45152, 
"mem_45229") - if ((1 * (np.long(m_28478) * np.long(i32_res_28487))) != 0): - self.mainMagnitudezisegmap_intragroup_36854_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * i32_res_28487))), - cl.LocalMemory(np.long((np.int32(4) * i32_res_28487))), - np.int64(N_28477), - np.float32(hfrac_28483), - np.int64(i32_res_28487), - np.int32(k2p2_28489), - images_mem_44381, - defunc_4_map_res_mem_45178, - mem_45225, - mem_45227, - mem_45229) + local_memory_capacity_129472 = self.max_local_memory + if (sle64((((np.int32(8) * n_73011) + srem64((np.int64(8) - srem64((np.int32(8) * n_73011), + np.int64(8))), + np.int64(8))) + ((np.int32(8) * n_73011) + srem64((np.int64(8) - srem64((np.int32(8) * n_73011), + np.int64(8))), + np.int64(8)))), + sext_i32_i64(local_memory_capacity_129472)) and intra_suff_and_fits_101651): + mem_124939 = opencl_alloc(self, bytes_120173, "mem_124939") + mem_124941 = opencl_alloc(self, bytes_120173, "mem_124941") + mem_124943 = opencl_alloc(self, bytes_120173, "mem_124943") + if ((1 * (np.int64(m_73008) * np.int64(n_73011))) != 0): + self.mainMagnitudezisegmap_intragroup_101572_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * n_73011))), + cl.LocalMemory(np.int64((np.int32(8) * n_73011))), + np.int64(N_73007), + np.int64(n_73011), + np.float64(hfrac_73013), + np.int64(k2p2_73021), + mem_124142, + defunc_4_map_res_mem_124920, + mem_124939, + mem_124941, + mem_124943) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_36854_var, - ((np.long(m_28478) * np.long(i32_res_28487)),), - (np.long(i32_res_28487),)) + self.mainMagnitudezisegmap_intragroup_101572_var, + ((np.int64(m_73008) * np.int64(n_73011)),), + (np.int64(n_73011),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_3_map_res_mem_45241 = mem_45225 - defunc_3_map_res_mem_45242 = 
mem_45227 - defunc_3_map_res_mem_45243 = mem_45229 + defunc_3_map_res_mem_124955 = mem_124939 + defunc_3_map_res_mem_124956 = mem_124941 + defunc_3_map_res_mem_124957 = mem_124943 else: - mem_45232 = opencl_alloc(self, bytes_45152, "mem_45232") - if slt64((i32_res_28487 * np.int64(2)), segred_group_sizze_37029): - segment_sizze_nonzzero_46366 = smax64(np.int64(1), i32_res_28487) - num_threads_46367 = (num_groups_37030 * segred_group_sizze_37029) - if ((1 * (np.long(num_groups_37030) * np.long(segred_group_sizze_37029))) != 0): - self.mainMagnitudezisegred_small_37018_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37029))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28487), - np.int64(num_groups_37030), - np.int64(segment_sizze_nonzzero_46366), - images_mem_44381, - mem_45232) + mem_124946 = opencl_alloc(self, bytes_120173, "mem_124946") + if slt64((n_73011 * np.int64(2)), segred_group_sizze_101747): + segment_sizze_nonzzero_129347 = smax64(np.int64(1), n_73011) + num_threads_129348 = (num_groups_101748 * segred_group_sizze_101747) + if ((1 * (np.int64(num_groups_101748) * np.int64(segred_group_sizze_101747))) != 0): + self.mainMagnitudezisegred_small_101736_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101747))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(num_groups_101748), + np.int64(segment_sizze_nonzzero_129347), + mem_124142, + mem_124946) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_37018_var, - ((np.long(num_groups_37030) * np.long(segred_group_sizze_37029)),), - (np.long(segred_group_sizze_37029),)) + self.mainMagnitudezisegred_small_101736_var, + ((np.int64(num_groups_101748) * np.int64(segred_group_sizze_101747)),), + (np.int64(segred_group_sizze_101747),)) if synchronous: sync(self) else: - groups_per_segment_46387 = sdiv_up64(num_groups_37030, - smax64(np.int64(1), m_28478)) - 
elements_per_thread_46388 = sdiv_up64(i32_res_28487, - (segred_group_sizze_37029 * groups_per_segment_46387)) - virt_num_groups_46389 = (groups_per_segment_46387 * m_28478) - num_threads_46390 = (num_groups_37030 * segred_group_sizze_37029) - threads_per_segment_46391 = (groups_per_segment_46387 * segred_group_sizze_37029) - group_res_arr_mem_46392 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_37029 * virt_num_groups_46389)), - "group_res_arr_mem_46392") - mainMagnitudezicounter_mem_46394 = self.mainMagnitudezicounter_mem_46394 - if ((1 * (np.long(num_groups_37030) * np.long(segred_group_sizze_37029))) != 0): - self.mainMagnitudezisegred_large_37018_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37029))), - np.int64(N_28477), - np.int64(i32_res_28487), - np.int64(num_groups_37030), - np.int64(groups_per_segment_46387), - np.int64(elements_per_thread_46388), - np.int64(virt_num_groups_46389), - np.int64(threads_per_segment_46391), - images_mem_44381, - mem_45232, - group_res_arr_mem_46392, - mainMagnitudezicounter_mem_46394) + groups_per_segment_129368 = sdiv_up64(num_groups_101748, + smax64(np.int64(1), m_73008)) + elements_per_thread_129369 = sdiv_up64(n_73011, + (segred_group_sizze_101747 * groups_per_segment_129368)) + virt_num_groups_129370 = (groups_per_segment_129368 * m_73008) + num_threads_129371 = (num_groups_101748 * segred_group_sizze_101747) + threads_per_segment_129372 = (groups_per_segment_129368 * segred_group_sizze_101747) + group_res_arr_mem_129373 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101747 * virt_num_groups_129370)), + "group_res_arr_mem_129373") + mainMagnitudezicounter_mem_129375 = self.mainMagnitudezicounter_mem_129375 + if ((1 * (np.int64(num_groups_101748) * np.int64(segred_group_sizze_101747))) != 0): + self.mainMagnitudezisegred_large_101736_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + 
cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101747))), + np.int64(N_73007), + np.int64(n_73011), + np.int64(num_groups_101748), + np.int64(groups_per_segment_129368), + np.int64(elements_per_thread_129369), + np.int64(virt_num_groups_129370), + np.int64(threads_per_segment_129372), + mem_124142, + mem_124946, + group_res_arr_mem_129373, + mainMagnitudezicounter_mem_129375) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_37018_var, - ((np.long(num_groups_37030) * np.long(segred_group_sizze_37029)),), - (np.long(segred_group_sizze_37029),)) + self.mainMagnitudezisegred_large_101736_var, + ((np.int64(num_groups_101748) * np.int64(segred_group_sizze_101747)),), + (np.int64(segred_group_sizze_101747),)) if synchronous: sync(self) - mem_45235 = opencl_alloc(self, bytes_45152, "mem_45235") - if slt64((i32_res_28487 * np.int64(2)), segred_group_sizze_37043): - segment_sizze_nonzzero_46426 = smax64(np.int64(1), i32_res_28487) - num_threads_46427 = (num_groups_37044 * segred_group_sizze_37043) - if ((1 * (np.long(num_groups_37044) * np.long(segred_group_sizze_37043))) != 0): - self.mainMagnitudezisegred_small_36993_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37043))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28487), - np.int64(num_groups_37044), - np.int64(segment_sizze_nonzzero_46426), - defunc_4_map_res_mem_45178, - mem_45232, - mem_45235) + mem_124949 = opencl_alloc(self, bytes_120173, "mem_124949") + if slt64((n_73011 * np.int64(2)), segred_group_sizze_101761): + segment_sizze_nonzzero_129407 = smax64(np.int64(1), n_73011) + num_threads_129408 = (num_groups_101762 * segred_group_sizze_101761) + if ((1 * (np.int64(num_groups_101762) * np.int64(segred_group_sizze_101761))) != 0): + self.mainMagnitudezisegred_small_101712_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, 
+ cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101761))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + np.int64(num_groups_101762), + np.int64(segment_sizze_nonzzero_129407), + defunc_4_map_res_mem_124920, + mem_124946, + mem_124949) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_36993_var, - ((np.long(num_groups_37044) * np.long(segred_group_sizze_37043)),), - (np.long(segred_group_sizze_37043),)) + self.mainMagnitudezisegred_small_101712_var, + ((np.int64(num_groups_101762) * np.int64(segred_group_sizze_101761)),), + (np.int64(segred_group_sizze_101761),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) else: - groups_per_segment_46447 = sdiv_up64(num_groups_37044, - smax64(np.int64(1), m_28478)) - elements_per_thread_46448 = sdiv_up64(i32_res_28487, - (segred_group_sizze_37043 * groups_per_segment_46447)) - virt_num_groups_46449 = (groups_per_segment_46447 * m_28478) - num_threads_46450 = (num_groups_37044 * segred_group_sizze_37043) - threads_per_segment_46451 = (groups_per_segment_46447 * segred_group_sizze_37043) - group_res_arr_mem_46452 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_37043 * virt_num_groups_46449)), - "group_res_arr_mem_46452") - mainMagnitudezicounter_mem_46454 = self.mainMagnitudezicounter_mem_46454 - if ((1 * (np.long(num_groups_37044) * np.long(segred_group_sizze_37043))) != 0): - self.mainMagnitudezisegred_large_36993_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37043))), - np.int64(N_28477), - np.int64(i32_res_28487), - np.int64(num_groups_37044), - np.int64(groups_per_segment_46447), - np.int64(elements_per_thread_46448), - np.int64(virt_num_groups_46449), - np.int64(threads_per_segment_46451), - defunc_4_map_res_mem_45178, - mem_45232, - mem_45235, - group_res_arr_mem_46452, - 
mainMagnitudezicounter_mem_46454) + groups_per_segment_129428 = sdiv_up64(num_groups_101762, + smax64(np.int64(1), m_73008)) + elements_per_thread_129429 = sdiv_up64(n_73011, + (segred_group_sizze_101761 * groups_per_segment_129428)) + virt_num_groups_129430 = (groups_per_segment_129428 * m_73008) + num_threads_129431 = (num_groups_101762 * segred_group_sizze_101761) + threads_per_segment_129432 = (groups_per_segment_129428 * segred_group_sizze_101761) + group_res_arr_mem_129433 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101761 * virt_num_groups_129430)), + "group_res_arr_mem_129433") + mainMagnitudezicounter_mem_129435 = self.mainMagnitudezicounter_mem_129435 + if ((1 * (np.int64(num_groups_101762) * np.int64(segred_group_sizze_101761))) != 0): + self.mainMagnitudezisegred_large_101712_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101761))), + np.int64(N_73007), + np.int64(n_73011), + np.int64(num_groups_101762), + np.int64(groups_per_segment_129428), + np.int64(elements_per_thread_129429), + np.int64(virt_num_groups_129430), + np.int64(threads_per_segment_129432), + defunc_4_map_res_mem_124920, + mem_124946, + mem_124949, + group_res_arr_mem_129433, + mainMagnitudezicounter_mem_129435) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_36993_var, - ((np.long(num_groups_37044) * np.long(segred_group_sizze_37043)),), - (np.long(segred_group_sizze_37043),)) + self.mainMagnitudezisegred_large_101712_var, + ((np.int64(num_groups_101762) * np.int64(segred_group_sizze_101761)),), + (np.int64(segred_group_sizze_101761),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - segmap_usable_groups_37064 = sdiv_up64(m_28478, - segmap_group_sizze_37063) - mem_45238 = opencl_alloc(self, bytes_45152, "mem_45238") - mem_45240 = opencl_alloc(self, bytes_45152, "mem_45240") - if ((1 * 
(np.long(segmap_usable_groups_37064) * np.long(segmap_group_sizze_37063))) != 0): - self.mainMagnitudezisegmap_36970_var.set_args(self.global_failure, - np.int64(m_28478), - np.float32(hfrac_28483), - np.int32(k2p2_28489), - mem_45232, mem_45235, - mem_45238, mem_45240) + segmap_usable_groups_101781 = sdiv_up64(m_73008, + segmap_group_sizze_101780) + mem_124952 = opencl_alloc(self, bytes_120173, "mem_124952") + mem_124954 = opencl_alloc(self, bytes_120173, "mem_124954") + if ((1 * (np.int64(segmap_usable_groups_101781) * np.int64(segmap_group_sizze_101780))) != 0): + self.mainMagnitudezisegmap_101689_var.set_args(self.global_failure, + np.int64(m_73008), + np.float64(hfrac_73013), + np.int64(k2p2_73021), + mem_124946, mem_124949, + mem_124952, mem_124954) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_36970_var, - ((np.long(segmap_usable_groups_37064) * np.long(segmap_group_sizze_37063)),), - (np.long(segmap_group_sizze_37063),)) + self.mainMagnitudezisegmap_101689_var, + ((np.int64(segmap_usable_groups_101781) * np.int64(segmap_group_sizze_101780)),), + (np.int64(segmap_group_sizze_101780),)) if synchronous: sync(self) - mem_45235 = None - defunc_3_map_res_mem_45241 = mem_45238 - defunc_3_map_res_mem_45242 = mem_45232 - defunc_3_map_res_mem_45243 = mem_45240 - defunc_3_map_res_mem_45244 = defunc_3_map_res_mem_45241 - defunc_3_map_res_mem_45245 = defunc_3_map_res_mem_45242 - defunc_3_map_res_mem_45246 = defunc_3_map_res_mem_45243 - segred_group_sizze_37084 = self.sizes["mainMagnitude.segred_group_size_37083"] - max_num_groups_46493 = self.sizes["mainMagnitude.segred_num_groups_37085"] - num_groups_37086 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segred_group_sizze_37084), - sext_i32_i64(max_num_groups_46493)))) - mem_45249 = opencl_alloc(self, np.int64(4), "mem_45249") - mainMagnitudezicounter_mem_46494 = self.mainMagnitudezicounter_mem_46494 - group_res_arr_mem_46496 = opencl_alloc(self, - (np.int32(4) * 
(segred_group_sizze_37084 * num_groups_37086)), - "group_res_arr_mem_46496") - num_threads_46498 = (num_groups_37086 * segred_group_sizze_37084) - if ((1 * (np.long(num_groups_37086) * np.long(segred_group_sizze_37084))) != 0): - self.mainMagnitudezisegred_nonseg_37091_var.set_args(self.global_failure, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37084))), - cl.LocalMemory(np.long(np.int32(1))), - np.int64(m_28478), - np.int64(num_groups_37086), - np.int64(num_threads_46498), - defunc_3_map_res_mem_45244, - mem_45249, - mainMagnitudezicounter_mem_46494, - group_res_arr_mem_46496) + mem_124949 = None + defunc_3_map_res_mem_124955 = mem_124952 + defunc_3_map_res_mem_124956 = mem_124946 + defunc_3_map_res_mem_124957 = mem_124954 + defunc_3_map_res_mem_124958 = defunc_3_map_res_mem_124955 + defunc_3_map_res_mem_124959 = defunc_3_map_res_mem_124956 + defunc_3_map_res_mem_124960 = defunc_3_map_res_mem_124957 + mem_124142 = None + segred_group_sizze_101801 = self.sizes["mainMagnitude.segred_group_size_101800"] + max_num_groups_129474 = self.sizes["mainMagnitude.segred_num_groups_101802"] + num_groups_101803 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segred_group_sizze_101801), + sext_i32_i64(max_num_groups_129474)))) + mem_124963 = opencl_alloc(self, np.int64(8), "mem_124963") + mainMagnitudezicounter_mem_129475 = self.mainMagnitudezicounter_mem_129475 + group_res_arr_mem_129477 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101801 * num_groups_101803)), + "group_res_arr_mem_129477") + num_threads_129479 = (num_groups_101803 * segred_group_sizze_101801) + if ((1 * (np.int64(num_groups_101803) * np.int64(segred_group_sizze_101801))) != 0): + self.mainMagnitudezisegred_nonseg_101808_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101801))), + cl.LocalMemory(np.int64(np.int32(1))), + np.int64(m_73008), + np.int64(num_groups_101803), + np.int64(num_threads_129479), + 
defunc_3_map_res_mem_124958, + mem_124963, + mainMagnitudezicounter_mem_129475, + group_res_arr_mem_129477) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_nonseg_37091_var, - ((np.long(num_groups_37086) * np.long(segred_group_sizze_37084)),), - (np.long(segred_group_sizze_37084),)) + self.mainMagnitudezisegred_nonseg_101808_var, + ((np.int64(num_groups_101803) * np.int64(segred_group_sizze_101801)),), + (np.int64(segred_group_sizze_101801),)) if synchronous: sync(self) - read_res_46895 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46895, mem_45249, - device_offset=(np.long(np.int64(0)) * 4), + read_res_129965 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129965, mem_124963, + device_offset=(np.int64(np.int64(0)) * 8), is_blocking=synchronous) sync(self) - defunc_2_reduce_comm_res_28875 = read_res_46895[0] - mem_45249 = None - i32_res_28880 = sext_i32_i64(defunc_2_reduce_comm_res_28875) - bounds_invalid_upwards_28881 = slt64(i32_res_28880, np.int64(0)) - valid_28882 = not(bounds_invalid_upwards_28881) - range_valid_c_28883 = True - assert valid_28882, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:5:3-18\n #2 bfastfinal.fut:102:34-46\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:97:17-104:24\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - i32_res_28880, - " is invalid.")) - suff_outer_par_37123 = (self.sizes["mainMagnitude.suff_outer_par_31"] <= m_28478) - segmap_group_sizze_37150 = self.sizes["mainMagnitude.segmap_group_size_37127"] - nest_sizze_37201 = (m_28478 * i32_res_28880) - segred_group_sizze_37202 = self.sizes["mainMagnitude.segred_group_size_37175"] - max_num_groups_46524 = self.sizes["mainMagnitude.segred_num_groups_37177"] - num_groups_37203 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_37201, - segred_group_sizze_37202), - 
sext_i32_i64(max_num_groups_46524)))) - bytes_45252 = (np.int64(4) * segmap_group_sizze_37150) - local_memory_capacity_46595 = self.max_local_memory - if (sle64(((bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8))) + (bytes_45252 + srem64((np.int64(8) - srem64(bytes_45252, - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46595)) and suff_outer_par_37123): - segmap_usable_groups_37151 = sdiv_up64(m_28478, segmap_group_sizze_37150) - num_whole_tiles_44095 = squot64(i32_res_28880, segmap_group_sizze_37150) - residual_input_44206 = srem64(i32_res_28880, segmap_group_sizze_37150) - cond_44207 = (residual_input_44206 == np.int64(0)) - mem_45275 = opencl_alloc(self, bytes_45152, "mem_45275") - if ((1 * (np.long(segmap_usable_groups_37151) * np.long(segmap_group_sizze_37150))) != 0): - self.mainMagnitudezisegmap_intragroup_44075_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(bytes_45252)), - cl.LocalMemory(np.long(bytes_45252)), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28880), - np.int64(num_whole_tiles_44095), - np.int64(residual_input_44206), - np.byte(cond_44207), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45275) + defunc_2_reduce_comm_res_74867 = read_res_129965[0] + mem_124963 = None + bounds_invalid_upwards_74872 = slt64(defunc_2_reduce_comm_res_74867, + np.int64(0)) + valid_74873 = not(bounds_invalid_upwards_74872) + range_valid_c_74874 = True + assert valid_74873, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:113:34-42\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:108:17-115:24\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + defunc_2_reduce_comm_res_74867, + " is invalid.")) + suff_outer_par_101810 = 
(self.sizes["mainMagnitude.suff_outer_par_37"] <= m_73008) + segmap_group_sizze_101836 = self.sizes["mainMagnitude.segmap_group_size_101814"] + nest_sizze_101885 = (m_73008 * defunc_2_reduce_comm_res_74867) + segred_group_sizze_101886 = self.sizes["mainMagnitude.segred_group_size_101860"] + max_num_groups_129505 = self.sizes["mainMagnitude.segred_num_groups_101862"] + num_groups_101887 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_101885, + segred_group_sizze_101886), + sext_i32_i64(max_num_groups_129505)))) + local_memory_capacity_129572 = self.max_local_memory + if (sle64(np.int64(0), + sext_i32_i64(local_memory_capacity_129572)) and suff_outer_par_101810): + segmap_usable_groups_101837 = sdiv_up64(m_73008, + segmap_group_sizze_101836) + mem_124966 = opencl_alloc(self, bytes_120173, "mem_124966") + if ((1 * (np.int64(segmap_usable_groups_101837) * np.int64(segmap_group_sizze_101836))) != 0): + self.mainMagnitudezisegmap_101812_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_73007), + np.int64(m_73008), + np.int64(defunc_2_reduce_comm_res_74867), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124966) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_44075_var, - ((np.long(segmap_usable_groups_37151) * np.long(segmap_group_sizze_37150)),), - (np.long(segmap_group_sizze_37150),)) + self.mainMagnitudezisegmap_101812_var, + ((np.int64(segmap_usable_groups_101837) * np.int64(segmap_group_sizze_101836)),), + (np.int64(segmap_group_sizze_101836),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45275 + defunc_0_f_res_mem_124970 = mem_124966 else: - mem_45278 = opencl_alloc(self, bytes_45152, "mem_45278") - if slt64((i32_res_28880 * np.int64(2)), segred_group_sizze_37202): - segment_sizze_nonzzero_46535 = smax64(np.int64(1), i32_res_28880) - num_threads_46536 = 
(num_groups_37203 * segred_group_sizze_37202) - if ((1 * (np.long(num_groups_37203) * np.long(segred_group_sizze_37202))) != 0): - self.mainMagnitudezisegred_small_37181_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37202))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(i32_res_28880), - np.int64(num_groups_37203), - np.int64(segment_sizze_nonzzero_46535), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278) + mem_124969 = opencl_alloc(self, bytes_120173, "mem_124969") + if slt64((defunc_2_reduce_comm_res_74867 * np.int64(2)), + segred_group_sizze_101886): + segment_sizze_nonzzero_129512 = smax64(np.int64(1), + defunc_2_reduce_comm_res_74867) + num_threads_129513 = (num_groups_101887 * segred_group_sizze_101886) + if ((1 * (np.int64(num_groups_101887) * np.int64(segred_group_sizze_101886))) != 0): + self.mainMagnitudezisegred_small_101866_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101886))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(defunc_2_reduce_comm_res_74867), + np.int64(num_groups_101887), + np.int64(segment_sizze_nonzzero_129512), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124969) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_37181_var, - ((np.long(num_groups_37203) * np.long(segred_group_sizze_37202)),), - (np.long(segred_group_sizze_37202),)) + self.mainMagnitudezisegred_small_101866_var, + ((np.int64(num_groups_101887) * np.int64(segred_group_sizze_101886)),), + (np.int64(segred_group_sizze_101886),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) else: - groups_per_segment_46556 = sdiv_up64(num_groups_37203, - smax64(np.int64(1), m_28478)) - elements_per_thread_46557 = 
sdiv_up64(i32_res_28880, - (segred_group_sizze_37202 * groups_per_segment_46556)) - virt_num_groups_46558 = (groups_per_segment_46556 * m_28478) - num_threads_46559 = (num_groups_37203 * segred_group_sizze_37202) - threads_per_segment_46560 = (groups_per_segment_46556 * segred_group_sizze_37202) - group_res_arr_mem_46561 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_37202 * virt_num_groups_46558)), - "group_res_arr_mem_46561") - mainMagnitudezicounter_mem_46563 = self.mainMagnitudezicounter_mem_46563 - if ((1 * (np.long(num_groups_37203) * np.long(segred_group_sizze_37202))) != 0): - self.mainMagnitudezisegred_large_37181_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_37202))), - np.int64(N_28477), - np.int64(i32_res_28880), - np.int64(num_groups_37203), - np.int64(groups_per_segment_46556), - np.int64(elements_per_thread_46557), - np.int64(virt_num_groups_46558), - np.int64(threads_per_segment_46560), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - mem_45278, - group_res_arr_mem_46561, - mainMagnitudezicounter_mem_46563) + groups_per_segment_129533 = sdiv_up64(num_groups_101887, + smax64(np.int64(1), m_73008)) + elements_per_thread_129534 = sdiv_up64(defunc_2_reduce_comm_res_74867, + (segred_group_sizze_101886 * groups_per_segment_129533)) + virt_num_groups_129535 = (groups_per_segment_129533 * m_73008) + num_threads_129536 = (num_groups_101887 * segred_group_sizze_101886) + threads_per_segment_129537 = (groups_per_segment_129533 * segred_group_sizze_101886) + group_res_arr_mem_129538 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_101886 * virt_num_groups_129535)), + "group_res_arr_mem_129538") + mainMagnitudezicounter_mem_129540 = self.mainMagnitudezicounter_mem_129540 + if ((1 * (np.int64(num_groups_101887) * np.int64(segred_group_sizze_101886))) != 0): + 
self.mainMagnitudezisegred_large_101866_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_101886))), + np.int64(N_73007), + np.int64(defunc_2_reduce_comm_res_74867), + np.int64(num_groups_101887), + np.int64(groups_per_segment_129533), + np.int64(elements_per_thread_129534), + np.int64(virt_num_groups_129535), + np.int64(threads_per_segment_129537), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + mem_124969, + group_res_arr_mem_129538, + mainMagnitudezicounter_mem_129540) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_37181_var, - ((np.long(num_groups_37203) * np.long(segred_group_sizze_37202)),), - (np.long(segred_group_sizze_37202),)) + self.mainMagnitudezisegred_large_101866_var, + ((np.int64(num_groups_101887) * np.int64(segred_group_sizze_101886)),), + (np.int64(segred_group_sizze_101886),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45279 = mem_45278 - iota32_arg_28909 = (N_28477 - i32_res_28487) - bounds_invalid_upwards_28910 = slt64(iota32_arg_28909, np.int64(0)) - valid_28911 = not(bounds_invalid_upwards_28910) - range_valid_c_28912 = True - assert valid_28911, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:109:22-35\n #3 bfastfinal.fut:181:5-58\n #4 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_28909, - " is invalid.")) - i_28914 = (n_28481 - np.int32(1)) - i_28915 = sext_i32_i64(i_28914) - x_28916 = sle64(np.int64(0), i_28915) - y_28917 = slt64(i_28915, N_28477) - bounds_check_28918 = (x_28916 and y_28917) - index_certs_28919 = True - assert bounds_check_28918, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:107:64-84\n #1 bfastfinal.fut:106:15-109:36\n #2 
bfastfinal.fut:181:5-58\n #3 bfastfinal.fut:176:1-182:37\n" % ("Index [", - i_28915, - "] out of bounds for array of shape [", - N_28477, - "].")) - read_res_46897 = np.empty(1, dtype=ct.c_int32) - cl.enqueue_copy(self.queue, read_res_46897, mappingindices_mem_44380, - device_offset=(np.long(i_28915) * 4), + defunc_0_f_res_mem_124970 = mem_124969 + iota_arg_74896 = (N_73007 - n_73011) + bounds_invalid_upwards_74897 = slt64(iota_arg_74896, np.int64(0)) + valid_74898 = not(bounds_invalid_upwards_74897) + range_valid_c_74899 = True + assert valid_74898, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:120:22-31\n #2 bfastfinal.fut:192:5-74\n #3 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_74896, + " is invalid.")) + index_certs_74901 = True + assert y_73107, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 bfastfinal.fut:118:72-92\n #1 bfastfinal.fut:117:15-120:32\n #2 bfastfinal.fut:192:5-74\n #3 bfastfinal.fut:187:1-193:48\n" % ("Index [", + m_73103, + "] out of bounds for array of shape [", + N_73007, + "].")) + read_res_129967 = np.empty(1, dtype=ct.c_int64) + cl.enqueue_copy(self.queue, read_res_129967, mappingindices_mem_120107, + device_offset=(np.int64(m_73103) * 8), is_blocking=synchronous) sync(self) - r32_arg_28920 = read_res_46897[0] - i32_res_28921 = sitofp_i32_f32(r32_arg_28920) - range_valid_c_28922 = True - assert valid_28911, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 helpers.fut:2:3-8\n #2 bfastfinal.fut:119:20-35\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n" % ("Range ", - np.int64(0), - "..", - np.int64(1), - "..<", - iota32_arg_28909, - " is invalid.")) - segmap_group_sizze_37293 = self.sizes["mainMagnitude.segmap_group_size_37273"] - segmap_usable_groups_37294 = sdiv_up64(iota32_arg_28909, - segmap_group_sizze_37293) - bytes_45281 = 
(np.int64(4) * iota32_arg_28909) - mem_45282 = opencl_alloc(self, bytes_45281, "mem_45282") - mem_45284 = opencl_alloc(self, bytes_45281, "mem_45284") - if ((1 * (np.long(segmap_usable_groups_37294) * np.long(segmap_group_sizze_37293))) != 0): - self.mainMagnitudezisegmap_37271_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_28477), - np.int32(n_28481), - np.float32(lam_28484), - np.int64(iota32_arg_28909), - np.float32(i32_res_28921), - mappingindices_mem_44380, - mem_45282, mem_45284) + i64_arg_74902 = read_res_129967[0] + i64_res_74903 = sitofp_i64_f64(i64_arg_74902) + segmap_group_sizze_101929 = self.sizes["mainMagnitude.segmap_group_size_101911"] + segmap_usable_groups_101930 = sdiv_up64(iota_arg_74896, + segmap_group_sizze_101929) + bytes_124972 = (np.int64(8) * iota_arg_74896) + mem_124973 = opencl_alloc(self, bytes_124972, "mem_124973") + if ((1 * (np.int64(segmap_usable_groups_101930) * np.int64(segmap_group_sizze_101929))) != 0): + self.mainMagnitudezisegmap_101909_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_73007), + np.int64(n_73011), + np.float64(lam_73015), + np.int64(iota_arg_74896), + np.float64(i64_res_74903), + mappingindices_mem_120107, + mem_124973) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_37271_var, - ((np.long(segmap_usable_groups_37294) * np.long(segmap_group_sizze_37293)),), - (np.long(segmap_group_sizze_37293),)) + self.mainMagnitudezisegmap_101909_var, + ((np.int64(segmap_usable_groups_101930) * np.int64(segmap_group_sizze_101929)),), + (np.int64(segmap_group_sizze_101929),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45282 = None - max_res_28941 = smax64(np.int64(1), iota32_arg_28909) - bounds_invalid_upwards_28942 = slt64(max_res_28941, np.int64(1)) - distance_28943 = (max_res_28941 - np.int64(1)) - valid_28944 = not(bounds_invalid_upwards_28942) - range_valid_c_28945 
= True - assert valid_28944, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:16:30-45\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:116:13-121:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:114:20-131:9\n #5 bfastfinal.fut:181:5-58\n #6 bfastfinal.fut:176:1-182:37\n" % ("Range ", + range_valid_c_74920 = True + assert valid_74898, ("Error: %s%d%s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:90:3-10\n #1 bfastfinal.fut:130:20-31\n #2 /prelude/functional.fut:9:42-44\n #3 bfastfinal.fut:125:20-142:9\n #4 bfastfinal.fut:192:5-74\n #5 bfastfinal.fut:187:1-193:48\n" % ("Range ", + np.int64(0), + "..", + np.int64(1), + "..<", + iota_arg_74896, + " is invalid.")) + max_res_74921 = smax64(np.int64(1), iota_arg_74896) + bounds_invalid_upwards_74922 = slt64(max_res_74921, np.int64(1)) + distance_74923 = (max_res_74921 - np.int64(1)) + valid_74924 = not(bounds_invalid_upwards_74922) + range_valid_c_74925 = True + assert valid_74924, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 lib/github.com/diku-dk/sorts/insertion_sort.fut:16:30-45\n #1 /prelude/functional.fut:9:42-44\n #2 bfastfinal.fut:127:13-132:42\n #3 /prelude/functional.fut:9:42-44\n #4 bfastfinal.fut:125:20-142:9\n #5 bfastfinal.fut:192:5-74\n #6 bfastfinal.fut:187:1-193:48\n" % ("Range ", np.int64(1), "..<", - max_res_28941, + max_res_74921, " is invalid.")) - segmap_group_sizze_37716 = self.sizes["mainMagnitude.segmap_group_size_37709"] - segmap_usable_groups_37717 = sdiv_up64(m_28478, segmap_group_sizze_37716) - mem_45287 = opencl_alloc(self, bytes_45152, "mem_45287") - if ((1 * (np.long(segmap_usable_groups_37717) * np.long(segmap_group_sizze_37716))) != 0): - self.mainMagnitudezisegmap_37707_var.set_args(self.global_failure, - np.int64(m_28478), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - mem_45287) + segmap_group_sizze_102102 = self.sizes["mainMagnitude.segmap_group_size_102095"] + segmap_usable_groups_102103 = sdiv_up64(m_73008, 
segmap_group_sizze_102102) + mem_124976 = opencl_alloc(self, bytes_120173, "mem_124976") + if ((1 * (np.int64(segmap_usable_groups_102103) * np.int64(segmap_group_sizze_102102))) != 0): + self.mainMagnitudezisegmap_102093_var.set_args(self.global_failure, + np.int64(m_73008), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124959, + mem_124976) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_37707_var, - ((np.long(segmap_usable_groups_37717) * np.long(segmap_group_sizze_37716)),), - (np.long(segmap_group_sizze_37716),)) + self.mainMagnitudezisegmap_102093_var, + ((np.int64(segmap_usable_groups_102103) * np.int64(segmap_group_sizze_102102)),), + (np.int64(segmap_group_sizze_102102),)) if synchronous: sync(self) - nest_sizze_37730 = (m_28478 * iota32_arg_28909) - segmap_group_sizze_37731 = self.sizes["mainMagnitude.segmap_group_size_37652"] - segmap_usable_groups_37732 = sdiv_up64(nest_sizze_37730, - segmap_group_sizze_37731) - bytes_45289 = (np.int64(4) * nest_sizze_37730) - mem_45291 = opencl_alloc(self, bytes_45289, "mem_45291") - if ((1 * (np.long(segmap_usable_groups_37732) * np.long(segmap_group_sizze_37731))) != 0): - self.mainMagnitudezisegmap_37649_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_28477), - np.int64(m_28478), - np.int64(iota32_arg_28909), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45245, - mem_45287, mem_45291) + nest_sizze_102113 = (m_73008 * iota_arg_74896) + segmap_group_sizze_102114 = self.sizes["mainMagnitude.segmap_group_size_102042"] + segmap_usable_groups_102115 = sdiv_up64(nest_sizze_102113, + segmap_group_sizze_102114) + bytes_124978 = (np.int64(8) * nest_sizze_102113) + mem_124980 = opencl_alloc(self, bytes_124978, "mem_124980") + if ((1 * (np.int64(segmap_usable_groups_102115) * np.int64(segmap_group_sizze_102114))) != 0): + self.mainMagnitudezisegmap_102039_var.set_args(self.global_failure, + self.failure_is_an_option, + 
self.global_failure_args, + np.int64(N_73007), + np.int64(m_73008), + np.int64(iota_arg_74896), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124959, + mem_124976, mem_124980) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_37649_var, - ((np.long(segmap_usable_groups_37732) * np.long(segmap_group_sizze_37731)),), - (np.long(segmap_group_sizze_37731),)) + self.mainMagnitudezisegmap_102039_var, + ((np.int64(segmap_usable_groups_102115) * np.int64(segmap_group_sizze_102114)),), + (np.int64(segmap_group_sizze_102114),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - segmap_group_sizze_37760 = self.sizes["mainMagnitude.segmap_group_size_37530"] - segmap_usable_groups_37761 = sdiv_up64(m_28478, segmap_group_sizze_37760) - mem_45294 = opencl_alloc(self, bytes_45289, "mem_45294") - self.futhark_builtinzhgpu_map_transpose_f32(mem_45294, np.int64(0), - mem_45291, np.int64(0), - np.int64(1), iota32_arg_28909, - m_28478) - mem_45291 = None - mem_45325 = opencl_alloc(self, bytes_45152, "mem_45325") - num_threads_45623 = (segmap_group_sizze_37760 * segmap_usable_groups_37761) - total_sizze_45624 = (bytes_45281 * num_threads_45623) - mem_45305 = opencl_alloc(self, total_sizze_45624, "mem_45305") - if ((1 * (np.long(segmap_usable_groups_37761) * np.long(segmap_group_sizze_37760))) != 0): - self.mainMagnitudezisegmap_37528_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(m_28478), - np.int64(iota32_arg_28909), - np.int64(distance_28943), - np.int64(segmap_usable_groups_37761), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - mem_45287, mem_45294, - mem_45305, mem_45325) + segmap_group_sizze_102140 = self.sizes["mainMagnitude.segmap_group_size_101951"] + segmap_usable_groups_102141 = sdiv_up64(m_73008, segmap_group_sizze_102140) + mem_124983 = opencl_alloc(self, bytes_124978, "mem_124983") + self.futhark_builtinzhgpu_map_transpose_f64(mem_124983, np.int64(0), + 
mem_124980, np.int64(0), + np.int64(1), iota_arg_74896, + m_73008) + mem_124980 = None + mem_125014 = opencl_alloc(self, bytes_120173, "mem_125014") + num_threads_126087 = (segmap_group_sizze_102140 * segmap_usable_groups_102141) + total_sizze_126088 = (bytes_124972 * num_threads_126087) + mem_124994 = opencl_alloc(self, total_sizze_126088, "mem_124994") + if ((1 * (np.int64(segmap_usable_groups_102141) * np.int64(segmap_group_sizze_102140))) != 0): + self.mainMagnitudezisegmap_101949_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(m_73008), + np.int64(iota_arg_74896), + np.int64(distance_74923), + np.int64(num_threads_126087), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124959, + mem_124976, mem_124983, + mem_124994, mem_125014) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_37528_var, - ((np.long(segmap_usable_groups_37761) * np.long(segmap_group_sizze_37760)),), - (np.long(segmap_group_sizze_37760),)) + self.mainMagnitudezisegmap_101949_var, + ((np.int64(segmap_usable_groups_102141) * np.int64(segmap_group_sizze_102140)),), + (np.int64(segmap_group_sizze_102140),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45287 = None - mem_45294 = None - mem_45305 = None - fits_38119 = sle64(iota32_arg_28909, max_group_sizze_35230) - suff_intra_par_38117 = (self.sizes["mainMagnitude.suff_intra_par_37"] <= iota32_arg_28909) - intra_suff_and_fits_38120 = (suff_intra_par_38117 and fits_38119) - segmap_group_sizze_38417 = self.sizes["mainMagnitude.segmap_group_size_38397"] - max_num_groups_46621 = self.sizes["mainMagnitude.segmap_num_groups_38399"] - num_groups_38418 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(m_28478, - segmap_group_sizze_38417), - sext_i32_i64(max_num_groups_46621)))) - segscan_group_sizze_38440 = self.sizes["mainMagnitude.segscan_group_size_38348"] - max_num_groups_46622 = self.sizes["mainMagnitude.segscan_num_groups_38350"] - 
num_groups_38441 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_37730, - segscan_group_sizze_38440), - sext_i32_i64(max_num_groups_46622)))) - segred_group_sizze_38486 = self.sizes["mainMagnitude.segred_group_size_38294"] - max_num_groups_46623 = self.sizes["mainMagnitude.segred_num_groups_38296"] - num_groups_38487 = sext_i64_i32(smax64(np.int64(1), - smin64(sdiv_up64(nest_sizze_37730, - segred_group_sizze_38486), - sext_i32_i64(max_num_groups_46623)))) - segmap_group_sizze_38525 = self.sizes["mainMagnitude.segmap_group_size_38243"] - local_memory_capacity_46816 = self.max_local_memory - if (sle64(((((bytes_45281 + srem64((np.int64(8) - srem64(bytes_45281, - np.int64(8))), - np.int64(8))) + ((np.int32(1) * iota32_arg_28909) + srem64((np.int64(8) - srem64((np.int32(1) * iota32_arg_28909), - np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_28909) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_28909), - np.int64(8))), - np.int64(8)))) + ((np.int32(4) * iota32_arg_28909) + srem64((np.int64(8) - srem64((np.int32(4) * iota32_arg_28909), - np.int64(8))), - np.int64(8)))), - sext_i32_i64(local_memory_capacity_46816)) and intra_suff_and_fits_38120): - mem_45332 = opencl_alloc(self, bytes_45152, "mem_45332") - mem_45334 = opencl_alloc(self, bytes_45152, "mem_45334") - if ((1 * (np.long(m_28478) * np.long(iota32_arg_28909))) != 0): - self.mainMagnitudezisegmap_intragroup_37880_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_28909))), - cl.LocalMemory(np.long((np.int32(4) * iota32_arg_28909))), - cl.LocalMemory(np.long((np.int32(1) * iota32_arg_28909))), - cl.LocalMemory(np.long(bytes_45281)), - np.int64(N_28477), - np.int32(n_28481), - np.int64(iota32_arg_28909), - defunc_4_map_res_mem_45177, - defunc_4_map_res_mem_45178, - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - 
defunc_0_f_res_mem_45279, - mem_45284, - mem_45332, - mem_45334) + mem_124976 = None + mem_124983 = None + mem_124994 = None + fits_102231 = sle64(iota_arg_74896, max_group_sizze_90561) + suff_intra_par_102229 = (self.sizes["mainMagnitude.suff_intra_par_38"] <= iota_arg_74896) + intra_suff_and_fits_102232 = (suff_intra_par_102229 and fits_102231) + segmap_group_sizze_102525 = self.sizes["mainMagnitude.segmap_group_size_102505"] + max_num_groups_129598 = self.sizes["mainMagnitude.segmap_num_groups_102507"] + num_groups_102526 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(m_73008, + segmap_group_sizze_102525), + sext_i32_i64(max_num_groups_129598)))) + segscan_group_sizze_102548 = self.sizes["mainMagnitude.segscan_group_size_102458"] + max_num_groups_129599 = self.sizes["mainMagnitude.segscan_num_groups_102460"] + num_groups_102549 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_102113, + segscan_group_sizze_102548), + sext_i32_i64(max_num_groups_129599)))) + segred_group_sizze_102592 = self.sizes["mainMagnitude.segred_group_size_102404"] + max_num_groups_129600 = self.sizes["mainMagnitude.segred_num_groups_102406"] + num_groups_102593 = sext_i64_i32(smax64(np.int64(1), + smin64(sdiv_up64(nest_sizze_102113, + segred_group_sizze_102592), + sext_i32_i64(max_num_groups_129600)))) + segmap_group_sizze_102631 = self.sizes["mainMagnitude.segmap_group_size_102354"] + local_memory_capacity_129793 = self.max_local_memory + if (sle64(((((bytes_124972 + srem64((np.int64(8) - srem64(bytes_124972, + np.int64(8))), + np.int64(8))) + ((np.int32(1) * iota_arg_74896) + srem64((np.int64(8) - srem64((np.int32(1) * iota_arg_74896), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_74896) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_74896), + np.int64(8))), + np.int64(8)))) + ((np.int32(8) * iota_arg_74896) + srem64((np.int64(8) - srem64((np.int32(8) * iota_arg_74896), + np.int64(8))), + np.int64(8)))), + 
sext_i32_i64(local_memory_capacity_129793)) and intra_suff_and_fits_102232): + mem_125021 = opencl_alloc(self, bytes_120173, "mem_125021") + mem_125023 = opencl_alloc(self, bytes_120173, "mem_125023") + if ((1 * (np.int64(m_73008) * np.int64(iota_arg_74896))) != 0): + self.mainMagnitudezisegmap_intragroup_102227_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_74896))), + cl.LocalMemory(np.int64((np.int32(8) * iota_arg_74896))), + cl.LocalMemory(np.int64((np.int32(1) * iota_arg_74896))), + cl.LocalMemory(np.int64(bytes_124972)), + np.int64(N_73007), + np.int64(n_73011), + np.int64(iota_arg_74896), + defunc_4_map_res_mem_124919, + defunc_4_map_res_mem_124920, + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + defunc_3_map_res_mem_124960, + defunc_0_f_res_mem_124970, + mem_124973, + mem_125021, + mem_125023) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_intragroup_37880_var, - ((np.long(m_28478) * np.long(iota32_arg_28909)),), - (np.long(iota32_arg_28909),)) + self.mainMagnitudezisegmap_intragroup_102227_var, + ((np.int64(m_73008) * np.int64(iota_arg_74896)),), + (np.int64(iota_arg_74896),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - defunc_0_f_res_mem_45356 = mem_45332 - defunc_0_f_res_mem_45357 = mem_45334 + defunc_0_f_res_mem_125045 = mem_125021 + defunc_0_f_res_mem_125046 = mem_125023 else: - mem_45337 = opencl_alloc(self, bytes_45152, "mem_45337") - mem_45339 = opencl_alloc(self, bytes_45152, "mem_45339") - if ((1 * (np.long(num_groups_38418) * np.long(segmap_group_sizze_38417))) != 0): - self.mainMagnitudezisegmap_38395_var.set_args(self.global_failure, - np.int64(m_28478), - np.int64(num_groups_38418), - defunc_4_map_res_mem_45177, - defunc_3_map_res_mem_45245, - defunc_3_map_res_mem_45246, - mem_45337, mem_45339) + mem_125026 = opencl_alloc(self, bytes_120173, "mem_125026") + 
mem_125028 = opencl_alloc(self, bytes_120173, "mem_125028") + if ((1 * (np.int64(num_groups_102526) * np.int64(segmap_group_sizze_102525))) != 0): + self.mainMagnitudezisegmap_102503_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(num_groups_102526), + defunc_4_map_res_mem_124919, + defunc_3_map_res_mem_124959, + defunc_3_map_res_mem_124960, + mem_125026, mem_125028) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_38395_var, - ((np.long(num_groups_38418) * np.long(segmap_group_sizze_38417)),), - (np.long(segmap_group_sizze_38417),)) + self.mainMagnitudezisegmap_102503_var, + ((np.int64(num_groups_102526) * np.int64(segmap_group_sizze_102525)),), + (np.int64(segmap_group_sizze_102525),)) if synchronous: sync(self) - mem_45343 = opencl_alloc(self, bytes_45289, "mem_45343") - if slt64(np.int64(0), (m_28478 * iota32_arg_28909)): - stage1_max_num_groups_46657 = self.max_group_size - stage1_num_groups_46658 = smin64(stage1_max_num_groups_46657, - num_groups_38441) - num_threads_46659 = sext_i64_i32((stage1_num_groups_46658 * segscan_group_sizze_38440)) - if ((1 * (np.long(stage1_num_groups_46658) * np.long(segscan_group_sizze_38440))) != 0): - self.mainMagnitudeziscan_stage1_38354_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * segscan_group_sizze_38440)))), - np.int64(N_28477), - np.int64(m_28478), - np.int64(iota32_arg_28909), - np.int32(num_threads_46659), - defunc_4_map_res_mem_45178, - defunc_3_map_res_mem_45244, - defunc_3_map_res_mem_45245, - defunc_0_f_res_mem_45279, - mem_45339, - mem_45343) + mem_125032 = opencl_alloc(self, bytes_124978, "mem_125032") + if slt64(np.int64(0), (m_73008 * iota_arg_74896)): + stage1_max_num_groups_129634 = self.max_group_size + stage1_num_groups_129635 = smin64(stage1_max_num_groups_129634, + num_groups_102549) + num_threads_129636 = sext_i64_i32((stage1_num_groups_129635 * 
segscan_group_sizze_102548)) + if ((1 * (np.int64(stage1_num_groups_129635) * np.int64(segscan_group_sizze_102548))) != 0): + self.mainMagnitudeziscan_stage1_102464_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * segscan_group_sizze_102548)))), + np.int64(N_73007), + np.int64(m_73008), + np.int64(iota_arg_74896), + np.int32(num_threads_129636), + defunc_4_map_res_mem_124920, + defunc_3_map_res_mem_124958, + defunc_3_map_res_mem_124959, + defunc_0_f_res_mem_124970, + mem_125028, + mem_125032) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage1_38354_var, - ((np.long(stage1_num_groups_46658) * np.long(segscan_group_sizze_38440)),), - (np.long(segscan_group_sizze_38440),)) + self.mainMagnitudeziscan_stage1_102464_var, + ((np.int64(stage1_num_groups_129635) * np.int64(segscan_group_sizze_102548)),), + (np.int64(segscan_group_sizze_102548),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - if ((1 * (np.long(np.int64(1)) * np.long(stage1_num_groups_46658))) != 0): - self.mainMagnitudeziscan_stage2_38354_var.set_args(self.global_failure, - cl.LocalMemory(np.long(smax64(np.int64(1), - (np.int32(4) * stage1_num_groups_46658)))), - np.int64(m_28478), - np.int64(iota32_arg_28909), - np.int64(stage1_num_groups_46658), - np.int32(num_threads_46659), - mem_45343) + if ((1 * (np.int64(np.int64(1)) * np.int64(stage1_num_groups_129635))) != 0): + self.mainMagnitudeziscan_stage2_102464_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(smax64(np.int64(1), + (np.int32(8) * stage1_num_groups_129635)))), + np.int64(m_73008), + np.int64(iota_arg_74896), + np.int64(stage1_num_groups_129635), + np.int32(num_threads_129636), + mem_125032) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage2_38354_var, - ((np.long(np.int64(1)) * np.long(stage1_num_groups_46658)),), - (np.long(stage1_num_groups_46658),)) + 
self.mainMagnitudeziscan_stage2_102464_var, + ((np.int64(np.int64(1)) * np.int64(stage1_num_groups_129635)),), + (np.int64(stage1_num_groups_129635),)) if synchronous: sync(self) - required_groups_46701 = sext_i64_i32(sdiv_up64((m_28478 * iota32_arg_28909), - segscan_group_sizze_38440)) - if ((1 * (np.long(num_groups_38441) * np.long(segscan_group_sizze_38440))) != 0): - self.mainMagnitudeziscan_stage3_38354_var.set_args(self.global_failure, - np.int64(m_28478), - np.int64(iota32_arg_28909), - np.int64(num_groups_38441), - np.int32(num_threads_46659), - np.int32(required_groups_46701), - mem_45343) + required_groups_129678 = sext_i64_i32(sdiv_up64((m_73008 * iota_arg_74896), + segscan_group_sizze_102548)) + if ((1 * (np.int64(num_groups_102549) * np.int64(segscan_group_sizze_102548))) != 0): + self.mainMagnitudeziscan_stage3_102464_var.set_args(self.global_failure, + np.int64(m_73008), + np.int64(iota_arg_74896), + np.int64(num_groups_102549), + np.int32(num_threads_129636), + np.int32(required_groups_129678), + mem_125032) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudeziscan_stage3_38354_var, - ((np.long(num_groups_38441) * np.long(segscan_group_sizze_38440)),), - (np.long(segscan_group_sizze_38440),)) + self.mainMagnitudeziscan_stage3_102464_var, + ((np.int64(num_groups_102549) * np.int64(segscan_group_sizze_102548)),), + (np.int64(segscan_group_sizze_102548),)) if synchronous: sync(self) - mem_45346 = opencl_alloc(self, m_28478, "mem_45346") - mem_45348 = opencl_alloc(self, bytes_45152, "mem_45348") - mem_45350 = opencl_alloc(self, bytes_45152, "mem_45350") - if slt64((iota32_arg_28909 * np.int64(2)), segred_group_sizze_38486): - segment_sizze_nonzzero_46713 = smax64(np.int64(1), iota32_arg_28909) - num_threads_46714 = (num_groups_38487 * segred_group_sizze_38486) - if ((1 * (np.long(num_groups_38487) * np.long(segred_group_sizze_38486))) != 0): - self.mainMagnitudezisegred_small_38300_var.set_args(self.global_failure, - 
cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_38486))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_38486))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_38486))), - np.int64(m_28478), - np.int64(iota32_arg_28909), - np.int64(num_groups_38487), - np.int64(segment_sizze_nonzzero_46713), - mem_45284, - mem_45337, - mem_45339, - mem_45343, - mem_45346, - mem_45348, - mem_45350) + mem_125035 = opencl_alloc(self, m_73008, "mem_125035") + mem_125037 = opencl_alloc(self, bytes_120173, "mem_125037") + mem_125039 = opencl_alloc(self, bytes_120173, "mem_125039") + if slt64((iota_arg_74896 * np.int64(2)), segred_group_sizze_102592): + segment_sizze_nonzzero_129690 = smax64(np.int64(1), iota_arg_74896) + num_threads_129691 = (num_groups_102593 * segred_group_sizze_102592) + if ((1 * (np.int64(num_groups_102593) * np.int64(segred_group_sizze_102592))) != 0): + self.mainMagnitudezisegred_small_102410_var.set_args(self.global_failure, + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_102592))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_102592))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_102592))), + np.int64(m_73008), + np.int64(iota_arg_74896), + np.int64(num_groups_102593), + np.int64(segment_sizze_nonzzero_129690), + mem_124973, + mem_125026, + mem_125028, + mem_125032, + mem_125035, + mem_125037, + mem_125039) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_small_38300_var, - ((np.long(num_groups_38487) * np.long(segred_group_sizze_38486)),), - (np.long(segred_group_sizze_38486),)) + self.mainMagnitudezisegred_small_102410_var, + ((np.int64(num_groups_102593) * np.int64(segred_group_sizze_102592)),), + (np.int64(segred_group_sizze_102592),)) if synchronous: sync(self) else: - groups_per_segment_46749 = sdiv_up64(num_groups_38487, - smax64(np.int64(1), m_28478)) - elements_per_thread_46750 = sdiv_up64(iota32_arg_28909, - (segred_group_sizze_38486 * 
groups_per_segment_46749)) - virt_num_groups_46751 = (groups_per_segment_46749 * m_28478) - num_threads_46752 = (num_groups_38487 * segred_group_sizze_38486) - threads_per_segment_46753 = (groups_per_segment_46749 * segred_group_sizze_38486) - group_res_arr_mem_46754 = opencl_alloc(self, - (np.int32(1) * (segred_group_sizze_38486 * virt_num_groups_46751)), - "group_res_arr_mem_46754") - group_res_arr_mem_46756 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_38486 * virt_num_groups_46751)), - "group_res_arr_mem_46756") - group_res_arr_mem_46758 = opencl_alloc(self, - (np.int32(4) * (segred_group_sizze_38486 * virt_num_groups_46751)), - "group_res_arr_mem_46758") - mainMagnitudezicounter_mem_46760 = self.mainMagnitudezicounter_mem_46760 - if ((1 * (np.long(num_groups_38487) * np.long(segred_group_sizze_38486))) != 0): - self.mainMagnitudezisegred_large_38300_var.set_args(self.global_failure, - cl.LocalMemory(np.long(np.int32(1))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_38486))), - cl.LocalMemory(np.long((np.int32(4) * segred_group_sizze_38486))), - cl.LocalMemory(np.long((np.int32(1) * segred_group_sizze_38486))), - np.int64(iota32_arg_28909), - np.int64(num_groups_38487), - np.int64(groups_per_segment_46749), - np.int64(elements_per_thread_46750), - np.int64(virt_num_groups_46751), - mem_45284, - mem_45337, - mem_45339, - mem_45343, - mem_45346, - mem_45348, - mem_45350, - group_res_arr_mem_46754, - group_res_arr_mem_46756, - group_res_arr_mem_46758, - mainMagnitudezicounter_mem_46760) + groups_per_segment_129726 = sdiv_up64(num_groups_102593, + smax64(np.int64(1), m_73008)) + elements_per_thread_129727 = sdiv_up64(iota_arg_74896, + (segred_group_sizze_102592 * groups_per_segment_129726)) + virt_num_groups_129728 = (groups_per_segment_129726 * m_73008) + num_threads_129729 = (num_groups_102593 * segred_group_sizze_102592) + threads_per_segment_129730 = (groups_per_segment_129726 * segred_group_sizze_102592) + group_res_arr_mem_129731 = 
opencl_alloc(self, + (np.int32(1) * (segred_group_sizze_102592 * virt_num_groups_129728)), + "group_res_arr_mem_129731") + group_res_arr_mem_129733 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_102592 * virt_num_groups_129728)), + "group_res_arr_mem_129733") + group_res_arr_mem_129735 = opencl_alloc(self, + (np.int32(8) * (segred_group_sizze_102592 * virt_num_groups_129728)), + "group_res_arr_mem_129735") + mainMagnitudezicounter_mem_129737 = self.mainMagnitudezicounter_mem_129737 + if ((1 * (np.int64(num_groups_102593) * np.int64(segred_group_sizze_102592))) != 0): + self.mainMagnitudezisegred_large_102410_var.set_args(self.global_failure, + cl.LocalMemory(np.int64(np.int32(1))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_102592))), + cl.LocalMemory(np.int64((np.int32(8) * segred_group_sizze_102592))), + cl.LocalMemory(np.int64((np.int32(1) * segred_group_sizze_102592))), + np.int64(iota_arg_74896), + np.int64(num_groups_102593), + np.int64(groups_per_segment_129726), + np.int64(elements_per_thread_129727), + np.int64(virt_num_groups_129728), + mem_124973, + mem_125026, + mem_125028, + mem_125032, + mem_125035, + mem_125037, + mem_125039, + group_res_arr_mem_129731, + group_res_arr_mem_129733, + group_res_arr_mem_129735, + mainMagnitudezicounter_mem_129737) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegred_large_38300_var, - ((np.long(num_groups_38487) * np.long(segred_group_sizze_38486)),), - (np.long(segred_group_sizze_38486),)) + self.mainMagnitudezisegred_large_102410_var, + ((np.int64(num_groups_102593) * np.int64(segred_group_sizze_102592)),), + (np.int64(segred_group_sizze_102592),)) if synchronous: sync(self) - mem_45337 = None - mem_45343 = None - segmap_usable_groups_38526 = sdiv_up64(m_28478, segmap_group_sizze_38525) - mem_45353 = opencl_alloc(self, bytes_45152, "mem_45353") - mem_45355 = opencl_alloc(self, bytes_45152, "mem_45355") - if ((1 * (np.long(segmap_usable_groups_38526) * 
np.long(segmap_group_sizze_38525))) != 0): - self.mainMagnitudezisegmap_38241_var.set_args(self.global_failure, - self.failure_is_an_option, - self.global_failure_args, - np.int64(N_28477), - np.int64(m_28478), - np.int32(n_28481), - defunc_4_map_res_mem_45179, - defunc_3_map_res_mem_45245, - mem_45339, mem_45346, - mem_45348, mem_45350, - mem_45353, mem_45355) + mem_125026 = None + mem_125032 = None + segmap_usable_groups_102632 = sdiv_up64(m_73008, + segmap_group_sizze_102631) + mem_125042 = opencl_alloc(self, bytes_120173, "mem_125042") + mem_125044 = opencl_alloc(self, bytes_120173, "mem_125044") + if ((1 * (np.int64(segmap_usable_groups_102632) * np.int64(segmap_group_sizze_102631))) != 0): + self.mainMagnitudezisegmap_102352_var.set_args(self.global_failure, + self.failure_is_an_option, + self.global_failure_args, + np.int64(N_73007), + np.int64(m_73008), + np.int64(n_73011), + defunc_4_map_res_mem_124921, + defunc_3_map_res_mem_124959, + mem_125028, mem_125035, + mem_125037, mem_125039, + mem_125042, mem_125044) cl.enqueue_nd_range_kernel(self.queue, - self.mainMagnitudezisegmap_38241_var, - ((np.long(segmap_usable_groups_38526) * np.long(segmap_group_sizze_38525)),), - (np.long(segmap_group_sizze_38525),)) + self.mainMagnitudezisegmap_102352_var, + ((np.int64(segmap_usable_groups_102632) * np.int64(segmap_group_sizze_102631)),), + (np.int64(segmap_group_sizze_102631),)) if synchronous: sync(self) self.failure_is_an_option = np.int32(1) - mem_45339 = None - mem_45346 = None - mem_45348 = None - mem_45350 = None - defunc_0_f_res_mem_45356 = mem_45353 - defunc_0_f_res_mem_45357 = mem_45355 - defunc_4_map_res_mem_45178 = None - defunc_4_map_res_mem_45179 = None - defunc_3_map_res_mem_45244 = None - defunc_3_map_res_mem_45245 = None - defunc_3_map_res_mem_45246 = None - defunc_0_f_res_mem_45279 = None - mem_45284 = None - out_mem_45676 = defunc_4_map_res_mem_45177 - out_mem_45677 = defunc_0_f_res_mem_45356 - out_mem_45678 = defunc_0_f_res_mem_45357 - 
out_mem_45679 = mem_45325 - return (out_mem_45676, out_mem_45677, out_mem_45678, out_mem_45679) - def futhark_reshapeTransp(self, images_mem_44380, m_27747, n_27748, p_27749): - flatten_to_arg_27751 = (n_27748 * p_27749) - flat_dim_27752 = (n_27748 * p_27749) - dim_match_27753 = (flatten_to_arg_27751 == flat_dim_27752) - empty_or_match_cert_27754 = True - assert dim_match_27753, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:127:3-20\n #1 bfastfinal.fut:200:17-47\n #2 bfastfinal.fut:199:1-201:23\n" % ("Value of (core language) shape (", - flat_dim_27752, + mem_125028 = None + mem_125035 = None + mem_125037 = None + mem_125039 = None + defunc_0_f_res_mem_125045 = mem_125042 + defunc_0_f_res_mem_125046 = mem_125044 + defunc_4_map_res_mem_124920 = None + defunc_4_map_res_mem_124921 = None + defunc_3_map_res_mem_124958 = None + defunc_3_map_res_mem_124959 = None + defunc_3_map_res_mem_124960 = None + defunc_0_f_res_mem_124970 = None + mem_124973 = None + out_mem_126320 = defunc_4_map_res_mem_124919 + out_mem_126321 = defunc_0_f_res_mem_125045 + out_mem_126322 = defunc_0_f_res_mem_125046 + out_mem_126323 = mem_125014 + out_mem_126324 = hist_inds_mem_124138 + return (out_mem_126320, out_mem_126321, out_mem_126322, out_mem_126323, + out_mem_126324) + def futhark_reshapeTransp(self, images_mem_120107, m_70836, n_70837, p_70838): + flatten_to_arg_70840 = (n_70837 * p_70838) + flat_dim_70841 = (n_70837 * p_70838) + dim_match_70842 = (flatten_to_arg_70840 == flat_dim_70841) + empty_or_match_cert_70843 = True + assert dim_match_70842, ("Error: %s%d%s%d%s\n\nBacktrace:\n-> #0 /prelude/array.fut:127:3-20\n #1 bfastfinal.fut:211:17-47\n #2 bfastfinal.fut:210:1-212:23\n" % ("Value of (core language) shape (", + flat_dim_70841, ") cannot match shape of type `[", - flatten_to_arg_27751, + flatten_to_arg_70840, "]t`.")) - binop_x_44382 = (m_27747 * flatten_to_arg_27751) - bytes_44381 = (np.int64(4) * binop_x_44382) - mem_44383 = opencl_alloc(self, bytes_44381, 
"mem_44383") - self.futhark_builtinzhgpu_map_transpose_f32(mem_44383, np.int64(0), - images_mem_44380, np.int64(0), + binop_x_120109 = (m_70836 * flatten_to_arg_70840) + bytes_120108 = (np.int64(8) * binop_x_120109) + mem_120110 = opencl_alloc(self, bytes_120108, "mem_120110") + self.futhark_builtinzhgpu_map_transpose_f64(mem_120110, np.int64(0), + images_mem_120107, np.int64(0), np.int64(1), - flatten_to_arg_27751, m_27747) - out_arrsizze_45677 = flatten_to_arg_27751 - out_mem_45676 = mem_44383 - return (out_mem_45676, out_arrsizze_45677) - def convertToFloat(self, nan_value_27760_ext, images_mem_44380_ext): - m_27757 = None - n_27758 = None - p_27759 = None + flatten_to_arg_70840, m_70836) + out_arrsizze_126321 = flatten_to_arg_70840 + out_mem_126320 = mem_120110 + return (out_mem_126320, out_arrsizze_126321) + def convertToFloat(self, nan_value_70849_ext, images_mem_120107_ext): + m_70846 = None + n_70847 = None + p_70848 = None try: - nan_value_27760 = np.int16(ct.c_int16(nan_value_27760_ext)) + nan_value_70849 = np.int16(ct.c_int16(nan_value_70849_ext)) except (TypeError, AssertionError) as e: raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i16", - type(nan_value_27760_ext), - nan_value_27760_ext)) + type(nan_value_70849_ext), + nan_value_70849_ext)) try: - assert ((type(images_mem_44380_ext) in [np.ndarray, - cl.array.Array]) and (images_mem_44380_ext.dtype == np.int16)), "Parameter has unexpected type" - if (m_27757 == None): - m_27757 = np.int64(images_mem_44380_ext.shape[0]) + assert ((type(images_mem_120107_ext) in [np.ndarray, + cl.array.Array]) and (images_mem_120107_ext.dtype == np.int16)), "Parameter has unexpected type" + if (m_70846 == None): + m_70846 = np.int64(images_mem_120107_ext.shape[0]) else: - assert (m_27757 == images_mem_44380_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
- if (n_27758 == None): - n_27758 = np.int64(images_mem_44380_ext.shape[1]) + assert (m_70846 == images_mem_120107_ext.shape[0]), "Error: entry point arguments have invalid sizes." + if (n_70847 == None): + n_70847 = np.int64(images_mem_120107_ext.shape[1]) else: - assert (n_27758 == images_mem_44380_ext.shape[1]), "Error: entry point arguments have invalid sizes." - if (p_27759 == None): - p_27759 = np.int64(images_mem_44380_ext.shape[2]) + assert (n_70847 == images_mem_120107_ext.shape[1]), "Error: entry point arguments have invalid sizes." + if (p_70848 == None): + p_70848 = np.int64(images_mem_120107_ext.shape[2]) else: - assert (p_27759 == images_mem_44380_ext.shape[2]), "Error: entry point arguments have invalid sizes." - if (type(images_mem_44380_ext) == cl.array.Array): - images_mem_44380 = images_mem_44380_ext.data + assert (p_70848 == images_mem_120107_ext.shape[2]), "Error: entry point arguments have invalid sizes." + if (type(images_mem_120107_ext) == cl.array.Array): + images_mem_120107 = images_mem_120107_ext.data else: - images_mem_44380 = opencl_alloc(self, - np.int64(images_mem_44380_ext.nbytes), - "images_mem_44380") - if (np.int64(images_mem_44380_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, images_mem_44380, - normaliseArray(images_mem_44380_ext), + images_mem_120107 = opencl_alloc(self, + np.int64(images_mem_120107_ext.nbytes), + "images_mem_120107") + if (np.int64(images_mem_120107_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, images_mem_120107, + normaliseArray(images_mem_120107_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][][]i16", - type(images_mem_44380_ext), - images_mem_44380_ext)) + type(images_mem_120107_ext), + images_mem_120107_ext)) time_start = time.time() with np.errstate(divide="ignore", over="ignore", under="ignore", invalid="ignore"): - out_mem_45676 = 
self.futhark_convertToFloat(images_mem_44380, m_27757, - n_27758, p_27759, - nan_value_27760) + out_mem_126320 = self.futhark_convertToFloat(images_mem_120107, m_70846, + n_70847, p_70848, + nan_value_70849) runtime = (int((time.time() * 1000000)) - int((time_start * 1000000))) sync(self) - return cl.array.Array(self.queue, (m_27757, n_27758, p_27759), ct.c_float, - data=out_mem_45676) - def main(self, trend_29167_ext, k_29168_ext, n_29169_ext, freq_29170_ext, - hfrac_29171_ext, lam_29172_ext, mappingindices_mem_44380_ext, - images_mem_44381_ext): - N_29165 = None - m_29166 = None - N_29165 = None + return cl.array.Array(self.queue, (m_70846, n_70847, p_70848), ct.c_double, + data=out_mem_126320) + def main(self, trend_75137_ext, k_75138_ext, n_75139_ext, freq_75140_ext, + hfrac_75141_ext, level_75142_ext, lam_75143_ext, hist_75144_ext, + conf_75145_ext, mappingindices_mem_120107_ext, + images_mem_120108_ext): + N_75135 = None + m_75136 = None + N_75135 = None + try: + trend_75137 = np.int64(ct.c_int64(trend_75137_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(trend_75137_ext), + trend_75137_ext)) try: - trend_29167 = np.int32(ct.c_int32(trend_29167_ext)) + k_75138 = np.int64(ct.c_int64(k_75138_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(trend_29167_ext), - trend_29167_ext)) + raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(k_75138_ext), + k_75138_ext)) try: - k_29168 = np.int32(ct.c_int32(k_29168_ext)) + n_75139 = np.int64(ct.c_int64(n_75139_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - 
type(k_29168_ext), - k_29168_ext)) + raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(n_75139_ext), + n_75139_ext)) try: - n_29169 = np.int32(ct.c_int32(n_29169_ext)) + freq_75140 = np.float64(ct.c_double(freq_75140_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(n_29169_ext), - n_29169_ext)) + raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(freq_75140_ext), + freq_75140_ext)) try: - freq_29170 = np.float32(ct.c_float(freq_29170_ext)) + hfrac_75141 = np.float64(ct.c_double(hfrac_75141_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(freq_29170_ext), - freq_29170_ext)) + raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(hfrac_75141_ext), + hfrac_75141_ext)) try: - hfrac_29171 = np.float32(ct.c_float(hfrac_29171_ext)) + level_75142 = np.float64(ct.c_double(level_75142_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(hfrac_29171_ext), - hfrac_29171_ext)) + raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(level_75142_ext), + level_75142_ext)) try: - lam_29172 = np.float32(ct.c_float(lam_29172_ext)) + lam_75143 = np.float64(ct.c_double(lam_75143_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(lam_29172_ext), - 
lam_29172_ext)) + raise TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(lam_75143_ext), + lam_75143_ext)) try: - assert ((type(mappingindices_mem_44380_ext) in [np.ndarray, - cl.array.Array]) and (mappingindices_mem_44380_ext.dtype == np.int32)), "Parameter has unexpected type" - if (N_29165 == None): - N_29165 = np.int64(mappingindices_mem_44380_ext.shape[0]) + hist_75144 = np.int64(ct.c_int64(hist_75144_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(hist_75144_ext), + hist_75144_ext)) + try: + conf_75145 = np.float64(ct.c_double(conf_75145_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #8 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(conf_75145_ext), + conf_75145_ext)) + try: + assert ((type(mappingindices_mem_120107_ext) in [np.ndarray, + cl.array.Array]) and (mappingindices_mem_120107_ext.dtype == np.int64)), "Parameter has unexpected type" + if (N_75135 == None): + N_75135 = np.int64(mappingindices_mem_120107_ext.shape[0]) else: - assert (N_29165 == mappingindices_mem_44380_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (type(mappingindices_mem_44380_ext) == cl.array.Array): - mappingindices_mem_44380 = mappingindices_mem_44380_ext.data + assert (N_75135 == mappingindices_mem_120107_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (type(mappingindices_mem_120107_ext) == cl.array.Array): + mappingindices_mem_120107 = mappingindices_mem_120107_ext.data else: - mappingindices_mem_44380 = opencl_alloc(self, - np.int64(mappingindices_mem_44380_ext.nbytes), - "mappingindices_mem_44380") - if (np.int64(mappingindices_mem_44380_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, mappingindices_mem_44380, - normaliseArray(mappingindices_mem_44380_ext), + mappingindices_mem_120107 = opencl_alloc(self, + np.int64(mappingindices_mem_120107_ext.nbytes), + "mappingindices_mem_120107") + if (np.int64(mappingindices_mem_120107_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, mappingindices_mem_120107, + normaliseArray(mappingindices_mem_120107_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i32", - type(mappingindices_mem_44380_ext), - mappingindices_mem_44380_ext)) + raise TypeError("Argument #9 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i64", + type(mappingindices_mem_120107_ext), + mappingindices_mem_120107_ext)) try: - assert ((type(images_mem_44381_ext) in [np.ndarray, - cl.array.Array]) and (images_mem_44381_ext.dtype == np.float32)), "Parameter has unexpected type" - if (m_29166 == None): - m_29166 = np.int64(images_mem_44381_ext.shape[0]) + assert ((type(images_mem_120108_ext) in [np.ndarray, + cl.array.Array]) and (images_mem_120108_ext.dtype == np.float64)), "Parameter has unexpected type" + if (m_75136 == None): + m_75136 = np.int64(images_mem_120108_ext.shape[0]) else: - assert (m_29166 == images_mem_44381_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (N_29165 == None): - N_29165 = np.int64(images_mem_44381_ext.shape[1]) + assert (m_75136 == images_mem_120108_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (N_75135 == None): + N_75135 = np.int64(images_mem_120108_ext.shape[1]) else: - assert (N_29165 == images_mem_44381_ext.shape[1]), "Error: entry point arguments have invalid sizes." - if (type(images_mem_44381_ext) == cl.array.Array): - images_mem_44381 = images_mem_44381_ext.data + assert (N_75135 == images_mem_120108_ext.shape[1]), "Error: entry point arguments have invalid sizes." + if (type(images_mem_120108_ext) == cl.array.Array): + images_mem_120108 = images_mem_120108_ext.data else: - images_mem_44381 = opencl_alloc(self, - np.int64(images_mem_44381_ext.nbytes), - "images_mem_44381") - if (np.int64(images_mem_44381_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, images_mem_44381, - normaliseArray(images_mem_44381_ext), + images_mem_120108 = opencl_alloc(self, + np.int64(images_mem_120108_ext.nbytes), + "images_mem_120108") + if (np.int64(images_mem_120108_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, images_mem_120108, + normaliseArray(images_mem_120108_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f32", - type(images_mem_44381_ext), - images_mem_44381_ext)) + raise TypeError("Argument #10 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f64", + type(images_mem_120108_ext), + images_mem_120108_ext)) time_start = time.time() with np.errstate(divide="ignore", over="ignore", under="ignore", invalid="ignore"): - (out_mem_45676, out_mem_45677, - out_mem_45678) = self.futhark_main(mappingindices_mem_44380, - images_mem_44381, N_29165, m_29166, - trend_29167, k_29168, n_29169, - freq_29170, hfrac_29171, lam_29172) + (out_mem_126320, out_mem_126321, out_mem_126322, + out_mem_126323) = self.futhark_main(mappingindices_mem_120107, + images_mem_120108, N_75135, m_75136, + trend_75137, k_75138, n_75139, + freq_75140, hfrac_75141, level_75142, + lam_75143, hist_75144, 
conf_75145) runtime = (int((time.time() * 1000000)) - int((time_start * 1000000))) sync(self) - return (cl.array.Array(self.queue, (m_29166,), ct.c_int32, - data=out_mem_45676), cl.array.Array(self.queue, - (m_29166,), - ct.c_int32, - data=out_mem_45677), - cl.array.Array(self.queue, (m_29166,), ct.c_float, - data=out_mem_45678)) - def mainDetailed(self, trend_27773_ext, k_27774_ext, n_27775_ext, - freq_27776_ext, hfrac_27777_ext, lam_27778_ext, - mappingindices_mem_44380_ext, images_mem_44381_ext): - N_27771 = None - m_27772 = None - N_27771 = None + return (cl.array.Array(self.queue, (m_75136,), ct.c_int64, + data=out_mem_126320), cl.array.Array(self.queue, + (m_75136,), + ct.c_int64, + data=out_mem_126321), + cl.array.Array(self.queue, (m_75136,), ct.c_double, + data=out_mem_126322), cl.array.Array(self.queue, + (m_75136,), + ct.c_int64, + data=out_mem_126323)) + def mainDetailed(self, trend_70862_ext, k_70863_ext, n_70864_ext, + freq_70865_ext, hfrac_70866_ext, level_70867_ext, + lam_70868_ext, hist_70869_ext, conf_70870_ext, + mappingindices_mem_120107_ext, images_mem_120108_ext): + N_70860 = None + m_70861 = None + N_70860 = None + try: + trend_70862 = np.int64(ct.c_int64(trend_70862_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(trend_70862_ext), + trend_70862_ext)) + try: + k_70863 = np.int64(ct.c_int64(k_70863_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(k_70863_ext), + k_70863_ext)) try: - trend_27773 = np.int32(ct.c_int32(trend_27773_ext)) + n_70864 = np.int64(ct.c_int64(n_70864_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(trend_27773_ext), - 
trend_27773_ext)) + raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(n_70864_ext), + n_70864_ext)) try: - k_27774 = np.int32(ct.c_int32(k_27774_ext)) + freq_70865 = np.float64(ct.c_double(freq_70865_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(k_27774_ext), - k_27774_ext)) + raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(freq_70865_ext), + freq_70865_ext)) try: - n_27775 = np.int32(ct.c_int32(n_27775_ext)) + hfrac_70866 = np.float64(ct.c_double(hfrac_70866_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(n_27775_ext), - n_27775_ext)) + raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(hfrac_70866_ext), + hfrac_70866_ext)) try: - freq_27776 = np.float32(ct.c_float(freq_27776_ext)) + level_70867 = np.float64(ct.c_double(level_70867_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(freq_27776_ext), - freq_27776_ext)) + raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(level_70867_ext), + level_70867_ext)) try: - hfrac_27777 = np.float32(ct.c_float(hfrac_27777_ext)) + lam_70868 = np.float64(ct.c_double(lam_70868_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(hfrac_27777_ext), - hfrac_27777_ext)) + raise 
TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(lam_70868_ext), + lam_70868_ext)) try: - lam_27778 = np.float32(ct.c_float(lam_27778_ext)) + hist_70869 = np.int64(ct.c_int64(hist_70869_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(lam_27778_ext), - lam_27778_ext)) + raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(hist_70869_ext), + hist_70869_ext)) try: - assert ((type(mappingindices_mem_44380_ext) in [np.ndarray, - cl.array.Array]) and (mappingindices_mem_44380_ext.dtype == np.int32)), "Parameter has unexpected type" - if (N_27771 == None): - N_27771 = np.int64(mappingindices_mem_44380_ext.shape[0]) + conf_70870 = np.float64(ct.c_double(conf_70870_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #8 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(conf_70870_ext), + conf_70870_ext)) + try: + assert ((type(mappingindices_mem_120107_ext) in [np.ndarray, + cl.array.Array]) and (mappingindices_mem_120107_ext.dtype == np.int64)), "Parameter has unexpected type" + if (N_70860 == None): + N_70860 = np.int64(mappingindices_mem_120107_ext.shape[0]) else: - assert (N_27771 == mappingindices_mem_44380_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (type(mappingindices_mem_44380_ext) == cl.array.Array): - mappingindices_mem_44380 = mappingindices_mem_44380_ext.data + assert (N_70860 == mappingindices_mem_120107_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (type(mappingindices_mem_120107_ext) == cl.array.Array): + mappingindices_mem_120107 = mappingindices_mem_120107_ext.data else: - mappingindices_mem_44380 = opencl_alloc(self, - np.int64(mappingindices_mem_44380_ext.nbytes), - "mappingindices_mem_44380") - if (np.int64(mappingindices_mem_44380_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, mappingindices_mem_44380, - normaliseArray(mappingindices_mem_44380_ext), + mappingindices_mem_120107 = opencl_alloc(self, + np.int64(mappingindices_mem_120107_ext.nbytes), + "mappingindices_mem_120107") + if (np.int64(mappingindices_mem_120107_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, mappingindices_mem_120107, + normaliseArray(mappingindices_mem_120107_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i32", - type(mappingindices_mem_44380_ext), - mappingindices_mem_44380_ext)) + raise TypeError("Argument #9 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i64", + type(mappingindices_mem_120107_ext), + mappingindices_mem_120107_ext)) try: - assert ((type(images_mem_44381_ext) in [np.ndarray, - cl.array.Array]) and (images_mem_44381_ext.dtype == np.float32)), "Parameter has unexpected type" - if (m_27772 == None): - m_27772 = np.int64(images_mem_44381_ext.shape[0]) + assert ((type(images_mem_120108_ext) in [np.ndarray, + cl.array.Array]) and (images_mem_120108_ext.dtype == np.float64)), "Parameter has unexpected type" + if (m_70861 == None): + m_70861 = np.int64(images_mem_120108_ext.shape[0]) else: - assert (m_27772 == images_mem_44381_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (N_27771 == None): - N_27771 = np.int64(images_mem_44381_ext.shape[1]) + assert (m_70861 == images_mem_120108_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (N_70860 == None): + N_70860 = np.int64(images_mem_120108_ext.shape[1]) else: - assert (N_27771 == images_mem_44381_ext.shape[1]), "Error: entry point arguments have invalid sizes." - if (type(images_mem_44381_ext) == cl.array.Array): - images_mem_44381 = images_mem_44381_ext.data + assert (N_70860 == images_mem_120108_ext.shape[1]), "Error: entry point arguments have invalid sizes." + if (type(images_mem_120108_ext) == cl.array.Array): + images_mem_120108 = images_mem_120108_ext.data else: - images_mem_44381 = opencl_alloc(self, - np.int64(images_mem_44381_ext.nbytes), - "images_mem_44381") - if (np.int64(images_mem_44381_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, images_mem_44381, - normaliseArray(images_mem_44381_ext), + images_mem_120108 = opencl_alloc(self, + np.int64(images_mem_120108_ext.nbytes), + "images_mem_120108") + if (np.int64(images_mem_120108_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, images_mem_120108, + normaliseArray(images_mem_120108_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f32", - type(images_mem_44381_ext), - images_mem_44381_ext)) + raise TypeError("Argument #10 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f64", + type(images_mem_120108_ext), + images_mem_120108_ext)) time_start = time.time() with np.errstate(divide="ignore", over="ignore", under="ignore", invalid="ignore"): - (out_mem_45676, out_mem_45677, out_mem_45678, out_mem_45679, - out_mem_45680, out_arrsizze_45681, out_mem_45682, out_arrsizze_45683, - out_mem_45684, out_arrsizze_45685, out_mem_45686, out_mem_45687, - out_mem_45688, out_mem_45689, - out_mem_45690) = self.futhark_mainDetailed(mappingindices_mem_44380, - images_mem_44381, N_27771, - m_27772, trend_27773, k_27774, - n_27775, freq_27776, - hfrac_27777, lam_27778) + (out_mem_126320, out_mem_126321, 
out_mem_126322, out_mem_126323, + out_mem_126324, out_arrsizze_126325, out_mem_126326, out_arrsizze_126327, + out_mem_126328, out_arrsizze_126329, out_mem_126330, out_mem_126331, + out_mem_126332, out_mem_126333, out_mem_126334, + out_mem_126335) = self.futhark_mainDetailed(mappingindices_mem_120107, + images_mem_120108, N_70860, + m_70861, trend_70862, + k_70863, n_70864, freq_70865, + hfrac_70866, level_70867, + lam_70868, hist_70869, + conf_70870) runtime = (int((time.time() * 1000000)) - int((time_start * 1000000))) sync(self) - return (cl.array.Array(self.queue, (m_27772,), ct.c_float, - data=out_mem_45676), cl.array.Array(self.queue, - (m_27772,), - ct.c_int32, - data=out_mem_45677), - cl.array.Array(self.queue, (m_27772,), ct.c_int32, - data=out_mem_45678), cl.array.Array(self.queue, - (m_27772,), - ct.c_float, - data=out_mem_45679), - cl.array.Array(self.queue, (m_27772, out_arrsizze_45681), - ct.c_float, data=out_mem_45680), - cl.array.Array(self.queue, (m_27772, out_arrsizze_45683), - ct.c_float, data=out_mem_45682), - cl.array.Array(self.queue, (out_arrsizze_45685,), ct.c_float, - data=out_mem_45684), cl.array.Array(self.queue, - (m_27772,), - ct.c_int32, - data=out_mem_45686), - cl.array.Array(self.queue, (m_27772,), ct.c_float, - data=out_mem_45687), cl.array.Array(self.queue, - (m_27772,), - ct.c_float, - data=out_mem_45688), - cl.array.Array(self.queue, (m_27772, N_27771), ct.c_float, - data=out_mem_45689), cl.array.Array(self.queue, - (m_27772, - N_27771), - ct.c_float, - data=out_mem_45690)) - def mainMagnitude(self, trend_28479_ext, k_28480_ext, n_28481_ext, - freq_28482_ext, hfrac_28483_ext, lam_28484_ext, - mappingindices_mem_44380_ext, images_mem_44381_ext): - N_28477 = None - m_28478 = None - N_28477 = None + return (cl.array.Array(self.queue, (m_70861,), ct.c_double, + data=out_mem_126320), cl.array.Array(self.queue, + (m_70861,), + ct.c_int64, + data=out_mem_126321), + cl.array.Array(self.queue, (m_70861,), ct.c_int64, + 
data=out_mem_126322), cl.array.Array(self.queue, + (m_70861,), + ct.c_double, + data=out_mem_126323), + cl.array.Array(self.queue, (m_70861, out_arrsizze_126325), + ct.c_double, data=out_mem_126324), + cl.array.Array(self.queue, (m_70861, out_arrsizze_126327), + ct.c_double, data=out_mem_126326), + cl.array.Array(self.queue, (out_arrsizze_126329,), ct.c_double, + data=out_mem_126328), cl.array.Array(self.queue, + (m_70861,), + ct.c_int64, + data=out_mem_126330), + cl.array.Array(self.queue, (m_70861,), ct.c_double, + data=out_mem_126331), cl.array.Array(self.queue, + (m_70861,), + ct.c_double, + data=out_mem_126332), + cl.array.Array(self.queue, (m_70861, N_70860), ct.c_double, + data=out_mem_126333), cl.array.Array(self.queue, + (m_70861, + N_70860), + ct.c_double, + data=out_mem_126334), + cl.array.Array(self.queue, (m_70861,), ct.c_int64, + data=out_mem_126335)) + def mainMagnitude(self, trend_73009_ext, k_73010_ext, n_73011_ext, + freq_73012_ext, hfrac_73013_ext, level_73014_ext, + lam_73015_ext, hist_73016_ext, conf_73017_ext, + mappingindices_mem_120107_ext, images_mem_120108_ext): + N_73007 = None + m_73008 = None + N_73007 = None + try: + trend_73009 = np.int64(ct.c_int64(trend_73009_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(trend_73009_ext), + trend_73009_ext)) + try: + k_73010 = np.int64(ct.c_int64(k_73010_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(k_73010_ext), + k_73010_ext)) + try: + n_73011 = np.int64(ct.c_int64(n_73011_ext)) + except (TypeError, AssertionError) as e: + raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(n_73011_ext), + n_73011_ext)) try: - trend_28479 = 
np.int32(ct.c_int32(trend_28479_ext)) + freq_73012 = np.float64(ct.c_double(freq_73012_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(trend_28479_ext), - trend_28479_ext)) + raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(freq_73012_ext), + freq_73012_ext)) try: - k_28480 = np.int32(ct.c_int32(k_28480_ext)) + hfrac_73013 = np.float64(ct.c_double(hfrac_73013_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #1 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(k_28480_ext), - k_28480_ext)) + raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(hfrac_73013_ext), + hfrac_73013_ext)) try: - n_28481 = np.int32(ct.c_int32(n_28481_ext)) + level_73014 = np.float64(ct.c_double(level_73014_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #2 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i32", - type(n_28481_ext), - n_28481_ext)) + raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(level_73014_ext), + level_73014_ext)) try: - freq_28482 = np.float32(ct.c_float(freq_28482_ext)) + lam_73015 = np.float64(ct.c_double(lam_73015_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #3 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(freq_28482_ext), - freq_28482_ext)) + raise TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(lam_73015_ext), + lam_73015_ext)) try: - hfrac_28483 = np.float32(ct.c_float(hfrac_28483_ext)) + 
hist_73016 = np.int64(ct.c_int64(hist_73016_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #4 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(hfrac_28483_ext), - hfrac_28483_ext)) + raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("i64", + type(hist_73016_ext), + hist_73016_ext)) try: - lam_28484 = np.float32(ct.c_float(lam_28484_ext)) + conf_73017 = np.float64(ct.c_double(conf_73017_ext)) except (TypeError, AssertionError) as e: - raise TypeError("Argument #5 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f32", - type(lam_28484_ext), - lam_28484_ext)) + raise TypeError("Argument #8 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("f64", + type(conf_73017_ext), + conf_73017_ext)) try: - assert ((type(mappingindices_mem_44380_ext) in [np.ndarray, - cl.array.Array]) and (mappingindices_mem_44380_ext.dtype == np.int32)), "Parameter has unexpected type" - if (N_28477 == None): - N_28477 = np.int64(mappingindices_mem_44380_ext.shape[0]) + assert ((type(mappingindices_mem_120107_ext) in [np.ndarray, + cl.array.Array]) and (mappingindices_mem_120107_ext.dtype == np.int64)), "Parameter has unexpected type" + if (N_73007 == None): + N_73007 = np.int64(mappingindices_mem_120107_ext.shape[0]) else: - assert (N_28477 == mappingindices_mem_44380_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (type(mappingindices_mem_44380_ext) == cl.array.Array): - mappingindices_mem_44380 = mappingindices_mem_44380_ext.data + assert (N_73007 == mappingindices_mem_120107_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (type(mappingindices_mem_120107_ext) == cl.array.Array): + mappingindices_mem_120107 = mappingindices_mem_120107_ext.data else: - mappingindices_mem_44380 = opencl_alloc(self, - np.int64(mappingindices_mem_44380_ext.nbytes), - "mappingindices_mem_44380") - if (np.int64(mappingindices_mem_44380_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, mappingindices_mem_44380, - normaliseArray(mappingindices_mem_44380_ext), + mappingindices_mem_120107 = opencl_alloc(self, + np.int64(mappingindices_mem_120107_ext.nbytes), + "mappingindices_mem_120107") + if (np.int64(mappingindices_mem_120107_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, mappingindices_mem_120107, + normaliseArray(mappingindices_mem_120107_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #6 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i32", - type(mappingindices_mem_44380_ext), - mappingindices_mem_44380_ext)) + raise TypeError("Argument #9 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[]i64", + type(mappingindices_mem_120107_ext), + mappingindices_mem_120107_ext)) try: - assert ((type(images_mem_44381_ext) in [np.ndarray, - cl.array.Array]) and (images_mem_44381_ext.dtype == np.float32)), "Parameter has unexpected type" - if (m_28478 == None): - m_28478 = np.int64(images_mem_44381_ext.shape[0]) + assert ((type(images_mem_120108_ext) in [np.ndarray, + cl.array.Array]) and (images_mem_120108_ext.dtype == np.float64)), "Parameter has unexpected type" + if (m_73008 == None): + m_73008 = np.int64(images_mem_120108_ext.shape[0]) else: - assert (m_28478 == images_mem_44381_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (N_28477 == None): - N_28477 = np.int64(images_mem_44381_ext.shape[1]) + assert (m_73008 == images_mem_120108_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (N_73007 == None): + N_73007 = np.int64(images_mem_120108_ext.shape[1]) else: - assert (N_28477 == images_mem_44381_ext.shape[1]), "Error: entry point arguments have invalid sizes." - if (type(images_mem_44381_ext) == cl.array.Array): - images_mem_44381 = images_mem_44381_ext.data + assert (N_73007 == images_mem_120108_ext.shape[1]), "Error: entry point arguments have invalid sizes." + if (type(images_mem_120108_ext) == cl.array.Array): + images_mem_120108 = images_mem_120108_ext.data else: - images_mem_44381 = opencl_alloc(self, - np.int64(images_mem_44381_ext.nbytes), - "images_mem_44381") - if (np.int64(images_mem_44381_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, images_mem_44381, - normaliseArray(images_mem_44381_ext), + images_mem_120108 = opencl_alloc(self, + np.int64(images_mem_120108_ext.nbytes), + "images_mem_120108") + if (np.int64(images_mem_120108_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, images_mem_120108, + normaliseArray(images_mem_120108_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #7 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f32", - type(images_mem_44381_ext), - images_mem_44381_ext)) + raise TypeError("Argument #10 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][]f64", + type(images_mem_120108_ext), + images_mem_120108_ext)) time_start = time.time() with np.errstate(divide="ignore", over="ignore", under="ignore", invalid="ignore"): - (out_mem_45676, out_mem_45677, out_mem_45678, - out_mem_45679) = self.futhark_mainMagnitude(mappingindices_mem_44380, - images_mem_44381, N_28477, - m_28478, trend_28479, - k_28480, n_28481, freq_28482, - hfrac_28483, lam_28484) + (out_mem_126320, out_mem_126321, out_mem_126322, out_mem_126323, + out_mem_126324) = self.futhark_mainMagnitude(mappingindices_mem_120107, + images_mem_120108, N_73007, + m_73008, trend_73009, + k_73010, n_73011, + 
freq_73012, hfrac_73013, + level_73014, lam_73015, + hist_73016, conf_73017) runtime = (int((time.time() * 1000000)) - int((time_start * 1000000))) sync(self) - return (cl.array.Array(self.queue, (m_28478,), ct.c_int32, - data=out_mem_45676), cl.array.Array(self.queue, - (m_28478,), - ct.c_int32, - data=out_mem_45677), - cl.array.Array(self.queue, (m_28478,), ct.c_float, - data=out_mem_45678), cl.array.Array(self.queue, - (m_28478,), - ct.c_float, - data=out_mem_45679)) - def reshapeTransp(self, images_mem_44380_ext): - m_27747 = None - n_27748 = None - p_27749 = None + return (cl.array.Array(self.queue, (m_73008,), ct.c_int64, + data=out_mem_126320), cl.array.Array(self.queue, + (m_73008,), + ct.c_int64, + data=out_mem_126321), + cl.array.Array(self.queue, (m_73008,), ct.c_double, + data=out_mem_126322), cl.array.Array(self.queue, + (m_73008,), + ct.c_double, + data=out_mem_126323), + cl.array.Array(self.queue, (m_73008,), ct.c_int64, + data=out_mem_126324)) + def reshapeTransp(self, images_mem_120107_ext): + m_70836 = None + n_70837 = None + p_70838 = None try: - assert ((type(images_mem_44380_ext) in [np.ndarray, - cl.array.Array]) and (images_mem_44380_ext.dtype == np.float32)), "Parameter has unexpected type" - if (m_27747 == None): - m_27747 = np.int64(images_mem_44380_ext.shape[0]) + assert ((type(images_mem_120107_ext) in [np.ndarray, + cl.array.Array]) and (images_mem_120107_ext.dtype == np.float64)), "Parameter has unexpected type" + if (m_70836 == None): + m_70836 = np.int64(images_mem_120107_ext.shape[0]) else: - assert (m_27747 == images_mem_44380_ext.shape[0]), "Error: entry point arguments have invalid sizes." - if (n_27748 == None): - n_27748 = np.int64(images_mem_44380_ext.shape[1]) + assert (m_70836 == images_mem_120107_ext.shape[0]), "Error: entry point arguments have invalid sizes." 
+ if (n_70837 == None): + n_70837 = np.int64(images_mem_120107_ext.shape[1]) else: - assert (n_27748 == images_mem_44380_ext.shape[1]), "Error: entry point arguments have invalid sizes." - if (p_27749 == None): - p_27749 = np.int64(images_mem_44380_ext.shape[2]) + assert (n_70837 == images_mem_120107_ext.shape[1]), "Error: entry point arguments have invalid sizes." + if (p_70838 == None): + p_70838 = np.int64(images_mem_120107_ext.shape[2]) else: - assert (p_27749 == images_mem_44380_ext.shape[2]), "Error: entry point arguments have invalid sizes." - if (type(images_mem_44380_ext) == cl.array.Array): - images_mem_44380 = images_mem_44380_ext.data + assert (p_70838 == images_mem_120107_ext.shape[2]), "Error: entry point arguments have invalid sizes." + if (type(images_mem_120107_ext) == cl.array.Array): + images_mem_120107 = images_mem_120107_ext.data else: - images_mem_44380 = opencl_alloc(self, - np.int64(images_mem_44380_ext.nbytes), - "images_mem_44380") - if (np.int64(images_mem_44380_ext.nbytes) != 0): - cl.enqueue_copy(self.queue, images_mem_44380, - normaliseArray(images_mem_44380_ext), + images_mem_120107 = opencl_alloc(self, + np.int64(images_mem_120107_ext.nbytes), + "images_mem_120107") + if (np.int64(images_mem_120107_ext.nbytes) != 0): + cl.enqueue_copy(self.queue, images_mem_120107, + normaliseArray(images_mem_120107_ext), is_blocking=synchronous) except (TypeError, AssertionError) as e: - raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][][]f32", - type(images_mem_44380_ext), - images_mem_44380_ext)) + raise TypeError("Argument #0 has invalid value\nFuthark type: {}\nArgument has Python type {} and value: {}\n".format("[][][]f64", + type(images_mem_120107_ext), + images_mem_120107_ext)) time_start = time.time() with np.errstate(divide="ignore", over="ignore", under="ignore", invalid="ignore"): - (out_mem_45676, - out_arrsizze_45677) = self.futhark_reshapeTransp(images_mem_44380, 
- m_27747, n_27748, - p_27749) + (out_mem_126320, + out_arrsizze_126321) = self.futhark_reshapeTransp(images_mem_120107, + m_70836, n_70837, + p_70838) runtime = (int((time.time() * 1000000)) - int((time_start * 1000000))) sync(self) - return cl.array.Array(self.queue, (out_arrsizze_45677, m_27747), ct.c_float, - data=out_mem_45676) \ No newline at end of file + return cl.array.Array(self.queue, (out_arrsizze_126321, m_70836), + ct.c_double, data=out_mem_126320) \ No newline at end of file